diff options
author | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
---|---|---|
committer | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
commit | 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch) | |
tree | 4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/iconv | |
parent | 199fc19d3aaaf57944ef036e15904febe877fc93 (diff) | |
download | glibc-zack/build-layout-experiment.tar glibc-zack/build-layout-experiment.tar.gz glibc-zack/build-layout-experiment.tar.bz2 glibc-zack/build-layout-experiment.zip |
Prepare for radical source tree reorganization. (zack/build-layout-experiment)
All top-level files and directories are moved into a temporary storage
directory, REORG.TODO, except for files that will certainly still
exist in their current form at top level when we're done (COPYING,
COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which
are moved to the new directory OldChangeLogs, instead), and the
generated file INSTALL (which is just deleted; in the new order, there
will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/iconv')
35 files changed, 10175 insertions, 0 deletions
diff --git a/REORG.TODO/iconv/Makefile b/REORG.TODO/iconv/Makefile new file mode 100644 index 0000000000..b2fead0479 --- /dev/null +++ b/REORG.TODO/iconv/Makefile @@ -0,0 +1,79 @@ +# Copyright (C) 1997-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# +# Makefile for iconv. +# +subdir := iconv + +include ../Makeconfig + +headers = iconv.h gconv.h +routines = iconv_open iconv iconv_close \ + gconv_open gconv gconv_close gconv_db gconv_conf \ + gconv_builtin gconv_simple gconv_trans gconv_cache +routines += gconv_dl + +vpath %.c ../locale/programs ../intl + +iconv_prog-modules = iconv_charmap charmap charmap-dir linereader \ + dummy-repertoire simple-hash xstrdup xmalloc +iconvconfig-modules = strtab xmalloc hash-string +extra-objs = $(iconv_prog-modules:=.o) $(iconvconfig-modules:=.o) +CFLAGS-iconv_prog.c = -I../locale/programs +CFLAGS-iconv_charmap.c = -I../locale/programs +CFLAGS-dummy-repertoire.c = -I../locale/programs +CFLAGS-charmap.c = -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \ + -DDEFAULT_CHARMAP=null_pointer -DNEED_NULL_POINTER +CFLAGS-linereader.c = -DNO_TRANSLITERATION +CFLAGS-simple-hash.c = -I../locale + +tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 + +others = iconv_prog iconvconfig +install-others-programs = 
$(inst_bindir)/iconv +install-sbin = iconvconfig + +CFLAGS-gconv_cache.c += -DGCONV_DIR='"$(gconvdir)"' +CFLAGS-gconv_conf.c = -DGCONV_PATH='"$(gconvdir)"' +CFLAGS-iconvconfig.c = -DGCONV_PATH='"$(gconvdir)"' -DGCONV_DIR='"$(gconvdir)"' + +# Set libof-* for each routine. +cpp-srcs-left := $(iconv_prog-modules) $(iconvconfig-modules) +lib := iconvprogs +include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + +ifeq ($(run-built-tests),yes) +xtests-special += $(objpfx)test-iconvconfig.out +endif + +include ../Rules + +$(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) + $(do-install-program) + +$(objpfx)iconv_prog: $(iconv_prog-modules:%=$(objpfx)%.o) +$(objpfx)iconvconfig: $(iconvconfig-modules:%=$(objpfx)%.o) + +$(objpfx)test-iconvconfig.out: /dev/null $(objpfx)iconvconfig + (set -e; \ + tmp=$(objpfx)gconv-modules.cache.$$$$; \ + rm -f $$tmp; \ + $(make-test-out) --output=$$tmp --nostdlib $(inst_gconvdir); \ + cmp $$tmp $(inst_gconvdir)/gconv-modules.cache; \ + rm -f $$tmp) > $@; \ + $(evaluate-test) diff --git a/REORG.TODO/iconv/Versions b/REORG.TODO/iconv/Versions new file mode 100644 index 0000000000..60ab10a277 --- /dev/null +++ b/REORG.TODO/iconv/Versions @@ -0,0 +1,13 @@ +libc { + GLIBC_2.1 { + # i* + iconv; iconv_open; iconv_close; + } + GLIBC_PRIVATE { + # functions shared with iconv program + __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; + + # function used by the gconv modules + __gconv_transliterate; + } +} diff --git a/REORG.TODO/iconv/dummy-repertoire.c b/REORG.TODO/iconv/dummy-repertoire.c new file mode 100644 index 0000000000..a4db977951 --- /dev/null +++ b/REORG.TODO/iconv/dummy-repertoire.c @@ -0,0 +1,37 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +/* For iconv we don't have to handle repertoire maps. Provide dummy + definitions to allow the use of linereader.c unchanged. */ +#include <repertoire.h> + + +uint32_t +repertoire_find_value (const struct repertoire_t *repertoire, const char *name, + size_t len) +{ + return ILLEGAL_CHAR_VALUE; +} + + +const char * +repertoire_find_symbol (const struct repertoire_t *repertoire, uint32_t ucs) +{ + return NULL; +} diff --git a/REORG.TODO/iconv/gconv.c b/REORG.TODO/iconv/gconv.c new file mode 100644 index 0000000000..0aab0546b9 --- /dev/null +++ b/REORG.TODO/iconv/gconv.c @@ -0,0 +1,91 @@ +/* Convert characters in input buffer using conversion descriptor to + output buffer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <dlfcn.h> +#include <stddef.h> +#include <sys/param.h> + +#include <gconv_int.h> +#include <sysdep.h> + + +int +internal_function +__gconv (__gconv_t cd, const unsigned char **inbuf, + const unsigned char *inbufend, unsigned char **outbuf, + unsigned char *outbufend, size_t *irreversible) +{ + size_t last_step; + int result; + + if (cd == (__gconv_t) -1L) + return __GCONV_ILLEGAL_DESCRIPTOR; + + last_step = cd->__nsteps - 1; + + assert (irreversible != NULL); + *irreversible = 0; + + cd->__data[last_step].__outbuf = outbuf != NULL ? *outbuf : NULL; + cd->__data[last_step].__outbufend = outbufend; + + __gconv_fct fct = cd->__steps->__fct; +#ifdef PTR_DEMANGLE + if (cd->__steps->__shlib_handle != NULL) + PTR_DEMANGLE (fct); +#endif + + if (inbuf == NULL || *inbuf == NULL) + { + /* We just flush. */ + result = DL_CALL_FCT (fct, + (cd->__steps, cd->__data, NULL, NULL, NULL, + irreversible, + cd->__data[last_step].__outbuf == NULL ? 2 : 1, + 0)); + + /* If the flush was successful clear the rest of the state. 
*/ + if (result == __GCONV_OK) + for (size_t cnt = 0; cnt <= last_step; ++cnt) + cd->__data[cnt].__invocation_counter = 0; + } + else + { + const unsigned char *last_start; + + assert (outbuf != NULL && *outbuf != NULL); + + do + { + last_start = *inbuf; + result = DL_CALL_FCT (fct, + (cd->__steps, cd->__data, inbuf, inbufend, + NULL, irreversible, 0, 0)); + } + while (result == __GCONV_EMPTY_INPUT && last_start != *inbuf + && *inbuf + cd->__steps->__min_needed_from <= inbufend); + } + + if (outbuf != NULL && *outbuf != NULL) + *outbuf = cd->__data[last_step].__outbuf; + + return result; +} diff --git a/REORG.TODO/iconv/gconv.h b/REORG.TODO/iconv/gconv.h new file mode 100644 index 0000000000..db678dba49 --- /dev/null +++ b/REORG.TODO/iconv/gconv.h @@ -0,0 +1,154 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This header provides no interface for a user to the internals of + the gconv implementation in the libc. Therefore there is no use + for these definitions beside for writing additional gconv modules. 
*/ + +#ifndef _GCONV_H +#define _GCONV_H 1 + +#include <features.h> +#include <bits/types/__mbstate_t.h> +#include <bits/types/wint_t.h> + +#define __need_size_t +#define __need_wchar_t +#include <stddef.h> + +/* ISO 10646 value used to signal invalid value. */ +#define __UNKNOWN_10646_CHAR ((wchar_t) 0xfffd) + +/* Error codes for gconv functions. */ +enum +{ + __GCONV_OK = 0, + __GCONV_NOCONV, + __GCONV_NODB, + __GCONV_NOMEM, + + __GCONV_EMPTY_INPUT, + __GCONV_FULL_OUTPUT, + __GCONV_ILLEGAL_INPUT, + __GCONV_INCOMPLETE_INPUT, + + __GCONV_ILLEGAL_DESCRIPTOR, + __GCONV_INTERNAL_ERROR +}; + + +/* Flags the `__gconv_open' function can set. */ +enum +{ + __GCONV_IS_LAST = 0x0001, + __GCONV_IGNORE_ERRORS = 0x0002, + __GCONV_SWAP = 0x0004, + __GCONV_TRANSLIT = 0x0008 +}; + + +/* Forward declarations. */ +struct __gconv_step; +struct __gconv_step_data; +struct __gconv_loaded_object; + + +/* Type of a conversion function. */ +typedef int (*__gconv_fct) (struct __gconv_step *, struct __gconv_step_data *, + const unsigned char **, const unsigned char *, + unsigned char **, size_t *, int, int); + +/* Type of a specialized conversion function for a single byte to INTERNAL. */ +typedef wint_t (*__gconv_btowc_fct) (struct __gconv_step *, unsigned char); + +/* Constructor and destructor for local data for conversion step. */ +typedef int (*__gconv_init_fct) (struct __gconv_step *); +typedef void (*__gconv_end_fct) (struct __gconv_step *); + + +/* Description of a conversion step. */ +struct __gconv_step +{ + struct __gconv_loaded_object *__shlib_handle; + const char *__modname; + + int __counter; + + char *__from_name; + char *__to_name; + + __gconv_fct __fct; + __gconv_btowc_fct __btowc_fct; + __gconv_init_fct __init_fct; + __gconv_end_fct __end_fct; + + /* Information about the number of bytes needed or produced in this + step. This helps optimizing the buffer sizes. 
*/ + int __min_needed_from; + int __max_needed_from; + int __min_needed_to; + int __max_needed_to; + + /* Flag whether this is a stateful encoding or not. */ + int __stateful; + + void *__data; /* Pointer to step-local data. */ +}; + +/* Additional data for steps in use of conversion descriptor. This is + allocated by the `init' function. */ +struct __gconv_step_data +{ + unsigned char *__outbuf; /* Output buffer for this step. */ + unsigned char *__outbufend; /* Address of first byte after the output + buffer. */ + + /* Is this the last module in the chain. */ + int __flags; + + /* Counter for number of invocations of the module function for this + descriptor. */ + int __invocation_counter; + + /* Flag whether this is an internal use of the module (in the mb*towc* + and wc*tomb* functions) or regular with iconv(3). */ + int __internal_use; + + __mbstate_t *__statep; + __mbstate_t __state; /* This element must not be used directly by + any module; always use STATEP! */ +}; + + +/* Combine conversion step description with data. */ +typedef struct __gconv_info +{ + size_t __nsteps; + struct __gconv_step *__steps; + __extension__ struct __gconv_step_data __data[0]; +} *__gconv_t; + +/* Transliteration using the locale's data. */ +extern int __gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char *inbufstart, + const unsigned char **inbufp, + const unsigned char *inbufend, + unsigned char **outbufstart, + size_t *irreversible); + +#endif /* gconv.h */ diff --git a/REORG.TODO/iconv/gconv_builtin.c b/REORG.TODO/iconv/gconv_builtin.c new file mode 100644 index 0000000000..111233dab5 --- /dev/null +++ b/REORG.TODO/iconv/gconv_builtin.c @@ -0,0 +1,87 @@ +/* Table for builtin transformation mapping. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> +#include <limits.h> +#include <stdint.h> +#include <string.h> + +#include <gconv_int.h> + +#include <assert.h> + + +static const struct builtin_map +{ + const char *name; + __gconv_fct fct; + __gconv_btowc_fct btowc_fct; + + int8_t min_needed_from; + int8_t max_needed_from; + int8_t min_needed_to; + int8_t max_needed_to; + +} map[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + { \ + .name = Name, \ + .fct = Fct, \ + .btowc_fct = BtowcFct, \ + \ + .min_needed_from = MinF, \ + .max_needed_from = MaxF, \ + .min_needed_to = MinT, \ + .max_needed_to = MaxT \ + }, +#define BUILTIN_ALIAS(From, To) + +#include <gconv_builtin.h> +}; + + +void +internal_function +__gconv_get_builtin_trans (const char *name, struct __gconv_step *step) +{ + size_t cnt; + + for (cnt = 0; cnt < sizeof (map) / sizeof (map[0]); ++cnt) + if (strcmp (name, map[cnt].name) == 0) + break; + + assert (cnt < sizeof (map) / sizeof (map[0])); + + step->__fct = map[cnt].fct; + step->__btowc_fct = map[cnt].btowc_fct; + step->__init_fct = NULL; + step->__end_fct = NULL; + step->__shlib_handle = NULL; + step->__modname = NULL; + + step->__min_needed_from = map[cnt].min_needed_from; + step->__max_needed_from = map[cnt].max_needed_from; + step->__min_needed_to = 
map[cnt].min_needed_to; + step->__max_needed_to = map[cnt].max_needed_to; + + /* None of the builtin converters handles stateful encoding. */ + step->__stateful = 0; +} diff --git a/REORG.TODO/iconv/gconv_builtin.h b/REORG.TODO/iconv/gconv_builtin.h new file mode 100644 index 0000000000..93e2e4d865 --- /dev/null +++ b/REORG.TODO/iconv/gconv_builtin.h @@ -0,0 +1,123 @@ +/* Builtin transformations. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* All encoding named must be in upper case. There must be no extra + spaces. 
*/ + +BUILTIN_ALIAS ("UCS4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("UCS-4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("UCS-4BE//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("CSUCS4//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("ISO-10646//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993//", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("10646-1:1993/UCS4/", "ISO-10646/UCS4/") +BUILTIN_ALIAS ("OSF00010104//", "ISO-10646/UCS4/") /* level 1 */ +BUILTIN_ALIAS ("OSF00010105//", "ISO-10646/UCS4/") /* level 2 */ +BUILTIN_ALIAS ("OSF00010106//", "ISO-10646/UCS4/") /* level 3 */ + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4", + __gconv_transform_internal_ucs4, NULL, 4, 4, 4, 4) +BUILTIN_TRANSFORMATION ("ISO-10646/UCS4/", "INTERNAL", 1, "=ucs4->INTERNAL", + __gconv_transform_ucs4_internal, NULL, 4, 4, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UCS-4LE//", 1, "=INTERNAL->ucs4le", + __gconv_transform_internal_ucs4le, NULL, 4, 4, 4, 4) +BUILTIN_TRANSFORMATION ("UCS-4LE//", "INTERNAL", 1, "=ucs4le->INTERNAL", + __gconv_transform_ucs4le_internal, NULL, 4, 4, 4, 4) + +BUILTIN_ALIAS ("WCHAR_T//", "INTERNAL") + +BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("ISO-IR-193//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("OSF05010001//", "ISO-10646/UTF8/") +BUILTIN_ALIAS ("ISO-10646/UTF-8/", "ISO-10646/UTF8/") + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UTF8/", 1, "=INTERNAL->utf8", + __gconv_transform_internal_utf8, NULL, 4, 4, 1, 6) + +BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "INTERNAL", 1, "=utf8->INTERNAL", + __gconv_transform_utf8_internal, __gconv_btwoc_ascii, + 1, 6, 4, 4) + +BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("OSF00010100//", "ISO-10646/UCS2/") /* level 1 */ +BUILTIN_ALIAS ("OSF00010101//", "ISO-10646/UCS2/") /* level 2 */ +BUILTIN_ALIAS ("OSF00010102//", "ISO-10646/UCS2/") /* level 3 */ + +BUILTIN_TRANSFORMATION ("ISO-10646/UCS2/", "INTERNAL", 
1, "=ucs2->INTERNAL", + __gconv_transform_ucs2_internal, NULL, 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "ISO-10646/UCS2/", 1, "=INTERNAL->ucs2", + __gconv_transform_internal_ucs2, NULL, 4, 4, 2, 2) + + +BUILTIN_ALIAS ("ANSI_X3.4//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO-IR-6//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ANSI_X3.4-1986//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO_646.IRV:1991//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("ISO646-US//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("US-ASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("US//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("IBM367//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("CP367//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("CSASCII//", "ANSI_X3.4-1968//") +BUILTIN_ALIAS ("OSF00010020//", "ANSI_X3.4-1968//") + +BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "INTERNAL", 1, "=ascii->INTERNAL", + __gconv_transform_ascii_internal, __gconv_btwoc_ascii, + 1, 1, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "ANSI_X3.4-1968//", 1, "=INTERNAL->ascii", + __gconv_transform_internal_ascii, NULL, 4, 4, 1, 1) + + +#if BYTE_ORDER == BIG_ENDIAN +BUILTIN_ALIAS ("UNICODEBIG//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2BE//", "ISO-10646/UCS2/") + +BUILTIN_ALIAS ("UCS-2LE//", "UNICODELITTLE//") + +BUILTIN_TRANSFORMATION ("UNICODELITTLE//", "INTERNAL", 1, + "=ucs2reverse->INTERNAL", + __gconv_transform_ucs2reverse_internal, NULL, + 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODELITTLE//", 1, + "=INTERNAL->ucs2reverse", + __gconv_transform_internal_ucs2reverse, NULL, + 4, 4, 2, 2) +#else +BUILTIN_ALIAS ("UNICODELITTLE//", "ISO-10646/UCS2/") +BUILTIN_ALIAS ("UCS-2LE//", "ISO-10646/UCS2/") + +BUILTIN_ALIAS ("UCS-2BE//", "UNICODEBIG//") + +BUILTIN_TRANSFORMATION ("UNICODEBIG//", "INTERNAL", 1, + "=ucs2reverse->INTERNAL", + __gconv_transform_ucs2reverse_internal, NULL, + 2, 2, 4, 4) + +BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, + "=INTERNAL->ucs2reverse", + 
__gconv_transform_internal_ucs2reverse, NULL, + 4, 4, 2, 2) +#endif diff --git a/REORG.TODO/iconv/gconv_cache.c b/REORG.TODO/iconv/gconv_cache.c new file mode 100644 index 0000000000..0cadea3638 --- /dev/null +++ b/REORG.TODO/iconv/gconv_cache.c @@ -0,0 +1,472 @@ +/* Cache handling for iconv modules. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <gconv_int.h> +#include <iconvconfig.h> +#include <not-cancel.h> + +#include "../intl/hash-string.h" + +static void *gconv_cache; +static size_t cache_size; +static int cache_malloced; + + +void * +__gconv_get_cache (void) +{ + return gconv_cache; +} + + +int +internal_function +__gconv_load_cache (void) +{ + int fd; + struct stat64 st; + struct gconvcache_header *header; + + /* We cannot use the cache if the GCONV_PATH environment variable is + set. */ + __gconv_path_envvar = getenv ("GCONV_PATH"); + if (__gconv_path_envvar != NULL) + return -1; + + /* See whether the cache file exists. 
*/ + fd = open_not_cancel (GCONV_MODULES_CACHE, O_RDONLY, 0); + if (__builtin_expect (fd, 0) == -1) + /* Not available. */ + return -1; + + /* Get information about the file. */ + if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0 + /* We do not have to start looking at the file if it cannot contain + at least the cache header. */ + || (size_t) st.st_size < sizeof (struct gconvcache_header)) + { + close_and_exit: + close_not_cancel_no_status (fd); + return -1; + } + + /* Make the file content available. */ + cache_size = st.st_size; +#ifdef _POSIX_MAPPED_FILES + gconv_cache = __mmap (NULL, cache_size, PROT_READ, MAP_SHARED, fd, 0); + if (__glibc_unlikely (gconv_cache == MAP_FAILED)) +#endif + { + size_t already_read; + + gconv_cache = malloc (cache_size); + if (gconv_cache == NULL) + goto close_and_exit; + + already_read = 0; + do + { + ssize_t n = __read (fd, (char *) gconv_cache + already_read, + cache_size - already_read); + if (__builtin_expect (n, 0) == -1) + { + free (gconv_cache); + gconv_cache = NULL; + goto close_and_exit; + } + + already_read += n; + } + while (already_read < cache_size); + + cache_malloced = 1; + } + + /* We don't need the file descriptor anymore. */ + close_not_cancel_no_status (fd); + + /* Check the consistency. 
*/ + header = (struct gconvcache_header *) gconv_cache; + if (__builtin_expect (header->magic, GCONVCACHE_MAGIC) != GCONVCACHE_MAGIC + || __builtin_expect (header->string_offset >= cache_size, 0) + || __builtin_expect (header->hash_offset >= cache_size, 0) + || __builtin_expect (header->hash_size == 0, 0) + || __builtin_expect ((header->hash_offset + + header->hash_size * sizeof (struct hash_entry)) + > cache_size, 0) + || __builtin_expect (header->module_offset >= cache_size, 0) + || __builtin_expect (header->otherconv_offset > cache_size, 0)) + { + if (cache_malloced) + { + free (gconv_cache); + cache_malloced = 0; + } +#ifdef _POSIX_MAPPED_FILES + else + __munmap (gconv_cache, cache_size); +#endif + gconv_cache = NULL; + + return -1; + } + + /* That worked. */ + return 0; +} + + +static int +internal_function +find_module_idx (const char *str, size_t *idxp) +{ + unsigned int idx; + unsigned int hval; + unsigned int hval2; + const struct gconvcache_header *header; + const char *strtab; + const struct hash_entry *hashtab; + unsigned int limit; + + header = (const struct gconvcache_header *) gconv_cache; + strtab = (char *) gconv_cache + header->string_offset; + hashtab = (struct hash_entry *) ((char *) gconv_cache + + header->hash_offset); + + hval = __hash_string (str); + idx = hval % header->hash_size; + hval2 = 1 + hval % (header->hash_size - 2); + + limit = cache_size - header->string_offset; + while (hashtab[idx].string_offset != 0) + if (hashtab[idx].string_offset < limit + && strcmp (str, strtab + hashtab[idx].string_offset) == 0) + { + *idxp = hashtab[idx].module_idx; + return 0; + } + else + if ((idx += hval2) >= header->hash_size) + idx -= header->hash_size; + + /* Nothing found. 
*/ + return -1; +} + + +#ifndef STATIC_GCONV +static int +internal_function +find_module (const char *directory, const char *filename, + struct __gconv_step *result) +{ + size_t dirlen = strlen (directory); + size_t fnamelen = strlen (filename) + 1; + char fullname[dirlen + fnamelen]; + int status = __GCONV_NOCONV; + + memcpy (__mempcpy (fullname, directory, dirlen), filename, fnamelen); + + result->__shlib_handle = __gconv_find_shlib (fullname); + if (result->__shlib_handle != NULL) + { + status = __GCONV_OK; + + result->__modname = NULL; + result->__fct = result->__shlib_handle->fct; + result->__init_fct = result->__shlib_handle->init_fct; + result->__end_fct = result->__shlib_handle->end_fct; + + /* These settings can be overridden by the init function. */ + result->__btowc_fct = NULL; + result->__data = NULL; + + /* Call the init function. */ + if (result->__init_fct != NULL) + { + __gconv_init_fct init_fct = result->__init_fct; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (init_fct); +#endif + status = DL_CALL_FCT (init_fct, (result)); + +#ifdef PTR_MANGLE + if (result->__btowc_fct != NULL) + PTR_MANGLE (result->__btowc_fct); +#endif + } + } + + return status; +} +#endif + + +int +internal_function +__gconv_compare_alias_cache (const char *name1, const char *name2, int *result) +{ + size_t name1_idx; + size_t name2_idx; + + if (gconv_cache == NULL) + return -1; + + if (find_module_idx (name1, &name1_idx) != 0 + || find_module_idx (name2, &name2_idx) != 0) + *result = strcmp (name1, name2); + else + *result = (int) (name1_idx - name2_idx); + + return 0; +} + + +int +internal_function +__gconv_lookup_cache (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, int flags) +{ + const struct gconvcache_header *header; + const char *strtab; + size_t fromidx; + size_t toidx; + const struct module_entry *modtab; + const struct module_entry *from_module; + const struct module_entry *to_module; + struct __gconv_step *result; + + if (gconv_cache 
== NULL) + /* We have no cache available. */ + return __GCONV_NODB; + + header = (const struct gconvcache_header *) gconv_cache; + strtab = (char *) gconv_cache + header->string_offset; + modtab = (const struct module_entry *) ((char *) gconv_cache + + header->module_offset); + + if (find_module_idx (fromset, &fromidx) != 0 + || (header->module_offset + (fromidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + from_module = &modtab[fromidx]; + + if (find_module_idx (toset, &toidx) != 0 + || (header->module_offset + (toidx + 1) * sizeof (struct module_entry) + > cache_size)) + return __GCONV_NOCONV; + to_module = &modtab[toidx]; + + /* Avoid copy-only transformations if the user requests. */ + if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) && fromidx == toidx) + return __GCONV_NULCONV; + + /* If there are special conversions available examine them first. */ + if (fromidx != 0 && toidx != 0 + && __builtin_expect (from_module->extra_offset, 0) != 0) + { + /* Search through the list to see whether there is a module + matching the destination character set. */ + const struct extra_entry *extra; + + /* Note the -1. This is due to the offset added in iconvconfig. + See there for more explanations. */ + extra = (const struct extra_entry *) ((char *) gconv_cache + + header->otherconv_offset + + from_module->extra_offset - 1); + while (extra->module_cnt != 0 + && extra->module[extra->module_cnt - 1].outname_offset != toidx) + extra = (const struct extra_entry *) ((char *) extra + + sizeof (struct extra_entry) + + (extra->module_cnt + * sizeof (struct extra_entry_module))); + + if (extra->module_cnt != 0) + { + /* Use the extra module. First determine how many steps. 
*/ + char *fromname; + int idx; + + *nsteps = extra->module_cnt; + *handle = result = + (struct __gconv_step *) malloc (extra->module_cnt + * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + fromname = (char *) strtab + from_module->canonname_offset; + idx = 0; + do + { + result[idx].__from_name = fromname; + fromname = result[idx].__to_name = + (char *) strtab + modtab[extra->module[idx].outname_offset].canonname_offset; + + result[idx].__counter = 1; + result[idx].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[extra->module[idx].dir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res; + + res = find_module (strtab + extra->module[idx].dir_offset, + strtab + extra->module[idx].name_offset, + &result[idx]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + goto try_internal; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + + extra->module[idx].name_offset, + &result[idx]); + + } + while (++idx < extra->module_cnt); + + return __GCONV_OK; + } + } + + try_internal: + /* See whether we can convert via the INTERNAL charset. */ + if ((fromidx != 0 && __builtin_expect (from_module->fromname_offset, 1) == 0) + || (toidx != 0 && __builtin_expect (to_module->toname_offset, 1) == 0) + || (fromidx == 0 && toidx == 0)) + /* Not possible. Nothing we can do. */ + return __GCONV_NOCONV; + + /* We will use up to two modules. Always allocate room for two. */ + result = (struct __gconv_step *) malloc (2 * sizeof (struct __gconv_step)); + if (result == NULL) + return __GCONV_NOMEM; + + *handle = result; + *nsteps = 0; + + /* Generate data structure for conversion to INTERNAL. 
*/ + if (fromidx != 0) + { + result[0].__from_name = (char *) strtab + from_module->canonname_offset; + result[0].__to_name = (char *) "INTERNAL"; + + result[0].__counter = 1; + result[0].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[from_module->todir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res = find_module (strtab + from_module->todir_offset, + strtab + from_module->toname_offset, + &result[0]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + from_module->toname_offset, + &result[0]); + + ++*nsteps; + } + + /* Generate data structure for conversion from INTERNAL. */ + if (toidx != 0) + { + int idx = *nsteps; + + result[idx].__from_name = (char *) "INTERNAL"; + result[idx].__to_name = (char *) strtab + to_module->canonname_offset; + + result[idx].__counter = 1; + result[idx].__data = NULL; + +#ifndef STATIC_GCONV + if (strtab[to_module->fromdir_offset] != '\0') + { + /* Load the module, return handle for it. */ + int res = find_module (strtab + to_module->fromdir_offset, + strtab + to_module->fromname_offset, + &result[idx]); + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* Something went wrong. */ + if (idx != 0) + __gconv_release_step (&result[0]); + free (result); + return res; + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (strtab + to_module->fromname_offset, + &result[idx]); + + ++*nsteps; + } + + return __GCONV_OK; +} + + +/* Free memory allocated for the transformation record. */ +void +internal_function +__gconv_release_cache (struct __gconv_step *steps, size_t nsteps) +{ + if (gconv_cache != NULL) + /* The only thing we have to deallocate is the record with the + steps. */ + free (steps); +} + + +/* Free all resources if necessary. 
*/ +libc_freeres_fn (free_mem) +{ + /* free() the cache when cache_malloced is set; otherwise, where + _POSIX_MAPPED_FILES is defined, unmap cache_size bytes at + gconv_cache if it is non-NULL. */ + if (cache_malloced) + free (gconv_cache); +#ifdef _POSIX_MAPPED_FILES + else if (gconv_cache != NULL) + __munmap (gconv_cache, cache_size); +#endif +} diff --git a/REORG.TODO/iconv/gconv_charset.h b/REORG.TODO/iconv/gconv_charset.h new file mode 100644 index 0000000000..18d8bd6ae7 --- /dev/null +++ b/REORG.TODO/iconv/gconv_charset.h @@ -0,0 +1,57 @@ +/* Charset name normalization. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ctype.h> +#include <locale.h> + + +/* Copy S to WP in normalized form. Alphanumeric characters and the + characters '_', '-', '.', ',' and ':' are copied, converted to upper + case; '/' is copied as is; every other character is dropped. + Scanning stops at the third '/', and the result is padded with '/' + until two slashes have been written. WP is NUL-terminated; no + bounds checking is done on it. */ +static void +strip (char *wp, const char *s) +{ + int slash_count = 0; + + while (*s != '\0') + { + if (__isalnum_l (*s, _nl_C_locobj_ptr) + || *s == '_' || *s == '-' || *s == '.' 
|| *s == ',' || *s == ':') + *wp++ = __toupper_l (*s, _nl_C_locobj_ptr); + else if (*s == '/') + { + /* Stop at the third slash without copying it. */ + if (++slash_count == 3) + break; + *wp++ = '/'; + } + ++s; + } + + /* Pad so that the result contains two slashes. */ + while (slash_count++ < 2) + *wp++ = '/'; + + *wp = '\0'; +} + + +/* Copy STR to DST, including the terminating NUL, passing every + character through __toupper_l with _nl_C_locobj_ptr. Returns DST. */ +static inline char * __attribute__ ((unused, always_inline)) +upstr (char *dst, const char *str) +{ + char *cp = dst; + while ((*cp++ = __toupper_l (*str++, _nl_C_locobj_ptr)) != '\0') + /* nothing */; + return dst; +} diff --git a/REORG.TODO/iconv/gconv_close.c b/REORG.TODO/iconv/gconv_close.c new file mode 100644 index 0000000000..4853dd8779 --- /dev/null +++ b/REORG.TODO/iconv/gconv_close.c @@ -0,0 +1,50 @@ +/* Release any resource associated with given conversion descriptor. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> + +#include <gconv_int.h> + + +/* Close the conversion descriptor CD: free the output buffers of all + step data entries except the last, free CD itself, and return the + result of __gconv_close_transform on CD's steps. */ +int +internal_function +__gconv_close (__gconv_t cd) +{ + struct __gconv_step *srunp; + struct __gconv_step_data *drunp; + size_t nsteps; + + /* Free all resources by calling destructor functions and release + the implementations. 
*/ + srunp = cd->__steps; + nsteps = cd->__nsteps; + drunp = cd->__data; + /* Visit every step data entry up to and including the one flagged + __GCONV_IS_LAST, freeing the output buffer of each entry that is + not the last. */ + do + { + if (!(drunp->__flags & __GCONV_IS_LAST) && drunp->__outbuf != NULL) + free (drunp->__outbuf); + } + while (!((drunp++)->__flags & __GCONV_IS_LAST)); + + /* Free the data allocated for the descriptor. SRUNP and NSTEPS were + read from CD above, so CD is not accessed after this point. */ + free (cd); + + /* Close the participating modules. */ + return __gconv_close_transform (srunp, nsteps); +} diff --git a/REORG.TODO/iconv/gconv_conf.c b/REORG.TODO/iconv/gconv_conf.c new file mode 100644 index 0000000000..5aa055de6e --- /dev/null +++ b/REORG.TODO/iconv/gconv_conf.c @@ -0,0 +1,616 @@ +/* Handle configuration data. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <search.h> +#include <stddef.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/param.h> + +#include <libc-lock.h> +#include <gconv_int.h> + + +/* This is the default path where we look for module lists. */ +static const char default_gconv_path[] = GCONV_PATH; + +/* The path elements, as determined by the __gconv_get_path function. 
+ All path elements end in a slash. */ +struct path_elem *__gconv_path_elem; +/* Maximum length of a single path element in __gconv_path_elem. */ +size_t __gconv_max_path_elem_len; + +/* We use the following struct if we couldn't allocate memory. */ +static const struct path_elem empty_path_elem = { NULL, 0 }; + +/* Name of the file containing the module information in the directories + along the path. */ +static const char gconv_conf_filename[] = "gconv-modules"; + +/* Filename extension for the modules. */ +#ifndef MODULE_EXT +# define MODULE_EXT ".so" +#endif +static const char gconv_module_ext[] = MODULE_EXT; + +/* We have a few builtin transformations. One entry is generated for + each BUILTIN_TRANSFORMATION in gconv_builtin.h; BUILTIN_ALIAS entries + expand to nothing here. */ +static struct gconv_module builtin_modules[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + { \ + .from_string = From, \ + .to_string = To, \ + .cost_hi = Cost, \ + .cost_lo = INT_MAX, \ + .module_name = Name \ + }, +#define BUILTIN_ALIAS(From, To) + +#include "gconv_builtin.h" + +#undef BUILTIN_TRANSFORMATION +#undef BUILTIN_ALIAS +}; + +/* The builtin aliases: each BUILTIN_ALIAS in gconv_builtin.h contributes + its FROM and TO names as two consecutive NUL-terminated strings; + BUILTIN_TRANSFORMATION entries expand to nothing here. */ +static const char builtin_aliases[] = +{ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) +#define BUILTIN_ALIAS(From, To) From "\0" To "\0" + +#include "gconv_builtin.h" + +#undef BUILTIN_TRANSFORMATION +#undef BUILTIN_ALIAS +}; + +#include <libio/libioP.h> +#define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp) + + +/* Value of the GCONV_PATH environment variable. */ +const char *__gconv_path_envvar; + + +/* Test whether there is already a matching module known. Searches the + tree rooted at __gconv_modules_db for a node whose from_string equals + ALIAS; returns 1 if there is one and 0 otherwise. */ +static int +internal_function +detect_conflict (const char *alias) +{ + struct gconv_module *node = __gconv_modules_db; + + while (node != NULL) + { + int cmpres = strcmp (alias, node->from_string); + + if (cmpres == 0) + /* We have a conflict. 
*/ + return 1; + else if (cmpres < 0) + node = node->left; + else + node = node->right; + } + + /* The loop only ends here once NODE is NULL, so this returns 0. */ + return node != NULL; +} + + +/* The actual code to add aliases. FROM and TO must lie in the same + buffer with FROM first, and WP must point just past the end of TO: + the bytes from FROM up to WP are copied into the new record and the + TO name is located in the copy by its offset from FROM. Nothing is + added if FROM matches the from_string of a known module. MODULES is + not used. */ +static void +add_alias2 (const char *from, const char *to, const char *wp, void *modules) +{ + /* Test whether this alias conflicts with any available module. */ + if (detect_conflict (from)) + /* It does conflict, don't add the alias. */ + return; + + /* The name strings are stored directly behind the structure, in the + same allocation. */ + struct gconv_alias *new_alias = (struct gconv_alias *) + malloc (sizeof (struct gconv_alias) + (wp - from)); + if (new_alias != NULL) + { + void **inserted; + + new_alias->fromname = memcpy ((char *) new_alias + + sizeof (struct gconv_alias), + from, wp - from); + new_alias->toname = new_alias->fromname + (to - from); + + inserted = (void **) __tsearch (new_alias, &__gconv_alias_db, + __gconv_alias_compare); + if (inserted == NULL || *inserted != new_alias) + /* Something went wrong, free this entry. */ + free (new_alias); + } +} + + +/* Add new alias. RP points to the rest of a configuration line; the + line is modified in place. */ +static void +add_alias (char *rp, void *modules) +{ + /* We now expect two more strings. The strings are normalized + (converted to UPPER case) and stored in the alias database. */ + char *from, *to, *wp; + + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + from = wp = rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); + if (*rp == '\0') + /* There is no `to' string on the line. Ignore it. */ + return; + *wp++ = '\0'; + /* TO equals WP at this point: the `to' name is written directly + behind the NUL that ends the `from' name. */ + to = ++rp; + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); + if (to == wp) + /* No `to' string, ignore the line. */ + return; + *wp++ = '\0'; + + add_alias2 (from, to, wp, modules); +} + + +/* Insert a data structure for a new module in the search tree. 
*/ +/* The tree rooted at __gconv_modules_db is ordered by from_string; + nodes with the same from_string are chained through their `same' + member. If a node with the same from_string and to_string already + exists, NEWP replaces it (and the old node is freed) only when NEWP's + cost is lower; otherwise NEWP is dropped, and freed when TOBEFREED is + nonzero. */ +static void +internal_function +insert_module (struct gconv_module *newp, int tobefreed) +{ + struct gconv_module **rootp = &__gconv_modules_db; + + while (*rootp != NULL) + { + struct gconv_module *root = *rootp; + int cmpres; + + cmpres = strcmp (newp->from_string, root->from_string); + if (cmpres == 0) + { + /* Both strings are identical. Insert the string at the + end of the `same' list if it is not already there. */ + while (strcmp (newp->from_string, root->from_string) != 0 + || strcmp (newp->to_string, root->to_string) != 0) + { + rootp = &root->same; + root = *rootp; + if (root == NULL) + break; + } + + if (root != NULL) + { + /* This is not a new conversion. But maybe the cost is + better. */ + if (newp->cost_hi < root->cost_hi + || (newp->cost_hi == root->cost_hi + && newp->cost_lo < root->cost_lo)) + { + newp->left = root->left; + newp->right = root->right; + newp->same = root->same; + *rootp = newp; + + free (root); + } + else if (tobefreed) + free (newp); + return; + } + + /* ROOTP now addresses the empty slot at the end of the + `same' list. */ + break; + } + else if (cmpres < 0) + rootp = &root->left; + else + rootp = &root->right; + } + + /* Plug in the new node here. */ + *rootp = newp; +} + + +/* Add new module. RP points to the rest of a `module' line, which is + modified in place. DIRECTORY (DIR_LEN bytes) is prepended to module + file names that do not start with '/'. MODCOUNTER becomes the + cost_lo value of the new module. MODULES and NMODULES are not used. */ +static void +internal_function +add_module (char *rp, const char *directory, size_t dir_len, void **modules, + size_t *nmodules, int modcounter) +{ + /* We expect now + 1. `from' name + 2. `to' name + 3. filename of the module + 4. 
an optional cost value + */ + struct gconv_alias fake_alias; + struct gconv_module *new_module; + char *from, *to, *module, *wp; + int need_ext; + int cost_hi; + + /* Isolate the `from' name and convert it to upper case in place. */ + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + from = rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + { + *rp = __toupper_l (*rp, _nl_C_locobj_ptr); + ++rp; + } + if (*rp == '\0') + return; + *rp++ = '\0'; + /* The `to' name and the module name are copied down to WP so that + the three strings end up directly behind one another. */ + to = wp = rp; + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); + if (*rp == '\0') + return; + *wp++ = '\0'; + do + ++rp; + while (__isspace_l (*rp, _nl_C_locobj_ptr)); + module = wp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + *wp++ = *rp++; + if (*rp == '\0') + { + /* There is no cost, use one by default. */ + *wp++ = '\0'; + cost_hi = 1; + } + else + { + /* There might be a cost value. */ + char *endp; + + *wp++ = '\0'; + cost_hi = strtol (rp, &endp, 10); + if (rp == endp || cost_hi < 1) + /* No useful information. */ + cost_hi = 1; + } + + if (module[0] == '\0') + /* No module name given. */ + return; + /* A module name starting with '/' is used without the directory + prefix. */ + if (module[0] == '/') + dir_len = 0; + + /* See whether we must add the ending. WP points just past the NUL + that ends the module name, so the comparison covers the last + characters of the name together with that NUL. */ + need_ext = 0; + if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext) + || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, + sizeof (gconv_module_ext)) != 0) + /* We must add the module extension. */ + need_ext = sizeof (gconv_module_ext) - 1; + + /* See whether we have already an alias with this name defined. */ + fake_alias.fromname = strndupa (from, to - from); + + if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL) + /* This module duplicates an alias. 
*/ + return; + + /* The strings are stored directly behind the structure, in the same + allocation. */ + new_module = (struct gconv_module *) calloc (1, + sizeof (struct gconv_module) + + (wp - from) + + dir_len + need_ext); + if (new_module != NULL) + { + char *tmp; + + new_module->from_string = tmp = (char *) (new_module + 1); + tmp = __mempcpy (tmp, from, to - from); + + new_module->to_string = tmp; + tmp = __mempcpy (tmp, to, module - to); + + new_module->cost_hi = cost_hi; + new_module->cost_lo = modcounter; + + new_module->module_name = tmp; + + if (dir_len != 0) + tmp = __mempcpy (tmp, directory, dir_len); + + tmp = __mempcpy (tmp, module, wp - module); + + /* Overwrite the NUL just copied with the extension, which brings + its own terminating NUL. */ + if (need_ext) + memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext)); + + /* Now insert the new module data structure in our search tree. */ + insert_module (new_module, 1); + } +} + + +/* Read the next configuration file. FILENAME is opened and read line + by line; lines whose first word is `alias' or `module' are handed to + add_alias and add_module, all other lines are ignored. DIRECTORY and + DIR_LEN are passed through to add_module. A file that cannot be + opened is silently skipped. */ +static void +internal_function +read_conf_file (const char *filename, const char *directory, size_t dir_len, + void **modules, size_t *nmodules) +{ + /* Note the file is opened with cancellation in the I/O functions + disabled. */ + FILE *fp = fopen (filename, "rce"); + char *line = NULL; + size_t line_len = 0; + /* Counts the `module' lines seen so far, across all calls; the + current value is passed to add_module for each such line. */ + static int modcounter; + + /* Don't complain if a file is not present or readable, simply silently + ignore it. */ + if (fp == NULL) + return; + + /* No threads reading from this stream. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + /* Process the known entries of the file. Comments start with `#' and + end with the end of the line. Empty lines are ignored. */ + while (!feof_unlocked (fp)) + { + char *rp, *endp, *word; + ssize_t n = __getdelim (&line, &line_len, '\n', fp); + if (n < 0) + /* An error occurred. */ + break; + + rp = line; + /* Terminate the line (excluding comments or newline) by an NUL byte + to simplify the following code. 
*/ + endp = strchr (rp, '#'); + if (endp != NULL) + *endp = '\0'; + else + if (rp[n - 1] == '\n') + rp[n - 1] = '\0'; + + while (__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + + /* If this is an empty line go on with the next one. ENDP is + non-NULL only when the line contained a `#', so this test + catches lines that hold nothing but white space before the + comment. */ + if (rp == endp) + continue; + + /* Isolate the first word; it selects the kind of entry. */ + word = rp; + while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) + ++rp; + + if (rp - word == sizeof ("alias") - 1 + && memcmp (word, "alias", sizeof ("alias") - 1) == 0) + add_alias (rp, *modules); + else if (rp - word == sizeof ("module") - 1 + && memcmp (word, "module", sizeof ("module") - 1) == 0) + add_module (rp, directory, dir_len, modules, nmodules, modcounter++); + /* else */ + /* Otherwise ignore the line. */ + } + + free (line); + + fclose (fp); +} + + +/* Determine the directories we are looking for data in. The result is + stored in __gconv_path_elem; nothing is done if that is already set. + The search path is the value of __gconv_path_envvar, if any, followed + by the default path. */ +void +internal_function +__gconv_get_path (void) +{ + struct path_elem *result; + __libc_lock_define_initialized (static, lock); + + __libc_lock_lock (lock); + + /* Make sure there wasn't a second thread doing it already. */ + result = (struct path_elem *) __gconv_path_elem; + if (result == NULL) + { + /* Determine the complete path first. */ + char *gconv_path; + size_t gconv_path_len; + char *elem; + char *oldp; + char *cp; + int nelems; + char *cwd; + size_t cwdlen; + + if (__gconv_path_envvar == NULL) + { + /* No user-defined path. Make a modifiable copy of the + default path. */ + gconv_path = strdupa (default_gconv_path); + gconv_path_len = sizeof (default_gconv_path); + cwd = NULL; + cwdlen = 0; + } + else + { + /* Append the default path to the user-defined path. */ + size_t user_len = strlen (__gconv_path_envvar); + + gconv_path_len = user_len + 1 + sizeof (default_gconv_path); + gconv_path = alloca (gconv_path_len); + __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar, + user_len), + ":", 1), + default_gconv_path, sizeof (default_gconv_path)); + /* The current directory is prepended below to path elements + that do not start with '/'. */ + cwd = __getcwd (NULL, 0); + cwdlen = __glibc_unlikely (cwd == NULL) ? 
0 : strlen (cwd); + } + assert (default_gconv_path[0] == '/'); + + /* In a first pass we calculate the number of elements. */ + oldp = NULL; + cp = strchr (gconv_path, ':'); + nelems = 1; + while (cp != NULL) + { + if (cp != oldp + 1) + ++nelems; + oldp = cp; + cp = strchr (cp + 1, ':'); + } + + /* Allocate the memory for the result. A single block holds the + array of NELEMS + 1 entries (the extra one terminates the + list), followed by the space for the name strings. */ + result = (struct path_elem *) malloc ((nelems + 1) + * sizeof (struct path_elem) + + gconv_path_len + nelems + + (nelems - 1) * (cwdlen + 1)); + if (result != NULL) + { + char *strspace = (char *) &result[nelems + 1]; + int n = 0; + + /* Separate the individual parts. */ + __gconv_max_path_elem_len = 0; + elem = __strtok_r (gconv_path, ":", &gconv_path); + assert (elem != NULL); + do + { + result[n].name = strspace; + /* An element that does not start with '/' is prefixed + with the current directory. */ + if (elem[0] != '/') + { + assert (cwd != NULL); + strspace = __mempcpy (strspace, cwd, cwdlen); + *strspace++ = '/'; + } + strspace = __stpcpy (strspace, elem); + /* Make sure the element ends in a slash. */ + if (strspace[-1] != '/') + *strspace++ = '/'; + + result[n].len = strspace - result[n].name; + if (result[n].len > __gconv_max_path_elem_len) + __gconv_max_path_elem_len = result[n].len; + + *strspace++ = '\0'; + ++n; + } + while ((elem = __strtok_r (NULL, ":", &gconv_path)) != NULL); + + /* Terminate the list with an entry whose name is NULL. */ + result[n].name = NULL; + result[n].len = 0; + } + + /* Fall back to the static empty list if the allocation failed. */ + __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem; + + free (cwd); + } + + __libc_lock_unlock (lock); +} + + +/* Read all configuration files found in the user-specified and the default + path. */ +void +attribute_hidden +__gconv_read_conf (void) +{ + void *modules = NULL; + size_t nmodules = 0; + int save_errno = errno; + size_t cnt; + + /* First see whether we should use the cache. */ + if (__gconv_load_cache () == 0) + { + /* Yes, we are done. */ + __set_errno (save_errno); + return; + } + +#ifndef STATIC_GCONV + /* Find out where we have to look. 
*/ + if (__gconv_path_elem == NULL) + __gconv_get_path (); + + for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt) + { + const char *elem = __gconv_path_elem[cnt].name; + size_t elem_len = __gconv_path_elem[cnt].len; + char *filename; + + /* No slash needs to be inserted between elem and gconv_conf_filename; + elem already ends in a slash. */ + filename = alloca (elem_len + sizeof (gconv_conf_filename)); + __mempcpy (__mempcpy (filename, elem, elem_len), + gconv_conf_filename, sizeof (gconv_conf_filename)); + + /* Read the next configuration file. */ + read_conf_file (filename, elem, elem_len, &modules, &nmodules); + } +#endif + + /* Add the internal modules. They live in the static builtin_modules + array, hence the 0 passed as insert_module's TOBEFREED argument. */ + for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]); + ++cnt) + { + struct gconv_alias fake_alias; + + fake_alias.fromname = (char *) builtin_modules[cnt].from_string; + + if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) + != NULL) + /* It'll conflict so don't add it. */ + continue; + + insert_module (&builtin_modules[cnt], 0); + } + + /* Add aliases for builtin conversions. builtin_aliases holds + consecutive NUL-terminated FROM/TO pairs; CP is moved past one pair + per iteration until it reaches the NUL that follows the last pair. */ + const char *cp = builtin_aliases; + do + { + const char *from = cp; + const char *to = __rawmemchr (from, '\0') + 1; + cp = __rawmemchr (to, '\0') + 1; + + add_alias2 (from, to, cp, modules); + } + while (*cp != '\0'); + + /* Restore the error number. */ + __set_errno (save_errno); +} + + + +/* Free all resources if necessary. The path list is freed unless it is + unset or is the static empty_path_elem fallback. */ +libc_freeres_fn (free_mem) +{ + if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem) + free ((void *) __gconv_path_elem); +} diff --git a/REORG.TODO/iconv/gconv_db.c b/REORG.TODO/iconv/gconv_db.c new file mode 100644 index 0000000000..7893fadba1 --- /dev/null +++ b/REORG.TODO/iconv/gconv_db.c @@ -0,0 +1,870 @@ +/* Provide access to the collection of available transformation modules. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <limits.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> +#include <libc-lock.h> +#include <locale/localeinfo.h> + +#include <dlfcn.h> +#include <gconv_int.h> +#include <sysdep.h> + + +/* Simple data structure for alias mapping. We have two names, `from' + and `to'. This is the root handed to __tsearch/__tfind; the entries + are struct gconv_alias objects ordered by __gconv_alias_compare. */ +void *__gconv_alias_db; + +/* The available modules. This is the root of a binary search tree + ordered by from_string (see the left/right/same links), not a flat + array. */ +struct gconv_module *__gconv_modules_db; + +/* We modify global data. */ +__libc_lock_define_initialized (, __gconv_lock) + + +/* Provide access to module database. */ +struct gconv_module * +__gconv_get_modules_db (void) +{ + return __gconv_modules_db; +} + +/* Provide access to alias database. */ +void * +__gconv_get_alias_db (void) +{ + return __gconv_alias_db; +} + + +/* Function for searching alias. P1 and P2 point to struct gconv_alias + objects, which are ordered by strcmp of their fromname members; the + toname members are not looked at. */ +int +__gconv_alias_compare (const void *p1, const void *p2) +{ + const struct gconv_alias *s1 = (const struct gconv_alias *) p1; + const struct gconv_alias *s2 = (const struct gconv_alias *) p2; + return strcmp (s1->fromname, s2->fromname); +} + + +/* To search for a derivation we create a list of intermediate steps. + Each element contains a pointer to the element which precedes it + in the derivation order. 
*/ +struct derivation_step +{ + const char *result_set; + size_t result_set_len; + int cost_lo; + int cost_hi; + struct gconv_module *code; + struct derivation_step *last; + struct derivation_step *next; +}; + +/* Create a new derivation_step with the given fields; result_set_len is + computed from RESULT and `next' starts out as NULL. The memory comes + from alloca, so it belongs to the stack frame of the function in + which the macro is expanded. */ +#define NEW_STEP(result, hi, lo, module, last_mod) \ + ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \ + newp->result_set = result; \ + newp->result_set_len = strlen (result); \ + newp->cost_hi = hi; \ + newp->cost_lo = lo; \ + newp->code = module; \ + newp->last = last_mod; \ + newp->next = NULL; \ + newp; }) + + +/* If a specific transformation is used more than once we should not need + to start looking for it again. Instead cache each successful result. */ +struct known_derivation +{ + const char *from; + const char *to; + struct __gconv_step *steps; + size_t nsteps; +}; + +/* Compare function for database of found derivations. Entries are + ordered by their `from' name first and by their `to' name second. */ +static int +derivation_compare (const void *p1, const void *p2) +{ + const struct known_derivation *s1 = (const struct known_derivation *) p1; + const struct known_derivation *s2 = (const struct known_derivation *) p2; + int result; + + result = strcmp (s1->from, s2->from); + if (result == 0) + result = strcmp (s1->to, s2->to); + return result; +} + +/* The search tree for known derivations. */ +static void *known_derivations; + +/* Look up whether given transformation was already requested before. + Returns __GCONV_NOCONV if there is no entry for FROMSET and TOSET; + otherwise stores the recorded step array and step count in *HANDLE + and *NSTEPS and returns __GCONV_OK. */ +static int +internal_function +derivation_lookup (const char *fromset, const char *toset, + struct __gconv_step **handle, size_t *nsteps) +{ + struct known_derivation key = { fromset, toset, NULL, 0 }; + struct known_derivation **result; + + result = __tfind (&key, &known_derivations, derivation_compare); + + if (result == NULL) + return __GCONV_NOCONV; + + *handle = (*result)->steps; + *nsteps = (*result)->nsteps; + + /* Please note that we return GCONV_OK even if the last search for + this transformation was unsuccessful. */ + return __GCONV_OK; +} + +/* Add new derivation to list of known ones. 
*/ +/* The record keeps copies of FROMSET and TOSET directly behind the + structure, in the same allocation; HANDLE and NSTEPS are stored as + given. */ +static void +internal_function +add_derivation (const char *fromset, const char *toset, + struct __gconv_step *handle, size_t nsteps) +{ + struct known_derivation *new_deriv; + size_t fromset_len = strlen (fromset) + 1; + size_t toset_len = strlen (toset) + 1; + + new_deriv = (struct known_derivation *) + malloc (sizeof (struct known_derivation) + fromset_len + toset_len); + if (new_deriv != NULL) + { + new_deriv->from = (char *) (new_deriv + 1); + new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len), + toset, toset_len); + + new_deriv->steps = handle; + new_deriv->nsteps = nsteps; + + if (__tsearch (new_deriv, &known_derivations, derivation_compare) + == NULL) + /* There is some kind of memory allocation problem. */ + free (new_deriv); + } + /* Please note that we don't complain if the allocation failed. This + is not tragic but in case we use the memory debugging facilities + not all memory will be freed. */ +} + +/* Release one known_derivation record P: call the end function of every + step whose counter is still positive, then free the name strings, the + step array and the record itself. */ +static void __libc_freeres_fn_section +free_derivation (void *p) +{ + struct known_derivation *deriv = (struct known_derivation *) p; + size_t cnt; + + for (cnt = 0; cnt < deriv->nsteps; ++cnt) + if (deriv->steps[cnt].__counter > 0 + && deriv->steps[cnt].__end_fct != NULL) + { + assert (deriv->steps[cnt].__shlib_handle != NULL); + + __gconv_end_fct end_fct = deriv->steps[cnt].__end_fct; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (end_fct); +#endif + DL_CALL_FCT (end_fct, (&deriv->steps[cnt])); + } + + /* Free the name strings. Only the source name of the first step and + the target name of the last step are freed here. */ + if (deriv->steps != NULL) + { + free ((char *) deriv->steps[0].__from_name); + free ((char *) deriv->steps[deriv->nsteps - 1].__to_name); + free ((struct __gconv_step *) deriv->steps); + } + + free (deriv); +} + + +/* Decrement the reference count for a single step in a steps array. */ +void +internal_function +__gconv_release_step (struct __gconv_step *step) +{ + /* Skip builtin modules; they are not reference counted. 
*/ + if (step->__shlib_handle != NULL && --step->__counter == 0) + { + /* Call the destructor. */ + if (step->__end_fct != NULL) + { + assert (step->__shlib_handle != NULL); + + __gconv_end_fct end_fct = step->__end_fct; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (end_fct); +#endif + DL_CALL_FCT (end_fct, (step)); + } + +#ifndef STATIC_GCONV + /* Release the loaded module. */ + __gconv_release_shlib (step->__shlib_handle); + step->__shlib_handle = NULL; +#endif + } + else if (step->__shlib_handle == NULL) + /* Builtin modules should not have end functions. */ + assert (step->__end_fct == NULL); +} + +/* Build the array of conversion steps for the derivation that ends in + BEST. The chain is followed backwards through the `last' links, so + the array is filled from its last element to its first. On success + the array is stored in *HANDLE and its length in *NSTEPS. On failure + everything set up so far is released, *HANDLE is set to NULL and + *NSTEPS to 0. Returns __GCONV_OK, __GCONV_NOMEM if the array could + not be allocated, the status of a failing module init function, or + __GCONV_NOCONV for any other failure. TOSET is not used. */ +static int +internal_function +gen_steps (struct derivation_step *best, const char *toset, + const char *fromset, struct __gconv_step **handle, size_t *nsteps) +{ + size_t step_cnt = 0; + struct __gconv_step *result; + struct derivation_step *current; + int status = __GCONV_NOMEM; + char *from_name = NULL; + char *to_name = NULL; + + /* First determine number of steps. */ + for (current = best; current->last != NULL; current = current->last) + ++step_cnt; + + result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step) + * step_cnt); + if (result != NULL) + { + int failed = 0; + + status = __GCONV_OK; + *nsteps = step_cnt; + current = best; + while (step_cnt-- > 0) + { + /* Only the source name of the first step and the target name + of the last step are private copies; the names in between + are shared with the neighbouring step. */ + if (step_cnt == 0) + { + result[step_cnt].__from_name = from_name = __strdup (fromset); + if (from_name == NULL) + { + failed = 1; + break; + } + } + else + result[step_cnt].__from_name = (char *)current->last->result_set; + + if (step_cnt + 1 == *nsteps) + { + result[step_cnt].__to_name = to_name + = __strdup (current->result_set); + if (to_name == NULL) + { + failed = 1; + break; + } + } + else + result[step_cnt].__to_name = result[step_cnt + 1].__from_name; + + result[step_cnt].__counter = 1; + result[step_cnt].__data = NULL; + +#ifndef STATIC_GCONV + if (current->code->module_name[0] == '/') + { + /* Load the module, return handle for it. 
*/ + struct __gconv_loaded_object *shlib_handle = + __gconv_find_shlib (current->code->module_name); + + if (shlib_handle == NULL) + { + failed = 1; + break; + } + + result[step_cnt].__shlib_handle = shlib_handle; + result[step_cnt].__modname = shlib_handle->name; + result[step_cnt].__fct = shlib_handle->fct; + result[step_cnt].__init_fct = shlib_handle->init_fct; + result[step_cnt].__end_fct = shlib_handle->end_fct; + + /* These settings can be overridden by the init function. */ + result[step_cnt].__btowc_fct = NULL; + + /* Call the init function. */ + __gconv_init_fct init_fct = result[step_cnt].__init_fct; + if (init_fct != NULL) + { + assert (result[step_cnt].__shlib_handle != NULL); + +# ifdef PTR_DEMANGLE + PTR_DEMANGLE (init_fct); +# endif + status = DL_CALL_FCT (init_fct, (&result[step_cnt])); + + if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) + { + failed = 1; + /* Do not call the end function because the init + function has failed. This must happen before + STEP_CNT is decremented: afterwards the index + would name the wrong element, or wrap around + when STEP_CNT is zero. */ + result[step_cnt].__end_fct = NULL; + /* Make sure we unload this module: the cleanup + loop below increments STEP_CNT before it + releases a step. */ + --step_cnt; + break; + } + +# ifdef PTR_MANGLE + if (result[step_cnt].__btowc_fct != NULL) + PTR_MANGLE (result[step_cnt].__btowc_fct); +# endif + } + } + else +#endif + /* It's a builtin transformation. */ + __gconv_get_builtin_trans (current->code->module_name, + &result[step_cnt]); + + current = current->last; + } + + if (__builtin_expect (failed, 0) != 0) + { + /* Something went wrong while initializing the modules. */ + while (++step_cnt < *nsteps) + __gconv_release_step (&result[step_cnt]); + free (result); + free (from_name); + free (to_name); + *nsteps = 0; + *handle = NULL; + if (status == __GCONV_OK) + status = __GCONV_NOCONV; + } + else + *handle = result; + } + else + { + *nsteps = 0; + *handle = NULL; + } + + return status; +} + + +#ifndef STATIC_GCONV +/* Increment the use counter of each of the NSTEPS steps in STEPS, + starting with the last one. A step whose counter was zero is set up + again: if it has a module name the shared object is reopened and the + function pointers are refreshed, and then its init function, if any, + is called. Returns __GCONV_OK, or __GCONV_NOCONV if a module could + not be reopened. */ +static int +internal_function +increment_counter (struct __gconv_step *steps, size_t nsteps) +{ + /* Increment the user counter. 
*/ + size_t cnt = nsteps; + int result = __GCONV_OK; + + while (cnt-- > 0) + { + struct __gconv_step *step = &steps[cnt]; + + if (step->__counter++ == 0) + { + /* Skip builtin modules. */ + if (step->__modname != NULL) + { + /* Reopen a previously used module. */ + step->__shlib_handle = __gconv_find_shlib (step->__modname); + if (step->__shlib_handle == NULL) + { + /* Oops, this is the second time we use this module + (after unloading) and this time loading failed!? + Undo the increment for this step and release the + steps with a higher index, which have already + been counted. */ + --step->__counter; + while (++cnt < nsteps) + __gconv_release_step (&steps[cnt]); + result = __GCONV_NOCONV; + break; + } + + /* The function addresses defined by the module may + have changed. */ + step->__fct = step->__shlib_handle->fct; + step->__init_fct = step->__shlib_handle->init_fct; + step->__end_fct = step->__shlib_handle->end_fct; + + /* These settings can be overridden by the init function. */ + step->__btowc_fct = NULL; + } + + /* Call the init function. Its return value is not checked + here. */ + __gconv_init_fct init_fct = step->__init_fct; + if (init_fct != NULL) + { +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (init_fct); +#endif + DL_CALL_FCT (init_fct, (step)); + +#ifdef PTR_MANGLE + if (step->__btowc_fct != NULL) + PTR_MANGLE (step->__btowc_fct); +#endif + } + } + } + return result; +} +#endif + + +/* The main function: find a possible derivation from the `fromset' (either + the given name or the alias) to the `toset' (again with alias). */ +static int +internal_function +find_derivation (const char *toset, const char *toset_expand, + const char *fromset, const char *fromset_expand, + struct __gconv_step **handle, size_t *nsteps) +{ + struct derivation_step *first, *current, **lastp, *solution = NULL; + int best_cost_hi = INT_MAX; + int best_cost_lo = INT_MAX; + int result; + + /* Look whether an earlier call to `find_derivation' has already + computed a possible derivation. If so, return it immediately. 
*/ + result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset, + handle, nsteps); + if (result == __GCONV_OK) + { +#ifndef STATIC_GCONV + result = increment_counter (*handle, *nsteps); +#endif + return result; + } + + /* The task is to find a sequence of transformations, backed by the + existing modules - whether builtin or dynamically loadable -, + starting at `fromset' (or `fromset_expand') and ending at `toset' + (or `toset_expand'), and with minimal cost. + + For computer scientists, this is a shortest path search in the + graph where the nodes are all possible charsets and the edges are + the transformations listed in __gconv_modules_db. + + For now we use a simple algorithm with quadratic runtime behaviour. + A breadth-first search, starting at `fromset' and `fromset_expand'. + The list starting at `first' contains all nodes that have been + visited up to now, in the order in which they have been visited -- + excluding the goal nodes `toset' and `toset_expand' which get + managed in the list starting at `solution'. + `current' walks through the list starting at `first' and looks + which nodes are reachable from the current node, adding them to + the end of the list [`first' or `solution' respectively] (if + they are visited the first time) or updating them in place (if + they have have already been visited). + In each node of either list, cost_lo and cost_hi contain the + minimum cost over any paths found up to now, starting at `fromset' + or `fromset_expand', ending at that node. best_cost_lo and + best_cost_hi represent the minimum over the elements of the + `solution' list. 
*/ + + if (fromset_expand != NULL) + { + first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL); + first->next = NEW_STEP (fromset, 0, 0, NULL, NULL); + lastp = &first->next->next; + } + else + { + first = NEW_STEP (fromset, 0, 0, NULL, NULL); + lastp = &first->next; + } + + for (current = first; current != NULL; current = current->next) + { + /* Now match all the available module specifications against the + current charset name. If any of them matches check whether + we already have a derivation for this charset. If yes, use the + one with the lower costs. Otherwise add the new charset at the + end. + + The module database is organized in a tree form which allows + searching for prefixes. So we search for the first entry with a + matching prefix and any other matching entry can be found from + this place. */ + struct gconv_module *node; + + /* Maybe it is not necessary anymore to look for a solution for + this entry since the cost is already as high (or higher) as + the cost for the best solution so far. */ + if (current->cost_hi > best_cost_hi + || (current->cost_hi == best_cost_hi + && current->cost_lo >= best_cost_lo)) + continue; + + node = __gconv_modules_db; + while (node != NULL) + { + int cmpres = strcmp (current->result_set, node->from_string); + if (cmpres == 0) + { + /* Walk through the list of modules with this prefix and + try to match the name. */ + struct gconv_module *runp; + + /* Check all the modules with this prefix. */ + runp = node; + do + { + const char *result_set = (strcmp (runp->to_string, "-") == 0 + ? (toset_expand ?: toset) + : runp->to_string); + int cost_hi = runp->cost_hi + current->cost_hi; + int cost_lo = runp->cost_lo + current->cost_lo; + struct derivation_step *step; + + /* We managed to find a derivation. First see whether + we have reached one of the goal nodes. 
*/ + if (strcmp (result_set, toset) == 0 + || (toset_expand != NULL + && strcmp (result_set, toset_expand) == 0)) + { + /* Append to the `solution' list if there + is no entry with this name. */ + for (step = solution; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + step = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + step->next = solution; + solution = step; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `solution' list. */ + step->code = runp; + step->last = current; + step->cost_hi = cost_hi; + step->cost_lo = cost_lo; + } + + /* Update best_cost accordingly. */ + if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + best_cost_hi = cost_hi; + best_cost_lo = cost_lo; + } + } + else if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + /* Append at the end of the `first' list if there + is no entry with this name. */ + for (step = first; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + *lastp = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + lastp = &(*lastp)->next; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `first' list. */ + step->code = runp; + step->last = current; + + /* Update the cost for all steps. */ + for (step = first; step != NULL; + step = step->next) + /* But don't update the start nodes. 
*/ + if (step->code != NULL) + { + struct derivation_step *back; + int hi, lo; + + hi = step->code->cost_hi; + lo = step->code->cost_lo; + + for (back = step->last; back->code != NULL; + back = back->last) + { + hi += back->code->cost_hi; + lo += back->code->cost_lo; + } + + step->cost_hi = hi; + step->cost_lo = lo; + } + + /* Likewise for the nodes on the solution list. + Also update best_cost accordingly. */ + for (step = solution; step != NULL; + step = step->next) + { + step->cost_hi = (step->code->cost_hi + + step->last->cost_hi); + step->cost_lo = (step->code->cost_lo + + step->last->cost_lo); + + if (step->cost_hi < best_cost_hi + || (step->cost_hi == best_cost_hi + && step->cost_lo < best_cost_lo)) + { + best_cost_hi = step->cost_hi; + best_cost_lo = step->cost_lo; + } + } + } + } + + runp = runp->same; + } + while (runp != NULL); + + break; + } + else if (cmpres < 0) + node = node->left; + else + node = node->right; + } + } + + if (solution != NULL) + { + /* We really found a way to do the transformation. */ + + /* Choose the best solution. This is easy because we know that + the solution list has at most length 2 (one for every possible + goal node). */ + if (solution->next != NULL) + { + struct derivation_step *solution2 = solution->next; + + if (solution2->cost_hi < solution->cost_hi + || (solution2->cost_hi == solution->cost_hi + && solution2->cost_lo < solution->cost_lo)) + solution = solution2; + } + + /* Now build a data structure describing the transformation steps. */ + result = gen_steps (solution, toset_expand ?: toset, + fromset_expand ?: fromset, handle, nsteps); + } + else + { + /* We haven't found a transformation. Clear the result values. */ + *handle = NULL; + *nsteps = 0; + } + + /* Add result in any case to list of known derivations. */ + add_derivation (fromset_expand ?: fromset, toset_expand ?: toset, + *handle, *nsteps); + + return result; +} + + +/* Control of initialization. 
*/ +__libc_once_define (static, once); + +/* Look NAME up in __gconv_alias_db. Returns the name it is an alias for, or NULL if the database has no entry for it. */ +static const char * +do_lookup_alias (const char *name) +{ + struct gconv_alias key; + struct gconv_alias **found; + + key.fromname = (char *) name; + found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare); + return found != NULL ? (*found)->toname : NULL; +} + +/* Compare NAME1 and NAME2 after alias expansion. The cache is consulted first; if __gconv_compare_alias_cache returns non-zero, each name is expanded with do_lookup_alias and the results are compared with strcmp. */ +int +internal_function +__gconv_compare_alias (const char *name1, const char *name2) +{ + int result; + + /* Ensure that the configuration data is read. */ + __libc_once (once, __gconv_read_conf); + + if (__gconv_compare_alias_cache (name1, name2, &result) != 0) + result = strcmp (do_lookup_alias (name1) ?: name1, + do_lookup_alias (name2) ?: name2); + + return result; +} + +/* Store in *HANDLE and *NSTEPS the steps converting FROMSET to TOSET. All work is done while holding __gconv_lock. The cache is tried first; if it reports __GCONV_NODB the module database is searched instead. Returns __GCONV_NOCONV when there is no module database or no derivation, and __GCONV_NULCONV when GCONV_AVOID_NOCONV is set in FLAGS and both names denote the same charset. */ +int +internal_function +__gconv_find_transform (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, + int flags) +{ + const char *fromset_expand; + const char *toset_expand; + int result; + + /* Ensure that the configuration data is read. */ + __libc_once (once, __gconv_read_conf); + + /* Acquire the lock. */ + __libc_lock_lock (__gconv_lock); + + result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags); + if (result != __GCONV_NODB) + { + /* We have a cache and could resolve the request, successful or not. */ + __libc_lock_unlock (__gconv_lock); + return result; + } + + /* If we don't have a module database return with an error. */ + if (__gconv_modules_db == NULL) + { + __libc_lock_unlock (__gconv_lock); + return __GCONV_NOCONV; + } + + /* See whether the names are aliases. */ + fromset_expand = do_lookup_alias (fromset); + toset_expand = do_lookup_alias (toset); + + if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0) + /* We are not supposed to create a pseudo transformation (means + copying) when the input and output character set are the same. 
*/ + && (strcmp (toset, fromset) == 0 + || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0) + || (fromset_expand != NULL + && (strcmp (toset, fromset_expand) == 0 + || (toset_expand != NULL + && strcmp (toset_expand, fromset_expand) == 0))))) + { + /* Both character sets are the same. */ + __libc_lock_unlock (__gconv_lock); + return __GCONV_NULCONV; + } + + result = find_derivation (toset, toset_expand, fromset, fromset_expand, + handle, nsteps); + + /* Release the lock. */ + __libc_lock_unlock (__gconv_lock); + + /* The following code is necessary since `find_derivation' will return + __GCONV_OK even when no derivation was found but the same request + was processed before. I.e., negative results will also be cached. */ + return (result == __GCONV_OK + ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK) + : result); +} + + +/* Release the entries of the modules list. */ +int +internal_function +__gconv_close_transform (struct __gconv_step *steps, size_t nsteps) +{ + int result = __GCONV_OK; + size_t cnt; + + /* Acquire the lock. */ + __libc_lock_lock (__gconv_lock); + +#ifndef STATIC_GCONV + cnt = nsteps; + while (cnt-- > 0) + __gconv_release_step (&steps[cnt]); +#endif + + /* If we use the cache we free a bit more since we don't keep any + transformation records around, they are cheap enough to + recreate. */ + __gconv_release_cache (steps, nsteps); + + /* Release the lock. */ + __libc_lock_unlock (__gconv_lock); + + return result; +} + + +/* Free the modules mentioned. The tree is walked recursively through `left' and `right'; along each `same' chain only entries whose module_name starts with '/' are passed to free. */ +static void +internal_function __libc_freeres_fn_section +free_modules_db (struct gconv_module *node) +{ + if (node->left != NULL) + free_modules_db (node->left); + if (node->right != NULL) + free_modules_db (node->right); + do + { + struct gconv_module *act = node; + node = node->same; + if (act->module_name[0] == '/') + free (act); + } + while (node != NULL); +} + + +/* Free all resources if necessary. */ +libc_freeres_fn (free_mem) +{ + /* First free locale memory. 
This needs to be done before freeing + derivations, as ctype cleanup functions dereference steps arrays which we + free below. */ + _nl_locale_subfreeres (); + + /* finddomain.c has similar problem. */ + extern void _nl_finddomain_subfreeres (void) attribute_hidden; + _nl_finddomain_subfreeres (); + + if (__gconv_alias_db != NULL) + __tdestroy (__gconv_alias_db, free); + + if (__gconv_modules_db != NULL) + free_modules_db (__gconv_modules_db); + + if (known_derivations != NULL) + __tdestroy (known_derivations, free_derivation); +} diff --git a/REORG.TODO/iconv/gconv_dl.c b/REORG.TODO/iconv/gconv_dl.c new file mode 100644 index 0000000000..241836204d --- /dev/null +++ b/REORG.TODO/iconv/gconv_dl.c @@ -0,0 +1,242 @@ +/* Handle loading/unloading of shared object for transformation. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <dlfcn.h> +#include <inttypes.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <libc-lock.h> +#include <sys/param.h> + +#include <gconv_int.h> +#include <sysdep.h> + + +#ifdef DEBUG +/* For debugging purposes. */ +static void print_all (void); +#endif + + +/* This is a tuning parameter. 
If a transformation module is not used + anymore it gets not immediately unloaded. Instead we wait a certain + number of load attempts for further modules. If none of the + subsequent load attempts name the same object it finally gets unloaded. + Otherwise it is still available which hopefully is the frequent case. + The following number is the number of unloading attempts we wait + before unloading. */ +#define TRIES_BEFORE_UNLOAD 2 + +/* Root of the search tree of loaded objects. This is shared by all threads so + access to it has to be serialized. */ +static void *loaded; + +/* Comparison function for searching `loaded_object' tree. */ +static int +known_compare (const void *p1, const void *p2) +{ + const struct __gconv_loaded_object *s1 = + (const struct __gconv_loaded_object *) p1; + const struct __gconv_loaded_object *s2 = + (const struct __gconv_loaded_object *) p2; + + return strcmp (s1->name, s2->name); +} + +/* Find or create the record for the shared object NAME and load the object if it is not currently loaded. + Returns the record, or NULL if allocation, tree insertion, dlopen or the lookup of the `gconv' symbol fails. */ +struct __gconv_loaded_object * +internal_function +__gconv_find_shlib (const char *name) +{ + struct __gconv_loaded_object *found; + void *keyp; + + /* Search the tree of shared objects previously requested. Data in + the tree are `loaded_object' structures, whose first member is a + `const char *', the lookup key. The search returns a pointer to + the tree node structure; the first member of that is a pointer to + our structure (i.e. what will be a `loaded_object'); since the + first member of that is the lookup key string, &NAME is close + enough to a pointer to our structure to use as a lookup key that + will be passed to `known_compare' (above). */ + + keyp = __tfind (&name, &loaded, known_compare); + if (keyp == NULL) + { + /* This name was not known before. */ + size_t namelen = strlen (name) + 1; + + found = malloc (sizeof (struct __gconv_loaded_object) + namelen); + if (found != NULL) + { + /* Point the tree node at this new structure. 
*/ + found->name = (char *) memcpy (found + 1, name, namelen); + found->counter = -TRIES_BEFORE_UNLOAD - 1; + found->handle = NULL; + + if (__builtin_expect (__tsearch (found, &loaded, known_compare) + == NULL, 0)) + { + /* Something went wrong while inserting the entry. */ + free (found); + found = NULL; + } + } + } + else + found = *(struct __gconv_loaded_object **) keyp; + + /* Try to load the shared object if it is not currently loaded, i.e. + its counter is below -TRIES_BEFORE_UNLOAD: new records start there and + do_release_shlib closes the handle once the counter drops that far. */ + if (found != NULL) + { + if (found->counter < -TRIES_BEFORE_UNLOAD) + { + assert (found->handle == NULL); + found->handle = __libc_dlopen (found->name); + if (found->handle != NULL) + { + found->fct = __libc_dlsym (found->handle, "gconv"); + if (found->fct == NULL) + { + /* Argh, no conversion function. The counter is still negative here, so __gconv_release_shlib would trip the `counter > 0' assertion in do_release_shlib; close the handle directly so a later attempt starts from a clean record. */ + __libc_dlclose (found->handle); + found->handle = NULL; + found = NULL; + } + else + { + found->init_fct = __libc_dlsym (found->handle, "gconv_init"); + found->end_fct = __libc_dlsym (found->handle, "gconv_end"); + +#ifdef PTR_MANGLE + PTR_MANGLE (found->fct); + if (found->init_fct != NULL) + PTR_MANGLE (found->init_fct); + if (found->end_fct != NULL) + PTR_MANGLE (found->end_fct); +#endif + + /* We have succeeded in loading the shared object. */ + found->counter = 1; + } + } + else + /* Error while loading the shared object. */ + found = NULL; + } + else if (found->handle != NULL) + found->counter = MAX (found->counter + 1, 1); + } + + return found; +} + + +/* This is very ugly but the tsearch functions provide no way to pass + information to the walker function. So we use a global variable. + It is MT safe since we use a lock. 
*/ +static struct __gconv_loaded_object *release_handle; + /* Walker for __twalk over `loaded'. Only preorder and leaf visits are acted on. The object equal to release_handle has its counter decremented; any other object whose counter lies between -TRIES_BEFORE_UNLOAD and 0 is decremented as well and, once the counter falls below -TRIES_BEFORE_UNLOAD, its handle is closed. LEVEL is unused. */ +static void +do_release_shlib (void *nodep, VISIT value, int level) +{ + struct __gconv_loaded_object *obj = *(struct __gconv_loaded_object **) nodep; + + if (value != preorder && value != leaf) + return; + + if (obj == release_handle) + { + /* This is the object we want to unload. Now decrement the + reference counter. */ + assert (obj->counter > 0); + --obj->counter; + } + else if (obj->counter <= 0 && obj->counter >= -TRIES_BEFORE_UNLOAD + && --obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL) + { + /* Unload the shared object. */ + __libc_dlclose (obj->handle); + obj->handle = NULL; + } +} + + +/* Notify system that a shared object is no longer needed. */ +void +internal_function +__gconv_release_shlib (struct __gconv_loaded_object *handle) +{ + /* Urgh, this is ugly but we have no other possibility. */ + release_handle = handle; + + /* Process all entries. Please note that we also visit entries + with release counts <= 0. This way we can finally unload them + if necessary. */ + __twalk (loaded, (__action_fn_t) do_release_shlib); +} + + +/* We run this if we debug the memory allocation. Passed to __tdestroy below; NODEP is used as the stored object itself, whose handle is closed (if any) before the object is freed. */ +static void __libc_freeres_fn_section +do_release_all (void *nodep) +{ + struct __gconv_loaded_object *obj = (struct __gconv_loaded_object *) nodep; + + /* Unload the shared object. */ + if (obj->handle != NULL) + __libc_dlclose (obj->handle); + + free (obj); +} + +libc_freeres_fn (free_mem) +{ + __tdestroy (loaded, do_release_all); + loaded = NULL; +} + + +#ifdef DEBUG + +#include <stdio.h> + +static void +do_print (const void *nodep, VISIT value, int level) +{ + struct __gconv_loaded_object *obj = *(struct __gconv_loaded_object **) nodep; + + printf ("%10s: \"%s\", %d\n", + value == leaf ? "leaf" : + value == preorder ? "preorder" : + value == postorder ? 
"postorder" : "endorder", + obj->name, obj->counter); +} + +static void __attribute__ ((used)) +print_all (void) +{ + __twalk (loaded, do_print); +} +#endif diff --git a/REORG.TODO/iconv/gconv_int.h b/REORG.TODO/iconv/gconv_int.h new file mode 100644 index 0000000000..85a67ad31b --- /dev/null +++ b/REORG.TODO/iconv/gconv_int.h @@ -0,0 +1,287 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _GCONV_INT_H +#define _GCONV_INT_H 1 + +#include "gconv.h" +#include <stdlib.h> /* For alloca used in macro below. */ +#include <ctype.h> /* For __toupper_l used in macro below. */ +#include <string.h> /* For strlen et al used in macro below. */ +#include <libc-lock.h> + +__BEGIN_DECLS + + +/* Type to represent search path. */ +struct path_elem +{ + const char *name; + size_t len; +}; + +/* Variable with search path for `gconv' implementation. */ +extern struct path_elem *__gconv_path_elem attribute_hidden; +/* Maximum length of a single path element. */ +extern size_t __gconv_max_path_elem_len attribute_hidden; + + +/* Structure for alias definition. Simply two strings. */ +struct gconv_alias +{ + char *fromname; + char *toname; +}; + + +/* How many character should be converted in one call? 
*/ +#define GCONV_NCHAR_GOAL 8160 + + +/* Structure describing one loaded shared object. These normally are + objects to perform conversion but as a special case the db shared + object is also handled. */ +struct __gconv_loaded_object +{ + /* Name of the object. It must be the first structure element. */ + const char *name; + + /* Reference counter for the db functionality. If no conversion is + needed we unload the db library. */ + int counter; + + /* The handle for the shared object. */ + void *handle; + + /* Pointer to the functions the module defines. */ + __gconv_fct fct; + __gconv_init_fct init_fct; + __gconv_end_fct end_fct; +}; + + +/* Description for an available conversion module. */ +struct gconv_module +{ + const char *from_string; + const char *to_string; + /* Two-part cost: find_derivation compares cost_hi first and uses cost_lo only to break ties. */ + int cost_hi; + int cost_lo; + + const char *module_name; + + struct gconv_module *left; /* Prefix smaller. */ + struct gconv_module *same; /* List of entries with identical prefix. */ + struct gconv_module *right; /* Prefix larger. */ +}; + + +/* Flags for `gconv_open'. */ +enum +{ + GCONV_AVOID_NOCONV = 1 << 0 +}; + +/* When GCONV_AVOID_NOCONV is set and no conversion is needed, + __GCONV_NULCONV should be returned. */ +enum +{ + __GCONV_NULCONV = -1 +}; + +/* Global variables. */ + +/* Database of alias names. */ +extern void *__gconv_alias_db attribute_hidden; + +/* Array with available modules. */ +extern size_t __gconv_nmodules; +extern struct gconv_module *__gconv_modules_db attribute_hidden; + +/* Value of the GCONV_PATH environment variable. */ +extern const char *__gconv_path_envvar attribute_hidden; + +/* Lock for the conversion database content. */ +__libc_lock_define (extern, __gconv_lock attribute_hidden) + + +/* The gconv functions expects the name to be in upper case and complete, + including the trailing slashes if necessary. 
*/ +#define norm_add_slashes(str,suffix) \ + ({ \ + const char *cp = (str); \ + char *result; \ + char *tmp; \ + size_t cnt = 0; \ + const size_t suffix_len = strlen (suffix); \ + \ + while (*cp != '\0') \ + if (*cp++ == '/') \ + ++cnt; \ + \ + tmp = result = __alloca (cp - (str) + 3 + suffix_len); \ + cp = (str); \ + while (*cp != '\0') \ + *tmp++ = __toupper_l (*cp++, _nl_C_locobj_ptr); \ + if (cnt < 2) \ + { \ + *tmp++ = '/'; \ + if (cnt < 1) \ + { \ + *tmp++ = '/'; \ + if (suffix_len != 0) \ + tmp = __mempcpy (tmp, suffix, suffix_len); \ + } \ + } \ + *tmp = '\0'; \ + result; \ + }) + + +/* Return in *HANDLE descriptor for transformation from FROMSET to TOSET. */ +extern int __gconv_open (const char *toset, const char *fromset, + __gconv_t *handle, int flags) + internal_function; + +/* Free resources associated with transformation descriptor CD. */ +extern int __gconv_close (__gconv_t cd) + internal_function; + +/* Transform at most *INBYTESLEFT bytes from buffer starting at *INBUF + according to rules described by CD and place up to *OUTBYTESLEFT + bytes in buffer starting at *OUTBUF. Return number of non-identical + conversions in *IRREVERSIBLE if this pointer is not null. */ +extern int __gconv (__gconv_t cd, const unsigned char **inbuf, + const unsigned char *inbufend, unsigned char **outbuf, + unsigned char *outbufend, size_t *irreversible) + internal_function; + +/* Return in *HANDLE a pointer to an array with *NSTEPS elements describing + the single steps necessary for transformation from FROMSET to TOSET. */ +extern int __gconv_find_transform (const char *toset, const char *fromset, + struct __gconv_step **handle, + size_t *nsteps, int flags) + internal_function; + +/* Search for transformation in cache data. */ +extern int __gconv_lookup_cache (const char *toset, const char *fromset, + struct __gconv_step **handle, size_t *nsteps, + int flags) + internal_function; + +/* Compare the two name for whether they are after alias expansion the + same. 
This function uses the cache and fails if none is + loaded. */ +extern int __gconv_compare_alias_cache (const char *name1, const char *name2, + int *result) internal_function; + +/* Free data associated with a step's structure. */ +extern void __gconv_release_step (struct __gconv_step *step) + internal_function; + +/* Read all the configuration data and cache it. */ +extern void __gconv_read_conf (void) attribute_hidden; + +/* Try to read module cache file. */ +extern int __gconv_load_cache (void) internal_function; + +/* Retrieve pointer to internal cache. */ +extern void *__gconv_get_cache (void); + +/* Retrieve pointer to internal module database. */ +extern struct gconv_module *__gconv_get_modules_db (void); + +/* Retrieve pointer to internal alias database. */ +extern void *__gconv_get_alias_db (void); + +/* Determine the directories we are looking in. */ +extern void __gconv_get_path (void) internal_function; + +/* Comparison function to search alias. */ +extern int __gconv_alias_compare (const void *p1, const void *p2) + attribute_hidden; + +/* Clear reference to transformation step implementations which might + cause the code to be unloaded. The definition in gconv_db.c takes __gconv_lock itself and always returns __GCONV_OK. */ +extern int __gconv_close_transform (struct __gconv_step *steps, + size_t nsteps) + internal_function; + +/* Free all resources allocated for the transformation record when + using the cache. */ +extern void __gconv_release_cache (struct __gconv_step *steps, size_t nsteps) + internal_function; + +/* Load shared object named by NAME. If already loaded increment reference + count. Returns NULL if the object cannot be recorded or loaded. */ +extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name) + internal_function; + +/* Release shared object. If no further reference is available unload + the object. */ +extern void __gconv_release_shlib (struct __gconv_loaded_object *handle) + internal_function; + +/* Fill STEP with information about builtin module with NAME. 
*/ +extern void __gconv_get_builtin_trans (const char *name, + struct __gconv_step *step) + internal_function; + +libc_hidden_proto (__gconv_transliterate) + +/* If NAME is an codeset alias expand it. */ +extern int __gconv_compare_alias (const char *name1, const char *name2) + internal_function; + + +/* Builtin transformations. */ +#ifdef _LIBC +# define __BUILTIN_TRANSFORM(Name) \ + extern int Name (struct __gconv_step *step, \ + struct __gconv_step_data *data, \ + const unsigned char **inbuf, \ + const unsigned char *inbufend, \ + unsigned char **outbufstart, size_t *irreversible, \ + int do_flush, int consume_incomplete) + +__BUILTIN_TRANSFORM (__gconv_transform_ascii_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ascii); +__BUILTIN_TRANSFORM (__gconv_transform_utf8_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_utf8); +__BUILTIN_TRANSFORM (__gconv_transform_ucs2_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2); +__BUILTIN_TRANSFORM (__gconv_transform_ucs2reverse_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs2reverse); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4); +__BUILTIN_TRANSFORM (__gconv_transform_ucs4_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le); +__BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal); +__BUILTIN_TRANSFORM (__gconv_transform_internal_utf16); +__BUILTIN_TRANSFORM (__gconv_transform_utf16_internal); +# undef __BUITLIN_TRANSFORM + +/* Specialized conversion function for a single byte to INTERNAL, recognizing + only ASCII characters. */ +extern wint_t __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c); + +#endif + +__END_DECLS + +#endif /* gconv_int.h */ diff --git a/REORG.TODO/iconv/gconv_open.c b/REORG.TODO/iconv/gconv_open.c new file mode 100644 index 0000000000..ff4fd121eb --- /dev/null +++ b/REORG.TODO/iconv/gconv_open.c @@ -0,0 +1,208 @@ +/* Find matching transformation algorithms and initialize steps. 
+ Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <locale.h> +#include "../locale/localeinfo.h" +#include <stdlib.h> +#include <string.h> + +#include <gconv_int.h> + + /* Open a conversion from FROMSET to TOSET and store the descriptor in *HANDLE (NULL on failure). Text after the second `/' of TOSET is a comma-separated list in which `TRANSLIT' and `IGNORE' are recognized; the corresponding text in FROMSET is dropped. A name equal to "//" stands for the codeset of the current LC_CTYPE locale. Returns the status of __gconv_find_transform, or __GCONV_NOMEM if an allocation fails; errno is preserved across the cleanup on failure. */ +int +internal_function +__gconv_open (const char *toset, const char *fromset, __gconv_t *handle, + int flags) +{ + struct __gconv_step *steps; + size_t nsteps; + __gconv_t result = NULL; + size_t cnt = 0; + int res; + int conv_flags = 0; + const char *errhand; + const char *ignore; + bool translit = false; + + /* Find out whether any error handling method is specified. */ + errhand = strchr (toset, '/'); + if (errhand != NULL) + errhand = strchr (errhand + 1, '/'); + if (__glibc_likely (errhand != NULL)) + { + if (*++errhand == '\0') + errhand = NULL; + else + { + /* Make copy without the error handling description. */ + char *newtoset = (char *) alloca (errhand - toset + 1); + char *tok; + char *ptr = NULL /* Work around a bogus warning */; + + newtoset[errhand - toset] = '\0'; + toset = memcpy (newtoset, toset, errhand - toset); + + /* Find the appropriate transliteration handlers. 
*/ + tok = strdupa (errhand); + + tok = __strtok_r (tok, ",", &ptr); + while (tok != NULL) + { + if (__strcasecmp_l (tok, "TRANSLIT", _nl_C_locobj_ptr) == 0) + translit = true; + else if (__strcasecmp_l (tok, "IGNORE", _nl_C_locobj_ptr) == 0) + /* Set the flag to ignore all errors. */ + conv_flags |= __GCONV_IGNORE_ERRORS; + + tok = __strtok_r (NULL, ",", &ptr); + } + } + } + + /* For the source character set we ignore the error handler specification. + XXX Is this really always the best? */ + ignore = strchr (fromset, '/'); + if (ignore != NULL && (ignore = strchr (ignore + 1, '/')) != NULL + && *++ignore != '\0') + { + char *newfromset = (char *) alloca (ignore - fromset + 1); + + newfromset[ignore - fromset] = '\0'; + fromset = memcpy (newfromset, fromset, ignore - fromset); + } + + /* If the string is empty define this to mean the charset of the + currently selected locale. */ + if (strcmp (toset, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; + toset = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } + if (strcmp (fromset, "//") == 0) + { + const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); + size_t len = strlen (codeset); + char *dest; + fromset = dest = (char *) alloca (len + 3); + memcpy (__mempcpy (dest, codeset, len), "//", 3); + } + + res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags); + if (res == __GCONV_OK) + { + /* Allocate room for handle. */ + result = (__gconv_t) malloc (sizeof (struct __gconv_info) + + (nsteps + * sizeof (struct __gconv_step_data))); + if (result == NULL) + res = __GCONV_NOMEM; + else + { + /* Remember the list of steps. */ + result->__steps = steps; + result->__nsteps = nsteps; + + /* Clear the array for the step data. */ + memset (result->__data, '\0', + nsteps * sizeof (struct __gconv_step_data)); + + /* Call all initialization functions for the transformation + step implementations. 
*/ + for (cnt = 0; cnt < nsteps; ++cnt) + { + size_t size; + + /* Would have to be done if we would not clear the whole + array above. */ +#if 0 + /* Reset the counter. */ + result->__data[cnt].__invocation_counter = 0; + + /* It's a regular use. */ + result->__data[cnt].__internal_use = 0; +#endif + + /* We use the `mbstate_t' member in DATA. */ + result->__data[cnt].__statep = &result->__data[cnt].__state; + + /* The builtin transliteration handling only + supports the internal encoding. */ + if (translit + && __strcasecmp_l (steps[cnt].__from_name, + "INTERNAL", _nl_C_locobj_ptr) == 0) + conv_flags |= __GCONV_TRANSLIT; + + /* If this is the last step we must not allocate an + output buffer. */ + if (cnt < nsteps - 1) + { + result->__data[cnt].__flags = conv_flags; + + /* Allocate the buffer. */ + size = (GCONV_NCHAR_GOAL * steps[cnt].__max_needed_to); + + result->__data[cnt].__outbuf = malloc (size); + if (result->__data[cnt].__outbuf == NULL) + { + res = __GCONV_NOMEM; + goto bail; + } + + result->__data[cnt].__outbufend = + result->__data[cnt].__outbuf + size; + } + else + { + /* Handle the last entry. */ + result->__data[cnt].__flags = conv_flags | __GCONV_IS_LAST; + + break; + } + } + } + + if (res != __GCONV_OK) + { + /* Something went wrong. Free all the resources. */ + int serrno; + bail: + serrno = errno; + + if (result != NULL) + { + while (cnt-- > 0) + free (result->__data[cnt].__outbuf); + + free (result); + result = NULL; + } + + __gconv_close_transform (steps, nsteps); + + __set_errno (serrno); + } + } + + *handle = result; + return res; +} diff --git a/REORG.TODO/iconv/gconv_simple.c b/REORG.TODO/iconv/gconv_simple.c new file mode 100644 index 0000000000..863d3dcc3f --- /dev/null +++ b/REORG.TODO/iconv/gconv_simple.c @@ -0,0 +1,1329 @@ +/* Simple transformations functions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <byteswap.h> +#include <dlfcn.h> +#include <endian.h> +#include <errno.h> +#include <gconv.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <sys/param.h> +#include <gconv_int.h> + +#define BUILTIN_ALIAS(s1, s2) /* nothing */ +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \ + const unsigned char **, const unsigned char *, \ + unsigned char **, size_t *, int, int); +#include "gconv_builtin.h" + + +#ifndef EILSEQ +# define EILSEQ EINVAL +#endif + + +/* Specialized conversion function for a single byte to INTERNAL, recognizing + only ASCII characters. */ +wint_t +__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c) +{ + if (c < 0x80) + return c; + else + return WEOF; +} + + +/* Transform from the internal, UCS4-like format, to UCS4. The + difference between the internal ucs4 format and the real UCS4 + format is, if any, the endianess. 
The Unicode/ISO 10646 says that + unless some higher protocol specifies it differently, the byte + order is big endian.*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs4_loop +#define TO_LOOP internal_ucs4_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs4 +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +internal_ucs4_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + uint32_t *outptr32 = (uint32_t *) outptr; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + *outptr32++ = bswap_32 (*(const uint32_t *) inptr); + + *inptrp = inptr; + *outptrp = (unsigned char *) outptr32; +#elif __BYTE_ORDER == __BIG_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +#else +# error "This endianess is not supported." +#endif + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +internal_ucs4_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +# if __BYTE_ORDER == __LITTLE_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) + { + outptr[0] = inptr[3]; + outptr[1] = inptr[2]; + outptr[2] = inptr[1]; + outptr[3] = inptr[0]; + } + + *inptrp = inptr; + *outptrp = outptr; +# elif __BYTE_ORDER == __BIG_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +# else +# error "This endianess is not supported." +# endif + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +internal_ucs4_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. 
*/ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + +#if __BYTE_ORDER == __LITTLE_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; + +#elif __BYTE_ORDER == __BIG_ENDIAN + /* XXX unaligned */ + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; +#else +# error "This endianess is not supported." +#endif + *outptrp += 4; + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* Transform from UCS4 to the internal, UCS4-like format. Unlike + for the other direction we have to check for correct values here. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs4_internal_loop +#define TO_LOOP ucs4_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ucs4_internal +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +ucs4_internal_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + uint32_t inval; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + inval = bswap_32 (*(const uint32_t *) inptr); +#else + inval = *(const uint32_t *) inptr; +#endif + + if (__glibc_unlikely (inval > 0x7fffffff)) + { + /* The value is too large. 
We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + + *((uint32_t *) outptr) = inval; + outptr += sizeof (uint32_t); + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +ucs4_internal_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + if (__glibc_unlikely (inptr[0] > 0x80)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. 
*/ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + +# if __BYTE_ORDER == __LITTLE_ENDIAN + outptr[3] = inptr[0]; + outptr[2] = inptr[1]; + outptr[1] = inptr[2]; + outptr[0] = inptr[3]; +# else + outptr[0] = inptr[0]; + outptr[1] = inptr[1]; + outptr[2] = inptr[2]; + outptr[3] = inptr[3]; +# endif + outptr += 4; + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +ucs4_internal_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + int flags = step_data->__flags; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + + if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80, + 0)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. 
*/ + if (!(flags & __GCONV_IGNORE_ERRORS)) + { + *inptrp -= cnt - (state->__count & 7); + return __GCONV_ILLEGAL_INPUT; + } + } + else + { +#if __BYTE_ORDER == __LITTLE_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; +#elif __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; +#endif + + *outptrp += 4; + } + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* Similarly for the little endian form. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs4le_loop +#define TO_LOOP internal_ucs4le_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs4le +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +internal_ucs4le_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +#if __BYTE_ORDER == __BIG_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + uint32_t *outptr32 = (uint32_t *) outptr; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + *outptr32++ = bswap_32 (*(const uint32_t *) inptr); + outptr = (unsigned char *) outptr32; + + *inptrp = inptr; + *outptrp = outptr; +#elif __BYTE_ORDER == __LITTLE_ENDIAN + /* Simply copy the data. 
*/ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +#else +# error "This endianess is not supported." +#endif + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +internal_ucs4le_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + +# if __BYTE_ORDER == __BIG_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) + { + outptr[0] = inptr[3]; + outptr[1] = inptr[2]; + outptr[2] = inptr[1]; + outptr[3] = inptr[0]; + } + + *inptrp = inptr; + *outptrp = outptr; +# elif __BYTE_ORDER == __LITTLE_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +# else +# error "This endianess is not supported." +# endif + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*inptrp + 4 > inend) + result = __GCONV_INCOMPLETE_INPUT; + else + { + assert (*outptrp + 4 > outend); + result = __GCONV_FULL_OUTPUT; + } + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +internal_ucs4le_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + +#if __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; + +#else + /* XXX unaligned */ + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; + +#endif + + *outptrp += 4; + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* And finally from UCS4-LE to the internal encoding. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs4le_internal_loop +#define TO_LOOP ucs4le_internal_loop /* This is not used. 
*/ +#define FUNCTION_NAME __gconv_transform_ucs4le_internal +#define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +ucs4le_internal_loop (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + uint32_t inval; + +#if __BYTE_ORDER == __BIG_ENDIAN + inval = bswap_32 (*(const uint32_t *) inptr); +#else + inval = *(const uint32_t *) inptr; +#endif + + if (__glibc_unlikely (inval > 0x7fffffff)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + + *((uint32_t *) outptr) = inval; + outptr += sizeof (uint32_t); + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*inptrp + 4 > inend) + result = __GCONV_INCOMPLETE_INPUT; + else + { + assert (*outptrp + 4 > outend); + result = __GCONV_FULL_OUTPUT; + } + + return result; +} + +#if !_STRING_ARCH_unaligned +static inline int +__attribute ((always_inline)) +ucs4le_internal_loop_unaligned (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; + int result; + size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + { + if (__glibc_unlikely (inptr[3] > 0x80)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + +# if __BYTE_ORDER == __BIG_ENDIAN + outptr[3] = inptr[0]; + outptr[2] = inptr[1]; + outptr[1] = inptr[2]; + outptr[0] = inptr[3]; +# else + outptr[0] = inptr[0]; + outptr[1] = inptr[1]; + outptr[2] = inptr[2]; + outptr[3] = inptr[3]; +# endif + + outptr += 4; + } + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. 
*/ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*inptrp + 4 > inend) + result = __GCONV_INCOMPLETE_INPUT; + else + { + assert (*outptrp + 4 > outend); + result = __GCONV_FULL_OUTPUT; + } + + return result; +} +#endif + + +static inline int +__attribute ((always_inline)) +ucs4le_internal_loop_single (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible) +{ + mbstate_t *state = step_data->__statep; + int flags = step_data->__flags; + size_t cnt = state->__count & 7; + + while (*inptrp < inend && cnt < 4) + state->__value.__wchb[cnt++] = *(*inptrp)++; + + if (__glibc_unlikely (cnt < 4)) + { + /* Still not enough bytes. Store the ones in the input buffer. */ + state->__count &= ~7; + state->__count |= cnt; + + return __GCONV_INCOMPLETE_INPUT; + } + + if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80, + 0)) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (!(flags & __GCONV_IGNORE_ERRORS)) + return __GCONV_ILLEGAL_INPUT; + } + else + { +#if __BYTE_ORDER == __BIG_ENDIAN + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; +#else + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; +#endif + + *outptrp += 4; + } + + /* Clear the state buffer. */ + state->__count &= ~7; + + return __GCONV_OK; +} + +#include <iconv/skeleton.c> + + +/* Convert from ISO 646-IRV to the internal (UCS4-like) format. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ascii_internal_loop +#define TO_LOOP ascii_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ascii_internal +#define ONE_DIRECTION 1 +/* BODY handles one input byte: a byte up to 0x7f is stored as one 32-bit unit, anything larger is handed to STANDARD_FROM_LOOP_ERR_HANDLER. BODY uses inptr and outptr, which are not declared here; presumably <iconv/loop.c> provides them -- confirm there. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__glibc_unlikely (*inptr > '\x7f')) \ + { \ + /* The value is too large. We don't try transliteration here since \ + this is not an error because of the lack of possibilities to \ + represent the result. This is a genuine bug in the input since \ + ASCII does not allow such values. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + } \ + else \ + { \ + /* It's a one-byte sequence: store the byte as a 32-bit unit. */ \ + *((uint32_t *) outptr) = *inptr++; \ + outptr += sizeof (uint32_t); \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to ISO 646-IRV. Each 32-bit input unit up to 0x7f becomes one output byte; larger values go through UNICODE_TAG_HANDLER and then STANDARD_TO_LOOP_ERR_HANDLER. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ascii_loop +#define TO_LOOP internal_ascii_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ascii +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \ + { \ + UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else \ + { \ + /* It's a one-byte sequence: store the low byte of the unit. */ \ + *outptr++ = *((const uint32_t *) inptr); \ + inptr += sizeof (uint32_t); \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UTF-8. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 1 +#define MAX_NEEDED_TO 6 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_utf8_loop +#define TO_LOOP internal_utf8_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_utf8 +#define ONE_DIRECTION 1 +/* BODY encodes one 32-bit unit WC: values below 0x80 take one byte; other values up to 0x7fffffff outside the surrogate range 0xd800..0xdfff take two to six bytes; everything else is handed to STANDARD_TO_LOOP_ERR_HANDLER. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t wc = *((const uint32_t *) inptr); \ + \ + if (__glibc_likely (wc < 0x80)) \ + /* It's a one-byte sequence. */ \ + *outptr++ = (unsigned char) wc; \ + else if (__glibc_likely (wc <= 0x7fffffff \ + && (wc < 0xd800 || wc > 0xdfff))) \ + { \ + size_t step; \ + unsigned char *start; \ + /* Find the smallest STEP in 2..5 for which WC fits in 5 * STEP + 1 bits; if none does, STEP ends up as 6. */ \ + for (step = 2; step < 6; ++step) \ + if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \ + break; \ + \ + if (__glibc_unlikely (outptr + step > outend)) \ + { \ + /* The STEP-byte sequence does not fit into the output buffer. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + /* Store the lead-byte marker bits first, then fill the trail bytes from the last one backwards with six bits of WC each; what is left of WC is merged into the lead byte. */ \ + start = outptr; \ + *outptr = (unsigned char) (~0xff >> step); \ + outptr += step; \ + do \ + { \ + start[--step] = 0x80 | (wc & 0x3f); \ + wc >>= 6; \ + } \ + while (step > 1); \ + start[0] |= wc; \ + } \ + else \ + { \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + \ + inptr += 4; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UTF-8 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 6 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP utf8_internal_loop +#define TO_LOOP utf8_internal_loop /* This is not used. 
*/ +#define FUNCTION_NAME __gconv_transform_utf8_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + /* Next input byte. */ \ + uint32_t ch = *inptr; \ + \ + if (__glibc_likely (ch < 0x80)) \ + { \ + /* One-byte sequence: CH already is the value to store. */ \ + ++inptr; \ + } \ + else \ + { \ + uint_fast32_t cnt; \ + uint_fast32_t i; \ + /* Classify the lead byte: CNT becomes the sequence length and CH keeps only the payload bits of the lead byte. */ \ + if (ch >= 0xc2 && ch < 0xe0) \ + { \ + /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ + otherwise the wide character could have been represented \ + using a single byte. */ \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + else \ + { \ + /* Search the end of this ill-formed UTF-8 character. This \ + is the next byte with (x & 0xc0) != 0x80. At most five \ + bytes are examined. */ \ + i = 0; \ + do \ + ++i; \ + while (inptr + i < inend \ + && (*(inptr + i) & 0xc0) == 0x80 \ + && i < 5); \ + /* This label is also reached by goto from the checks below; I is then the index of the first byte that was not accepted. */ \ + errout: \ + STANDARD_FROM_LOOP_ERR_HANDLER (i); \ + } \ + \ + if (__glibc_unlikely (inptr + cnt > inend)) \ + { \ + /* We don't have enough input. But before we report that check \ + that all the bytes are correct. */ \ + for (i = 1; inptr + i < inend; ++i) \ + if ((inptr[i] & 0xc0) != 0x80) \ + break; \ + \ + if (__glibc_likely (inptr + i == inend)) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + goto errout; \ + } \ + \ + /* Read the possible remaining bytes. 
*/ \ + for (i = 1; i < cnt; ++i) \ + { \ + uint32_t byte = inptr[i]; \ + \ + if ((byte & 0xc0) != 0x80) \ + /* This is an illegal encoding. */ \ + break; \ + \ + ch <<= 6; \ + ch |= byte & 0x3f; \ + } \ + \ + /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ + If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ + have been represented with fewer than cnt bytes. */ \ + if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ + /* Do not accept UTF-16 surrogates. */ \ + || (ch >= 0xd800 && ch <= 0xdfff)) \ + { \ + /* This is an illegal encoding. */ \ + goto errout; \ + } \ + \ + inptr += cnt; \ + } \ + \ + /* Now adjust the pointers and store the result. */ \ + *((uint32_t *) outptr) = ch; \ + outptr += sizeof (uint32_t); \ + } +#define LOOP_NEED_FLAGS + +#define STORE_REST \ + { \ + /* We store the remaining bytes while converting them into the UCS4 \ + format. We can assume that the first byte in the buffer is \ + correct and that it requires a larger number of bytes than there \ + are in the input buffer. The low bits of state->__count receive \ + the number of bytes that are available. */ \ + wint_t ch = **inptrp; \ + size_t cnt, r; \ + \ + state->__count = inend - *inptrp; \ + \ + assert (ch != 0xc0 && ch != 0xc1); \ + if (ch >= 0xc2 && ch < 0xe0) \ + { \ + /* We expect two bytes. The first byte cannot be 0xc0 or \ + 0xc1, otherwise the wide character could have been \ + represented using a single byte. */ \ + cnt = 2; \ + ch &= 0x1f; \ + } \ + else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ + { \ + /* We expect three bytes. */ \ + cnt = 3; \ + ch &= 0x0f; \ + } \ + else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ + { \ + /* We expect four bytes. */ \ + cnt = 4; \ + ch &= 0x07; \ + } \ + else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ + { \ + /* We expect five bytes. */ \ + cnt = 5; \ + ch &= 0x03; \ + } \ + else \ + { \ + /* We expect six bytes. */ \ + cnt = 6; \ + ch &= 0x01; \ + } \ + \ + /* The first byte is already consumed. 
*/ \ + r = cnt - 1; \ + while (++(*inptrp) < inend) \ + { \ + ch <<= 6; \ + ch |= **inptrp & 0x3f; \ + --r; \ + } \ + \ + /* Shift for the so far missing bytes. */ \ + ch <<= r * 6; \ + \ + /* Store the number of bytes expected for the entire sequence. */ \ + state->__count |= cnt << 8; \ + \ + /* Store the value. */ \ + state->__value.__wch = ch; \ + } +/* Rebuild in BYTEBUF the bytes that STORE_REST folded into the state. The low 8 bits of state->__count give the number of bytes that had been seen (stored in INLEN), the bits above give the total length of the sequence. Only positions below INLEN are written. BYTEBUF and INLEN are not declared here; presumably <iconv/loop.c> provides them -- confirm there. */ +#define UNPACK_BYTES \ + { \ + static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ + wint_t wch = state->__value.__wch; \ + size_t ntotal = state->__count >> 8; \ + \ + inlen = state->__count & 255; \ + /* INMASK is indexed by sequence length minus two and holds the marker bits of the lead byte. */ \ + bytebuf[0] = inmask[ntotal - 2]; \ + \ + do \ + { \ + if (--ntotal < inlen) \ + bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ + wch >>= 6; \ + } \ + while (ntotal > 1); \ + \ + bytebuf[0] |= wch; \ + } + +#define CLEAR_STATE \ + state->__count = 0 + + +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UCS2 to the internal (UCS4-like) format. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs2_internal_loop +#define TO_LOOP ucs2_internal_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_ucs2_internal +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint16_t u1 = get16 (inptr); \ + \ + if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) \ + NOTE(review): the store below has no else branch, so this \ + relies on the handler leaving the current iteration -- \ + confirm in loop.c. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (2); \ + } \ + \ + *((uint32_t *) outptr) = u1; \ + outptr += sizeof (uint32_t); \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UCS2. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs2_loop +#define TO_LOOP internal_ucs2_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_internal_ucs2 +#define ONE_DIRECTION 1 +/* BODY handles one 32-bit unit VAL: values of 0x10000 and above go through UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER, surrogates are rejected or skipped, and everything else is written as one 16-bit unit with put16. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + \ + if (__glibc_unlikely (val >= 0x10000)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could exploit this \ + by synthesizing any desired plane 1-16 character. \ + RESULT is set before the ignore check, so it stays \ + __GCONV_ILLEGAL_INPUT even when the unit is skipped. */ \ + result = __GCONV_ILLEGAL_INPUT; \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + put16 (outptr, val); \ + outptr += sizeof (uint16_t); \ + inptr += 4; \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION 1 +#define FROM_LOOP ucs2reverse_internal_loop +#define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/ +#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal +#define ONE_DIRECTION 1 +/* BODY reads one 16-bit unit with get16, swaps its bytes and stores it as a 32-bit unit; surrogates are rejected or, when errors are ignored, skipped. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint16_t u1 = bswap_16 (get16 (inptr)); \ + \ + if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) \ + NOTE(review): RESULT is set only when the error is not \ + ignored, whereas the internal-to-UCS2 BODY sets it before \ + the ignore check -- confirm the difference is intended. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 2; \ + ++*irreversible; \ + continue; \ + } \ + \ + *((uint32_t *) outptr) = u1; \ + outptr += sizeof (uint32_t); \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> + + +/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. Each 32-bit unit below 0x10000 that is not a surrogate is written as one byte-swapped 16-bit unit. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP internal_ucs2reverse_loop +#define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/ +#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + if (__glibc_unlikely (val >= 0x10000)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. 
If we let \ + surrogates pass through, attackers could exploit this \ + by synthesizing any desired plane 1-16 character. \ + NOTE(review): RESULT is set only when the error is not \ + ignored; the non-reversed variant sets it first -- confirm. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + put16 (outptr, bswap_16 (val)); \ + outptr += sizeof (uint16_t); \ + inptr += 4; \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> diff --git a/REORG.TODO/iconv/gconv_trans.c b/REORG.TODO/iconv/gconv_trans.c new file mode 100644 index 0000000000..53b8822615 --- /dev/null +++ b/REORG.TODO/iconv/gconv_trans.c @@ -0,0 +1,239 @@ +/* Transliteration using the locale's data. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <assert.h> +#include <dlfcn.h> +#include <search.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include <libc-lock.h> +#include "gconv_int.h" +#include "../locale/localeinfo.h" + + +int +__gconv_transliterate (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char *inbufstart, + const unsigned char **inbufp, + const unsigned char *inbufend, + unsigned char **outbufstart, size_t *irreversible) +{ + /* Find out about the locale's transliteration. */ + uint_fast32_t size; + const uint32_t *from_idx; + const uint32_t *from_tbl; + const uint32_t *to_idx; + const uint32_t *to_tbl; + const uint32_t *winbuf; + const uint32_t *winbufend; + uint_fast32_t low; + uint_fast32_t high; + + /* The input buffer. There are actually 4-byte values. */ + winbuf = (const uint32_t *) *inbufp; + winbufend = (const uint32_t *) inbufend; + + __gconv_fct fct = step->__fct; +#ifdef PTR_DEMANGLE + if (step->__shlib_handle != NULL) + PTR_DEMANGLE (fct); +#endif + + /* If there is no transliteration information in the locale don't do + anything and return the error. */ + size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE); + if (size == 0) + goto no_rules; + + /* Get the rest of the values. */ + from_idx = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX); + from_tbl = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL); + to_idx = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX); + to_tbl = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL); + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + /* The array starting at FROM_IDX contains indeces to the string table + in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we + are doing binary search. 
*/ + low = 0; + high = size; + while (low < high) + { + uint_fast32_t med = (low + high) / 2; + uint32_t idx; + int cnt; + + /* Compare the string at this index with the string at the current + position in the input buffer. */ + idx = from_idx[med]; + cnt = 0; + do + { + if (from_tbl[idx + cnt] != winbuf[cnt]) + /* Does not match. */ + break; + ++cnt; + } + while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend); + + if (cnt > 0 && from_tbl[idx + cnt] == L'\0') + { + /* Found a matching input sequence. Now try to convert the + possible replacements. */ + uint32_t idx2 = to_idx[med]; + + do + { + /* Determine length of replacement. */ + uint_fast32_t len = 0; + int res; + const unsigned char *toinptr; + unsigned char *outptr; + + while (to_tbl[idx2 + len] != L'\0') + ++len; + + /* Try this input text. */ + toinptr = (const unsigned char *) &to_tbl[idx2]; + outptr = *outbufstart; + res = DL_CALL_FCT (fct, + (step, step_data, &toinptr, + (const unsigned char *) &to_tbl[idx2 + len], + &outptr, NULL, 0, 0)); + if (res != __GCONV_ILLEGAL_INPUT) + { + /* If the conversion succeeds we have to increment the + input buffer. */ + if (res == __GCONV_EMPTY_INPUT) + { + *inbufp += cnt * sizeof (uint32_t); + ++*irreversible; + res = __GCONV_OK; + } + /* Do not increment the output pointer if we could not + store the entire output. */ + if (res != __GCONV_FULL_OUTPUT) + *outbufstart = outptr; + + return res; + } + + /* Next replacement. */ + idx2 += len + 1; + } + while (to_tbl[idx2] != L'\0'); + + /* Nothing found, continue searching. */ + } + else if (cnt > 0) + /* This means that the input buffer contents matches a prefix of + an entry. Since we cannot match it unless we get more input, + we will tell the caller about it. */ + return __GCONV_INCOMPLETE_INPUT; + + if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt]) + low = med + 1; + else + high = med; + } + + no_rules: + /* Maybe the character is supposed to be ignored. 
*/ + if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0) + { + int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN); + const uint32_t *ranges = + (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE); + const uint32_t wc = *(const uint32_t *) (*inbufp); + int i; + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + for (i = 0; i < n; ranges += 3, ++i) + if (ranges[0] <= wc && wc <= ranges[1] + && (wc - ranges[0]) % ranges[2] == 0) + { + /* Matches the range. Ignore it. */ + *inbufp += 4; + ++*irreversible; + return __GCONV_OK; + } + else if (wc < ranges[0]) + /* There cannot be any other matching range since they are + sorted. */ + break; + } + + /* One last chance: use the default replacement. */ + if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0) + { + const uint32_t *default_missing = (const uint32_t *) + _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING); + const unsigned char *toinptr = (const unsigned char *) default_missing; + uint32_t len = _NL_CURRENT_WORD (LC_CTYPE, + _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN); + unsigned char *outptr; + int res; + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + outptr = *outbufstart; + res = DL_CALL_FCT (fct, + (step, step_data, &toinptr, + (const unsigned char *) (default_missing + len), + &outptr, NULL, 0, 0)); + + if (res != __GCONV_ILLEGAL_INPUT) + { + /* If the conversion succeeds we have to increment the + input buffer. */ + if (res == __GCONV_EMPTY_INPUT) + { + /* This worked but is not reversible. */ + ++*irreversible; + *inbufp += 4; + res = __GCONV_OK; + } + *outbufstart = outptr; + + return res; + } + } + + /* Haven't found a match. 
*/ + return __GCONV_ILLEGAL_INPUT; +} +libc_hidden_def (__gconv_transliterate) diff --git a/REORG.TODO/iconv/iconv.c b/REORG.TODO/iconv/iconv.c new file mode 100644 index 0000000000..2c6f0f0bd1 --- /dev/null +++ b/REORG.TODO/iconv/iconv.c @@ -0,0 +1,95 @@ +/* Convert characters in input buffer using conversion descriptor to + output buffer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> /* for NULL */ +#include <errno.h> +#include <iconv.h> + +#include <gconv_int.h> + +#include <assert.h> + + +size_t +iconv (iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, + size_t *outbytesleft) +{ + __gconv_t gcd = (__gconv_t) cd; + char *outstart = outbuf ? 
*outbuf : NULL; + size_t irreversible; + int result; + + if (__glibc_unlikely (inbuf == NULL || *inbuf == NULL)) + { + if (outbuf == NULL || *outbuf == NULL) + result = __gconv (gcd, NULL, NULL, NULL, NULL, &irreversible); + else + result = __gconv (gcd, NULL, NULL, (unsigned char **) outbuf, + (unsigned char *) (outstart + *outbytesleft), + &irreversible); + } + else + { + const char *instart = *inbuf; + + result = __gconv (gcd, (const unsigned char **) inbuf, + (const unsigned char *) (*inbuf + *inbytesleft), + (unsigned char **) outbuf, + (unsigned char *) (*outbuf + *outbytesleft), + &irreversible); + + *inbytesleft -= *inbuf - instart; + } + if (outstart != NULL) + *outbytesleft -= *outbuf - outstart; + + switch (__builtin_expect (result, __GCONV_OK)) + { + case __GCONV_ILLEGAL_DESCRIPTOR: + __set_errno (EBADF); + irreversible = (size_t) -1L; + break; + + case __GCONV_ILLEGAL_INPUT: + __set_errno (EILSEQ); + irreversible = (size_t) -1L; + break; + + case __GCONV_FULL_OUTPUT: + __set_errno (E2BIG); + irreversible = (size_t) -1L; + break; + + case __GCONV_INCOMPLETE_INPUT: + __set_errno (EINVAL); + irreversible = (size_t) -1L; + break; + + case __GCONV_EMPTY_INPUT: + case __GCONV_OK: + /* Nothing. */ + break; + + default: + assert (!"Nothing like this should happen"); + } + + return irreversible; +} diff --git a/REORG.TODO/iconv/iconv.h b/REORG.TODO/iconv/iconv.h new file mode 100644 index 0000000000..d5d9d00f6b --- /dev/null +++ b/REORG.TODO/iconv/iconv.h @@ -0,0 +1,55 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ICONV_H +#define _ICONV_H 1 + +#include <features.h> +#define __need_size_t +#include <stddef.h> + + +__BEGIN_DECLS + +/* Identifier for conversion method from one codeset to another. */ +typedef void *iconv_t; + + +/* Allocate descriptor for code conversion from codeset FROMCODE to + codeset TOCODE. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern iconv_t iconv_open (const char *__tocode, const char *__fromcode); + +/* Convert at most *INBYTESLEFT bytes from *INBUF according to the + code conversion algorithm specified by CD and place up to + *OUTBYTESLEFT bytes in buffer at *OUTBUF. */ +extern size_t iconv (iconv_t __cd, char **__restrict __inbuf, + size_t *__restrict __inbytesleft, + char **__restrict __outbuf, + size_t *__restrict __outbytesleft); + +/* Free resources allocated for descriptor CD for code conversion. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int iconv_close (iconv_t __cd); + +__END_DECLS + +#endif /* iconv.h */ diff --git a/REORG.TODO/iconv/iconv_charmap.c b/REORG.TODO/iconv/iconv_charmap.c new file mode 100644 index 0000000000..b8ece3bda2 --- /dev/null +++ b/REORG.TODO/iconv/iconv_charmap.c @@ -0,0 +1,560 @@ +/* Convert using charmaps and possibly iconv(). + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <iconv.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "iconv_prog.h" + + +/* Prototypes for a few program-wide used functions. */ +#include <programs/xmalloc.h> + + +struct convtable +{ + int term[256 / 8]; + union + { + struct convtable *sub; + struct charseq *out; + } val[256]; +}; + + +static inline struct convtable * +allocate_table (void) +{ + return (struct convtable *) xcalloc (1, sizeof (struct convtable)); +} + + +static inline int +is_term (struct convtable *tbl, unsigned int idx) +{ + return tbl->term[idx / 8] & (1 << (idx % 8)); +} + + +static inline void +clear_term (struct convtable *tbl, unsigned int idx) +{ + tbl->term[idx / 8] &= ~(1 << (idx % 8)); +} + + +static inline void +set_term (struct convtable *tbl, unsigned int idx) +{ + tbl->term[idx / 8] |= 1 << (idx % 8); +} + + +/* Generate the conversion table. 
*/ +static struct convtable *use_from_charmap (struct charmap_t *from_charmap, + const char *to_code); +static struct convtable *use_to_charmap (const char *from_code, + struct charmap_t *to_charmap); +static struct convtable *use_both_charmaps (struct charmap_t *from_charmap, + struct charmap_t *to_charmap); + +/* Prototypes for the functions doing the actual work. */ +static int process_block (struct convtable *tbl, char *addr, size_t len, + FILE *output); +static int process_fd (struct convtable *tbl, int fd, FILE *output); +static int process_file (struct convtable *tbl, FILE *input, FILE *output); + + +int +charmap_conversion (const char *from_code, struct charmap_t *from_charmap, + const char *to_code, struct charmap_t *to_charmap, + int argc, int remaining, char *argv[], + const char *output_file) +{ + struct convtable *cvtbl; + int status = EXIT_SUCCESS; + + /* We have three different cases to handle: + + - both, from_charmap and to_charmap, are available. This means we + can assume that the symbolic names match and use them to create + the mapping. + + - only from_charmap is available. In this case we can only hope that + the symbolic names used are of the <Uxxxx> form in which case we + can use a UCS4->"to_code" iconv() conversion for the second step. + + - only to_charmap is available. This is similar, only that we would + use iconv() for the "to_code"->UCS4 conversion. + + We first create a table which maps input bytes into output bytes. + Once this is done we can handle all three of the cases above + equally. */ + if (from_charmap != NULL) + { + if (to_charmap == NULL) + cvtbl = use_from_charmap (from_charmap, to_code); + else + cvtbl = use_both_charmaps (from_charmap, to_charmap); + } + else + { + assert (to_charmap != NULL); + cvtbl = use_to_charmap (from_code, to_charmap); + } + + /* If we couldn't generate a table stop now. */ + if (cvtbl == NULL) + return EXIT_FAILURE; + + /* Determine output file. 
*/ + FILE *output; + if (output_file != NULL && strcmp (output_file, "-") != 0) + { + output = fopen (output_file, "w"); + if (output == NULL) + error (EXIT_FAILURE, errno, _("cannot open output file")); + } + else + output = stdout; + + /* We can now start the conversion. */ + if (remaining == argc) + { + if (process_file (cvtbl, stdin, output) != 0) + status = EXIT_FAILURE; + } + else + do + { + int fd; + + if (verbose) + printf ("%s:\n", argv[remaining]); + if (strcmp (argv[remaining], "-") == 0) + fd = 0; + else + { + fd = open (argv[remaining], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), + argv[remaining]); + status = EXIT_FAILURE; + continue; + } + } + +#ifdef _POSIX_MAPPED_FILES + struct stat64 st; + char *addr; + /* We have possibilities for reading the input file. First try + to mmap() it since this will provide the fastest solution. */ + if (fstat64 (fd, &st) == 0 + && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, + fd, 0)) != MAP_FAILED)) + { + /* Yes, we can use mmap(). The descriptor is not needed + anymore. */ + if (close (fd) != 0) + error (EXIT_FAILURE, errno, + _("error while closing input `%s'"), argv[remaining]); + + if (process_block (cvtbl, addr, st.st_size, output) < 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + } + else +#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + if (process_fd (cvtbl, fd, output) != 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input file anymore. 
*/ + close (fd); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* Now close the file. */ + close (fd); + } + } + while (++remaining < argc); + + /* All done. */ + return status; +} + + +/* Add the IN->OUT mapping to TBL. OUT is potentially stored in the table. + IN is used only here, so it need not be kept live afterwards. */ +static void +add_bytes (struct convtable *tbl, const struct charseq *in, struct charseq *out) +{ + int n = 0; + unsigned int byte; + + assert (in->nbytes > 0); + + byte = ((unsigned char *) in->bytes)[n]; + while (n + 1 < in->nbytes) + { + if (is_term (tbl, byte) || tbl->val[byte].sub == NULL) + { + /* Note that we simply ignore a definition for a byte sequence + which is also the prefix for a longer one. */ + clear_term (tbl, byte); + tbl->val[byte].sub = + (struct convtable *) xcalloc (1, sizeof (struct convtable)); + } + + tbl = tbl->val[byte].sub; + + byte = ((unsigned char *) in->bytes)[++n]; + } + + /* Only add the new sequence if there is none yet and the byte sequence + is not part of an even longer one. */ + if (! is_term (tbl, byte) && tbl->val[byte].sub == NULL) + { + set_term (tbl, byte); + tbl->val[byte].out = out; + } +} + +/* Try to convert SEQ from WCHAR_T format using CD. + Returns a malloc'd struct or NULL. */ +static struct charseq * +convert_charseq (iconv_t cd, const struct charseq *seq) +{ + struct charseq *result = NULL; + + if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) + { + /* There is a chance. Try the iconv module. */ + wchar_t inbuf[1] = { seq->ucs4 }; + unsigned char outbuf[64]; + char *inptr = (char *) inbuf; + size_t inlen = sizeof (inbuf); + char *outptr = (char *) outbuf; + size_t outlen = sizeof (outbuf); + + (void) iconv (cd, &inptr, &inlen, &outptr, &outlen); + + if (outptr != (char *) outbuf) + { + /* We got some output. Good, use it. 
*/ + outlen = sizeof (outbuf) - outlen; + assert ((char *) outbuf + outlen == outptr); + + result = xmalloc (sizeof (struct charseq) + outlen); + result->name = seq->name; + result->ucs4 = seq->ucs4; + result->nbytes = outlen; + memcpy (result->bytes, outbuf, outlen); + } + + /* Clear any possible state left behind. */ + (void) iconv (cd, NULL, NULL, NULL, NULL); + } + + return result; +} + + +static struct convtable * +use_from_charmap (struct charmap_t *from_charmap, const char *to_code) +{ + /* We iterate over all entries in the from_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the to_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + cd = iconv_open (to_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. */ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *in = data; + struct charseq *newp = convert_charseq (cd, in); + if (newp != NULL) + add_bytes (rettbl, in, newp); + } + + iconv_close (cd); + + return rettbl; +} + + +static struct convtable * +use_to_charmap (const char *from_code, struct charmap_t *to_charmap) +{ + /* We iterate over all entries in the to_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the from_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + /* Note that the conversion we use here is the reverse direction. Without + exhaustive search we cannot figure out which input yields the UCS4 + character we are looking for. Therefore we determine it the other + way round. */ + cd = iconv_open (from_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. 
*/ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&to_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *out = data; + struct charseq *newp = convert_charseq (cd, out); + if (newp != NULL) + { + add_bytes (rettbl, newp, out); + free (newp); + } + } + + iconv_close (cd); + + return rettbl; +} + + +static struct convtable * +use_both_charmaps (struct charmap_t *from_charmap, + struct charmap_t *to_charmap) +{ + /* In this case we iterate over all the entries in the from_charmap, + determine the internal name, and find an appropriate entry in the + to_charmap (if it exists). */ + struct convtable *rettbl = allocate_table (); + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *in = (struct charseq *) data; + struct charseq *out = charmap_find_value (to_charmap, key, keylen); + + if (out != NULL) + add_bytes (rettbl, in, out); + } + + return rettbl; +} + + +static int +process_block (struct convtable *tbl, char *addr, size_t len, FILE *output) +{ + size_t n = 0; + + while (n < len) + { + struct convtable *cur = tbl; + unsigned char *curp = (unsigned char *) addr; + unsigned int byte = *curp; + int cnt; + struct charseq *out; + + while (! is_term (cur, byte)) + if (cur->val[byte].sub == NULL) + { + /* This is an invalid sequence. Skip the first byte if we are + ignoring errors. Otherwise punt. */ + if (! omit_invalid) + { + error (0, 0, _("illegal input sequence at position %Zd"), n); + return -1; + } + + n -= curp - (unsigned char *) addr; + + byte = *(curp = (unsigned char *) ++addr); + if (++n >= len) + /* All converted. */ + return 0; + + cur = tbl; + } + else + { + cur = cur->val[byte].sub; + + if (++n >= len) + { + error (0, 0, _("\ +incomplete character or shift sequence at end of buffer")); + return -1; + } + + byte = *++curp; + } + + /* We found a final byte. Write the output bytes. 
*/ + out = cur->val[byte].out; + for (cnt = 0; cnt < out->nbytes; ++cnt) + fputc_unlocked (out->bytes[cnt], output); + + addr = (char *) curp + 1; + ++n; + } + + return 0; +} + + +static int +process_fd (struct convtable *tbl, int fd, FILE *output) +{ + /* We have a problem with reading from a descriptor since we must not + provide the iconv() function an incomplete character or shift + sequence at the end of the buffer. Since we have to deal with + arbitrary encodings we must read the whole text in a buffer and + process it in one step. */ + static char *inbuf = NULL; + static size_t maxlen = 0; + char *inptr = inbuf; + size_t actlen = 0; + + while (actlen < maxlen) + { + ssize_t n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + + if (actlen == maxlen) + while (1) + { + ssize_t n; + char *new_inbuf; + + /* Increase the buffer. */ + new_inbuf = (char *) realloc (inbuf, maxlen + 32768); + if (new_inbuf == NULL) + { + error (0, errno, _("unable to allocate buffer for input")); + return -1; + } + inbuf = new_inbuf; + maxlen += 32768; + inptr = inbuf + actlen; + + do + { + n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + while (actlen < maxlen); + + if (n == 0) + /* Break again so we leave both loops. */ + break; + } + + /* Now we have all the input in the buffer. Process it in one run. */ + return process_block (tbl, inbuf, actlen, output); +} + + +static int +process_file (struct convtable *tbl, FILE *input, FILE *output) +{ + /* This should be safe since we use this function only for `stdin' and + we haven't read anything so far. 
*/ + return process_fd (tbl, fileno (input), output); +} diff --git a/REORG.TODO/iconv/iconv_close.c b/REORG.TODO/iconv/iconv_close.c new file mode 100644 index 0000000000..b4b3aff082 --- /dev/null +++ b/REORG.TODO/iconv/iconv_close.c @@ -0,0 +1,36 @@ +/* Release any resource associated with given conversion descriptor. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <iconv.h> + +#include <gconv_int.h> + + +int +iconv_close (iconv_t cd) +{ + if (__glibc_unlikely (cd == (iconv_t *) -1L)) + { + __set_errno (EBADF); + return -1; + } + + return __gconv_close ((__gconv_t) cd) ? -1 : 0; +} diff --git a/REORG.TODO/iconv/iconv_open.c b/REORG.TODO/iconv/iconv_open.c new file mode 100644 index 0000000000..02e2b7d85e --- /dev/null +++ b/REORG.TODO/iconv/iconv_open.c @@ -0,0 +1,88 @@ +/* Get descriptor for character set conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <alloca.h> +#include <errno.h> +#include <iconv.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include <gconv_int.h> +#include "gconv_charset.h" + + +iconv_t +iconv_open (const char *tocode, const char *fromcode) +{ + /* Normalize the name. We remove all characters beside alpha-numeric, + '_', '-', '/', '.', and ':'. */ + size_t tocode_len = strlen (tocode) + 3; + char *tocode_conv; + bool tocode_usealloca = __libc_use_alloca (tocode_len); + if (tocode_usealloca) + tocode_conv = (char *) alloca (tocode_len); + else + { + tocode_conv = (char *) malloc (tocode_len); + if (tocode_conv == NULL) + return (iconv_t) -1; + } + strip (tocode_conv, tocode); + tocode = (tocode_conv[2] == '\0' && tocode[0] != '\0' + ? upstr (tocode_conv, tocode) : tocode_conv); + + size_t fromcode_len = strlen (fromcode) + 3; + char *fromcode_conv; + bool fromcode_usealloca = __libc_use_alloca (fromcode_len); + if (fromcode_usealloca) + fromcode_conv = (char *) alloca (fromcode_len); + else + { + fromcode_conv = (char *) malloc (fromcode_len); + if (fromcode_conv == NULL) + { + if (! tocode_usealloca) + free (tocode_conv); + return (iconv_t) -1; + } + } + strip (fromcode_conv, fromcode); + fromcode = (fromcode_conv[2] == '\0' && fromcode[0] != '\0' + ? 
upstr (fromcode_conv, fromcode) : fromcode_conv); + + __gconv_t cd; + int res = __gconv_open (tocode, fromcode, &cd, 0); + + if (! fromcode_usealloca) + free (fromcode_conv); + if (! tocode_usealloca) + free (tocode_conv); + + if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) + { + /* We must set the error number according to the specs. */ + if (res == __GCONV_NOCONV || res == __GCONV_NODB) + __set_errno (EINVAL); + + cd = (iconv_t) -1; + } + + return (iconv_t) cd; +} diff --git a/REORG.TODO/iconv/iconv_prog.c b/REORG.TODO/iconv/iconv_prog.c new file mode 100644 index 0000000000..1397d2e9bd --- /dev/null +++ b/REORG.TODO/iconv/iconv_prog.c @@ -0,0 +1,803 @@ +/* Convert text in given files from the specified from-set to the to-set. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <argp.h> +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <iconv.h> +#include <langinfo.h> +#include <locale.h> +#include <search.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libintl.h> +#ifdef _POSIX_MAPPED_FILES +# include <sys/mman.h> +#endif +#include <charmap.h> +#include <gconv_int.h> +#include "iconv_prog.h" +#include "iconvconfig.h" + +/* Get libc version number. */ +#include "../version.h" + +#define PACKAGE _libc_intl_domainname + + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +#define OPT_VERBOSE 1000 +#define OPT_LIST 'l' + +/* Definitions of arguments for argp functions. */ +static const struct argp_option options[] = +{ + { NULL, 0, NULL, 0, N_("Input/Output format specification:") }, + { "from-code", 'f', N_("NAME"), 0, N_("encoding of original text") }, + { "to-code", 't', N_("NAME"), 0, N_("encoding for output") }, + { NULL, 0, NULL, 0, N_("Information:") }, + { "list", 'l', NULL, 0, N_("list all known coded character sets") }, + { NULL, 0, NULL, 0, N_("Output control:") }, + { NULL, 'c', NULL, 0, N_("omit invalid characters from output") }, + { "output", 'o', N_("FILE"), 0, N_("output file") }, + { "silent", 's', NULL, 0, N_("suppress warnings") }, + { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("\ +Convert encoding of given files from one encoding to another."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("[FILE...]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. 
*/ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + +/* Code sets to convert from and to respectively. An empty string as the + default causes the 'iconv_open' function to look up the charset of the + currently selected locale and use it. */ +static const char *from_code = ""; +static const char *to_code = ""; + +/* File to write output to. If NULL write to stdout. */ +static const char *output_file; + +/* Nonzero if verbose ouput is wanted. */ +int verbose; + +/* Nonzero if list of all coded character sets is wanted. */ +static int list; + +/* If nonzero omit invalid character from output. */ +int omit_invalid; + +/* Prototypes for the functions doing the actual work. */ +static int process_block (iconv_t cd, char *addr, size_t len, FILE **output, + const char *output_file); +static int process_fd (iconv_t cd, int fd, FILE **output, + const char *output_file); +static int process_file (iconv_t cd, FILE *input, FILE **output, + const char *output_file); +static void print_known_names (void) internal_function; + + +int +main (int argc, char *argv[]) +{ + int status = EXIT_SUCCESS; + int remaining; + iconv_t cd; + const char *orig_to_code; + struct charmap_t *from_charmap = NULL; + struct charmap_t *to_charmap = NULL; + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* List all coded character sets if wanted. */ + if (list) + { + print_known_names (); + exit (EXIT_SUCCESS); + } + + /* If we have to ignore errors make sure we use the appropriate name for + the to-character-set. 
*/ + orig_to_code = to_code; + if (omit_invalid) + { + const char *errhand = strchrnul (to_code, '/'); + int nslash = 2; + char *newp; + char *cp; + + if (*errhand == '/') + { + --nslash; + errhand = strchrnul (errhand + 1, '/'); + + if (*errhand == '/') + { + --nslash; + errhand = strchr (errhand, '\0'); + } + } + + newp = (char *) alloca (errhand - to_code + nslash + 7 + 1); + cp = mempcpy (newp, to_code, errhand - to_code); + while (nslash-- > 0) + *cp++ = '/'; + if (cp[-1] != '/') + *cp++ = ','; + memcpy (cp, "IGNORE", sizeof ("IGNORE")); + + to_code = newp; + } + + /* POSIX 1003.2b introduces a silly thing: the arguments to -t anf -f + can be file names of charmaps. In this case iconv will have to read + those charmaps and use them to do the conversion. But there are + holes in the specification. There is nothing said that if -f is a + charmap filename that -t must be, too. And vice versa. There is + also no word about the symbolic names used. What if they don't + match? */ + if (strchr (from_code, '/') != NULL) + /* The from-name might be a charmap file name. Try reading the + file. */ + from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0); + + if (strchr (orig_to_code, '/') != NULL) + /* The to-name might be a charmap file name. Try reading the + file. */ + to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0); + + + /* At this point we have to handle two cases. The first one is + where a charmap is used for the from- or to-charset, or both. We + handle this special since it is very different from the sane way of + doing things. The other case allows converting using the iconv() + function. */ + if (from_charmap != NULL || to_charmap != NULL) + /* Construct the conversion table and do the conversion. */ + status = charmap_conversion (from_code, from_charmap, to_code, to_charmap, + argc, remaining, argv, output_file); + else + { + /* Let's see whether we have these coded character sets. 
*/ + cd = iconv_open (to_code, from_code); + if (cd == (iconv_t) -1) + { + if (errno == EINVAL) + { + /* Try to be nice with the user and tell her which of the + two encoding names is wrong. This is possible because + all supported encodings can be converted from/to Unicode, + in other words, because the graph of encodings is + connected. */ + bool from_wrong = + (iconv_open ("UTF-8", from_code) == (iconv_t) -1 + && errno == EINVAL); + bool to_wrong = + (iconv_open (to_code, "UTF-8") == (iconv_t) -1 + && errno == EINVAL); + const char *from_pretty = + (from_code[0] ? from_code : nl_langinfo (CODESET)); + const char *to_pretty = + (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET)); + + if (from_wrong) + { + if (to_wrong) + error (0, 0, + _("\ +conversions from `%s' and to `%s' are not supported"), + from_pretty, to_pretty); + else + error (0, 0, + _("conversion from `%s' is not supported"), + from_pretty); + } + else + { + if (to_wrong) + error (0, 0, + _("conversion to `%s' is not supported"), + to_pretty); + else + error (0, 0, + _("conversion from `%s' to `%s' is not supported"), + from_pretty, to_pretty); + } + + argp_help (&argp, stderr, ARGP_HELP_SEE, + program_invocation_short_name); + exit (1); + } + else + error (EXIT_FAILURE, errno, + _("failed to start conversion processing")); + } + + /* The output file. Will be opened when we are ready to produce + output. */ + FILE *output = NULL; + + /* Now process the remaining files. Write them to stdout or the file + specified with the `-o' parameter. If we have no file given as + the parameter process all from stdin. 
*/ + if (remaining == argc) + { + if (process_file (cd, stdin, &output, output_file) != 0) + status = EXIT_FAILURE; + } + else + do + { +#ifdef _POSIX_MAPPED_FILES + struct stat64 st; + char *addr; +#endif + int fd, ret; + + if (verbose) + fprintf (stderr, "%s:\n", argv[remaining]); + if (strcmp (argv[remaining], "-") == 0) + fd = 0; + else + { + fd = open (argv[remaining], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), + argv[remaining]); + status = EXIT_FAILURE; + continue; + } + } + +#ifdef _POSIX_MAPPED_FILES + /* We have possibilities for reading the input file. First try + to mmap() it since this will provide the fastest solution. */ + if (fstat64 (fd, &st) == 0 + && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, + fd, 0)) != MAP_FAILED)) + { + /* Yes, we can use mmap(). The descriptor is not needed + anymore. */ + if (close (fd) != 0) + error (EXIT_FAILURE, errno, + _("error while closing input `%s'"), + argv[remaining]); + + ret = process_block (cd, addr, st.st_size, &output, + output_file); + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + + if (ret != 0) + { + status = EXIT_FAILURE; + + if (ret < 0) + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + } + else +#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + ret = process_fd (cd, fd, &output, output_file); + + /* Now close the file. */ + close (fd); + + if (ret != 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + if (ret < 0) + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + } + } + while (++remaining < argc); + + /* Close the output file now. 
*/ + if (output != NULL && fclose (output)) + error (EXIT_FAILURE, errno, _("error while closing output file")); + } + + return status; +} + + +/* Handle program arguments. */ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case 'f': + from_code = arg; + break; + case 't': + to_code = arg; + break; + case 'o': + output_file = arg; + break; + case 's': + /* Nothing, for now at least. We are not giving out any information + about missing character or so. */ + break; + case 'c': + /* Omit invalid characters from output. */ + omit_invalid = 1; + break; + case OPT_VERBOSE: + verbose = 1; + break; + case OPT_LIST: + list = 1; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *tp = NULL; + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + return tp; + default: + break; + } + return (char *) text; +} + + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "iconv %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +static int +write_output (const char *outbuf, const char *outptr, FILE **output, + const char *output_file) +{ + /* We have something to write out. */ + int errno_save = errno; + + if (*output == NULL) + { + /* Determine output file. 
*/ + if (output_file != NULL && strcmp (output_file, "-") != 0) + { + *output = fopen (output_file, "w"); + if (*output == NULL) + error (EXIT_FAILURE, errno, _("cannot open output file")); + } + else + *output = stdout; + } + + if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf) + || ferror (*output)) + { + /* Error occurred while printing the result. */ + error (0, 0, _("\ +conversion stopped due to problem in writing the output")); + return -1; + } + + errno = errno_save; + + return 0; +} + + +static int +process_block (iconv_t cd, char *addr, size_t len, FILE **output, + const char *output_file) +{ +#define OUTBUF_SIZE 32768 + const char *start = addr; + char outbuf[OUTBUF_SIZE]; + char *outptr; + size_t outlen; + size_t n; + int ret = 0; + + while (len > 0) + { + outptr = outbuf; + outlen = OUTBUF_SIZE; + n = iconv (cd, &addr, &len, &outptr, &outlen); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { + ret = 1; + if (len == 0) + n = 0; + else + errno = E2BIG; + } + + if (outptr != outbuf) + { + ret = write_output (outbuf, outptr, output, output_file); + if (ret != 0) + break; + } + + if (n != (size_t) -1) + { + /* All the input test is processed. For state-dependent + character sets we have to flush the state now. */ + outptr = outbuf; + outlen = OUTBUF_SIZE; + n = iconv (cd, NULL, NULL, &outptr, &outlen); + + if (outptr != outbuf) + { + ret = write_output (outbuf, outptr, output, output_file); + if (ret != 0) + break; + } + + if (n != (size_t) -1) + break; + + if (omit_invalid && errno == EILSEQ) + { + ret = 1; + break; + } + } + + if (errno != E2BIG) + { + /* iconv() ran into a problem. */ + switch (errno) + { + case EILSEQ: + if (! 
omit_invalid) + error (0, 0, _("illegal input sequence at position %ld"), + (long int) (addr - start)); + break; + case EINVAL: + error (0, 0, _("\ +incomplete character or shift sequence at end of buffer")); + break; + case EBADF: + error (0, 0, _("internal error (illegal descriptor)")); + break; + default: + error (0, 0, _("unknown iconv() error %d"), errno); + break; + } + + return -1; + } + } + + return ret; +} + + +static int +process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) +{ + /* we have a problem with reading from a desriptor since we must not + provide the iconv() function an incomplete character or shift + sequence at the end of the buffer. Since we have to deal with + arbitrary encodings we must read the whole text in a buffer and + process it in one step. */ + static char *inbuf = NULL; + static size_t maxlen = 0; + char *inptr = NULL; + size_t actlen = 0; + + while (actlen < maxlen) + { + ssize_t n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + + if (actlen == maxlen) + while (1) + { + ssize_t n; + char *new_inbuf; + + /* Increase the buffer. */ + new_inbuf = (char *) realloc (inbuf, maxlen + 32768); + if (new_inbuf == NULL) + { + error (0, errno, _("unable to allocate buffer for input")); + return -1; + } + inbuf = new_inbuf; + maxlen += 32768; + inptr = inbuf + actlen; + + do + { + n = read (fd, inptr, maxlen - actlen); + + if (n == 0) + /* No more text to read. */ + break; + + if (n == -1) + { + /* Error while reading. */ + error (0, errno, _("error while reading the input")); + return -1; + } + + inptr += n; + actlen += n; + } + while (actlen < maxlen); + + if (n == 0) + /* Break again so we leave both loops. */ + break; + } + + /* Now we have all the input in the buffer. Process it in one run. 
*/ + return process_block (cd, inbuf, actlen, output, output_file); +} + + +static int +process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file) +{ + /* This should be safe since we use this function only for `stdin' and + we haven't read anything so far. */ + return process_fd (cd, fileno (input), output, output_file); +} + + +/* Print all known character sets/encodings. */ +static void *printlist; +static size_t column; +static int not_first; + +static void +insert_print_list (const void *nodep, VISIT value, int level) +{ + if (value == leaf || value == postorder) + { + const struct gconv_alias *s = *(const struct gconv_alias **) nodep; + tsearch (s->fromname, &printlist, (__compar_fn_t) strverscmp); + } +} + +static void +do_print_human (const void *nodep, VISIT value, int level) +{ + if (value == leaf || value == postorder) + { + const char *s = *(const char **) nodep; + size_t len = strlen (s); + size_t cnt; + + while (len > 0 && s[len - 1] == '/') + --len; + + for (cnt = 0; cnt < len; ++cnt) + if (isalnum (s[cnt])) + break; + if (cnt == len) + return; + + if (not_first) + { + putchar (','); + ++column; + + if (column > 2 && column + len > 77) + { + fputs ("\n ", stdout); + column = 2; + } + else + { + putchar (' '); + ++column; + } + } + else + not_first = 1; + + fwrite (s, len, 1, stdout); + column += len; + } +} + +static void +do_print (const void *nodep, VISIT value, int level) +{ + if (value == leaf || value == postorder) + { + const char *s = *(const char **) nodep; + + puts (s); + } +} + +static void +internal_function +add_known_names (struct gconv_module *node) +{ + if (node->left != NULL) + add_known_names (node->left); + if (node->right != NULL) + add_known_names (node->right); + do + { + if (strcmp (node->from_string, "INTERNAL") != 0) + tsearch (node->from_string, &printlist, (__compar_fn_t) strverscmp); + if (strcmp (node->to_string, "INTERNAL") != 0) + tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp); + + 
node = node->same; + } + while (node != NULL); +} + + +static void +insert_cache (void) +{ + const struct gconvcache_header *header; + const char *strtab; + const struct hash_entry *hashtab; + size_t cnt; + + header = (const struct gconvcache_header *) __gconv_get_cache (); + strtab = (char *) header + header->string_offset; + hashtab = (struct hash_entry *) ((char *) header + header->hash_offset); + + for (cnt = 0; cnt < header->hash_size; ++cnt) + if (hashtab[cnt].string_offset != 0) + { + const char *str = strtab + hashtab[cnt].string_offset; + + if (strcmp (str, "INTERNAL") != 0) + tsearch (str, &printlist, (__compar_fn_t) strverscmp); + } +} + + +static void +internal_function +print_known_names (void) +{ + iconv_t h; + void *cache; + + /* We must initialize the internal databases first. */ + h = iconv_open ("L1", "L1"); + iconv_close (h); + + /* See whether we have a cache. */ + cache = __gconv_get_cache (); + if (cache != NULL) + /* Yep, use only this information. */ + insert_cache (); + else + { + struct gconv_module *modules; + + /* No, then use the information read from the gconv-modules file. + First add the aliases. */ + twalk (__gconv_get_alias_db (), insert_print_list); + + /* Add the from- and to-names from the known modules. */ + modules = __gconv_get_modules_db (); + if (modules != NULL) + add_known_names (modules); + } + + bool human_readable = isatty (fileno (stdout)); + + if (human_readable) + fputs (_("\ +The following list contains all the coded character sets known. This does\n\ +not necessarily mean that all combinations of these names can be used for\n\ +the FROM and TO command line parameters. One coded character set can be\n\ +listed with several different names (aliases).\n\n "), stdout); + + /* Now print the collected names. */ + column = 2; + twalk (printlist, human_readable ? 
do_print_human : do_print); + + if (human_readable && column != 0) + puts (""); +} diff --git a/REORG.TODO/iconv/iconv_prog.h b/REORG.TODO/iconv/iconv_prog.h new file mode 100644 index 0000000000..1571fc9181 --- /dev/null +++ b/REORG.TODO/iconv/iconv_prog.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ICONV_PROG_H +#define _ICONV_PROG_H 1 + +#include <stdio.h> +#include <charmap.h> + + +/* Nonzero if verbose ouput is wanted. */ +extern int verbose; + +/* If nonzero omit invalid character from output. */ +extern int omit_invalid; + +/* Perform the conversion using a charmap or two. */ +extern int charmap_conversion (const char *from_code, + struct charmap_t *from_charmap, + const char *to_code, + struct charmap_t *to_charmap, + int argc, int remaining, char *argv[], + const char *output_file); + + +#endif /* iconv_prog.h */ diff --git a/REORG.TODO/iconv/iconvconfig.c b/REORG.TODO/iconv/iconvconfig.c new file mode 100644 index 0000000000..9be4111a42 --- /dev/null +++ b/REORG.TODO/iconv/iconvconfig.c @@ -0,0 +1,1245 @@ +/* Generate fastloading iconv module configuration files. + Copyright (C) 2000-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <argp.h> +#include <assert.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <locale.h> +#include <mcheck.h> +#include <search.h> +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/cdefs.h> +#include <sys/uio.h> + +#include "iconvconfig.h" + +/* Get libc version number. */ +#include "../version.h" + +#define PACKAGE _libc_intl_domainname + + +/* The hashing function we use. */ +#include "../intl/hash-string.h" + + +/* Types used. 
*/ +struct module +{ + char *fromname; + struct Strent *fromname_strent; + char *filename; + struct Strent *filename_strent; + const char *directory; + struct Strent *directory_strent; + struct module *next; + int cost; + struct Strent *toname_strent; + char toname[0]; +}; + +struct alias +{ + char *fromname; + struct Strent *froment; + struct module *module; + struct Strent *toent; + char toname[0]; +}; + +struct name +{ + const char *name; + struct Strent *strent; + int module_idx; + uint32_t hashval; +}; + +struct name_info +{ + const char *canonical_name; + struct Strent *canonical_strent; + + struct module *from_internal; + struct module *to_internal; + + struct other_conv_list + { + int dest_idx; + struct other_conv + { + gidx_t module_idx; + struct module *module; + struct other_conv *next; + } other_conv; + struct other_conv_list *next; + } *other_conv_list; +}; + + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +/* Short description of program. */ +static const char doc[] = N_("\ +Create fastloading iconv module configuration file."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("[DIR...]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Definitions of arguments for argp functions. 
*/ +#define OPT_PREFIX 300 +#define OPT_NOSTDLIB 301 +static const struct argp_option options[] = +{ + { "prefix", OPT_PREFIX, N_("PATH"), 0, + N_("Prefix used for all file accesses") }, + { "output", 'o', N_("FILE"), 0, N_("\ +Put output in FILE instead of installed location\ + (--prefix does not apply to FILE)") }, + { "nostdlib", OPT_NOSTDLIB, NULL, 0, + N_("Do not search standard directories, only those on the command line") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* The function doing the actual work. */ +static int handle_dir (const char *dir); + +/* Add all known builtin conversions and aliases. */ +static void add_builtins (void); + +/* Create list of all aliases without circular aliases. */ +static void get_aliases (void); + +/* Create list of all modules. */ +static void get_modules (void); + +/* Get list of all the names and thereby indexing them. */ +static void generate_name_list (void); + +/* Collect information about all the names. */ +static void generate_name_info (void); + +/* Write the output file. */ +static int write_output (void); + + +/* Prefix to be used for all file accesses. */ +static const char *prefix = ""; +/* Its length. */ +static size_t prefix_len; + +/* Directory to place output file in. */ +static const char *output_file; +/* Its length. */ +static size_t output_file_len; + +/* If true, omit the GCONV_PATH directories and require some arguments. */ +static bool nostdlib; + +/* Search tree of the modules we know. */ +static void *modules; + +/* Search tree of the aliases we know. */ +static void *aliases; + +/* Search tree for name to index mapping. */ +static void *names; + +/* Number of names we know about. */ +static int nnames; + +/* List of all aliases. */ +static struct alias **alias_list; +static size_t nalias_list; +static size_t nalias_list_max; + +/* List of all modules. 
*/ +static struct module **module_list; +static size_t nmodule_list; +static size_t nmodule_list_max; + +/* Names and information about them. */ +static struct name_info *name_info; +static size_t nname_info; + +/* Number of translations not from or to INTERNAL. */ +static size_t nextra_modules; + + +/* Names and aliases for the builtin transformations. */ +static struct +{ + const char *from; + const char *to; +} builtin_alias[] = + { +#define BUILTIN_ALIAS(alias, real) \ + { .from = alias, .to = real }, +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) +#include <gconv_builtin.h> + }; +#undef BUILTIN_ALIAS +#undef BUILTIN_TRANSFORMATION +#define nbuiltin_alias (sizeof (builtin_alias) / sizeof (builtin_alias[0])) + +static struct +{ + const char *from; + const char *to; + const char *module; + int cost; +} builtin_trans[] = + { +#define BUILTIN_ALIAS(alias, real) +#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ + MinF, MaxF, MinT, MaxT) \ + { .from = From, .to = To, .module = Name, .cost = Cost }, +#include <gconv_builtin.h> + }; +#undef BUILTIN_ALIAS +#undef BUILTIN_TRANSFORMATION +#define nbuiltin_trans (sizeof (builtin_trans) / sizeof (builtin_trans[0])) + + +/* Filename extension for the modules. */ +#ifndef MODULE_EXT +# define MODULE_EXT ".so" +#endif +static const char gconv_module_ext[] = MODULE_EXT; + + +#include <programs/xmalloc.h> + + +/* C string table handling. */ +struct Strtab; +struct Strent; + +/* Create new C string table object in memory. */ +extern struct Strtab *strtabinit (void); + +/* Free resources allocated for C string table ST. */ +extern void strtabfree (struct Strtab *st); + +/* Add string STR (length LEN is != 0) to C string table ST. */ +extern struct Strent *strtabadd (struct Strtab *st, const char *str, + size_t len); + +/* Finalize string table ST and store size in *SIZE and return a pointer. 
*/ +extern void *strtabfinalize (struct Strtab *st, size_t *size); + +/* Get offset in string table for string associated with SE. */ +extern size_t strtaboffset (struct Strent *se); + +/* String table we construct. */ +static struct Strtab *strtab; + + + +int +main (int argc, char *argv[]) +{ + int remaining; + int status = 0; + + /* Enable memory use testing. */ + /* mcheck_pedantic (NULL); */ + mtrace (); + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + if (nostdlib && remaining == argc) + error (2, 0, _("Directory arguments required when using --nostdlib")); + + /* Initialize the string table. */ + strtab = strtabinit (); + + /* Handle all directories mentioned. */ + while (remaining < argc) + status |= handle_dir (argv[remaining++]); + + if (! nostdlib) + { + /* In any case also handle the standard directory. */ + char *path = strdupa (GCONV_PATH), *tp = strsep (&path, ":"); + while (tp != NULL) + { + status |= handle_dir (tp); + + tp = strsep (&path, ":"); + } + } + + /* Add the builtin transformations and aliases without overwriting + anything. */ + add_builtins (); + + /* Store aliases in an array. */ + get_aliases (); + + /* Get list of all modules. */ + get_modules (); + + /* Generate list of all the names we know to handle in some way. */ + generate_name_list (); + + /* Now we know all the names we will handle, collect information + about them. */ + generate_name_info (); + + /* Write the output file, but only if we haven't seen any error. */ + if (status == 0) + status = write_output (); + else + error (1, 0, _("no output file produced because warnings were issued")); + + return status; +} + + +/* Handle program arguments. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case OPT_PREFIX: + prefix = arg; + prefix_len = strlen (prefix); + break; + case 'o': + output_file = arg; + output_file_len = strlen (output_file); + break; + case OPT_NOSTDLIB: + nostdlib = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *tp = NULL; + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + return tp; + default: + break; + } + return (char *) text; +} + + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "iconvconfig %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. 
There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +static int +alias_compare (const void *p1, const void *p2) +{ + const struct alias *a1 = (const struct alias *) p1; + const struct alias *a2 = (const struct alias *) p2; + + return strcmp (a1->fromname, a2->fromname); +} + + +static void +new_alias (const char *fromname, size_t fromlen, const char *toname, + size_t tolen) +{ + struct alias *newp; + void **inserted; + + newp = (struct alias *) xmalloc (sizeof (struct alias) + fromlen + tolen); + + newp->fromname = mempcpy (newp->toname, toname, tolen); + memcpy (newp->fromname, fromname, fromlen); + newp->module = NULL; + + inserted = (void **) tsearch (newp, &aliases, alias_compare); + if (inserted == NULL) + error (EXIT_FAILURE, errno, gettext ("while inserting in search tree")); + if (*inserted != newp) + /* Something went wrong, free this entry. */ + free (newp); + else + { + newp->froment = strtabadd (strtab, newp->fromname, fromlen); + newp->toent = strtabadd (strtab, newp->toname, tolen); + } +} + + +/* Add new alias. */ +static void +add_alias (char *rp) +{ + /* We now expect two more string. The strings are normalized + (converted to UPPER case) and strored in the alias database. */ + char *from; + char *to; + char *wp; + + while (isspace (*rp)) + ++rp; + from = wp = rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + /* There is no `to' string on the line. Ignore it. */ + return; + *wp++ = '\0'; + to = ++rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (to == wp) + /* No `to' string, ignore the line. 
*/ + return; + *wp++ = '\0'; + + assert (strlen (from) + 1 == (size_t) (to - from)); + assert (strlen (to) + 1 == (size_t) (wp - to)); + + new_alias (from, to - from, to, wp - to); +} + + +static void +append_alias (const void *nodep, VISIT value, int level) +{ + if (value != leaf && value != postorder) + return; + + if (nalias_list_max == nalias_list) + { + nalias_list_max += 50; + alias_list = (struct alias **) xrealloc (alias_list, + (nalias_list_max + * sizeof (struct alias *))); + } + + alias_list[nalias_list++] = *(struct alias **) nodep; +} + + +static void +get_aliases (void) +{ + twalk (aliases, append_alias); +} + + +static int +module_compare (const void *p1, const void *p2) +{ + const struct module *m1 = (const struct module *) p1; + const struct module *m2 = (const struct module *) p2; + int result; + + result = strcmp (m1->fromname, m2->fromname); + if (result == 0) + result = strcmp (m1->toname, m2->toname); + + return result; +} + + +/* Create new module record. */ +static void +new_module (const char *fromname, size_t fromlen, const char *toname, + size_t tolen, const char *directory, + const char *filename, size_t filelen, int cost, size_t need_ext) +{ + struct module *new_module; + size_t dirlen = strlen (directory) + 1; + char *tmp; + void **inserted; + + new_module = (struct module *) xmalloc (sizeof (struct module) + + fromlen + tolen + filelen + + need_ext); + + new_module->fromname = mempcpy (new_module->toname, toname, tolen); + + new_module->filename = mempcpy (new_module->fromname, fromname, fromlen); + + new_module->cost = cost; + new_module->next = NULL; + + tmp = mempcpy (new_module->filename, filename, filelen); + if (need_ext) + { + memcpy (tmp - 1, gconv_module_ext, need_ext + 1); + filelen += need_ext; + } + new_module->directory = directory; + + /* Now insert the new module data structure in our search tree. 
*/ + inserted = (void **) tsearch (new_module, &modules, module_compare); + if (inserted == NULL) + error (EXIT_FAILURE, errno, "while inserting in search tree"); + if (*inserted != new_module) + free (new_module); + else + { + new_module->fromname_strent = strtabadd (strtab, new_module->fromname, + fromlen); + new_module->toname_strent = strtabadd (strtab, new_module->toname, + tolen); + new_module->filename_strent = strtabadd (strtab, new_module->filename, + filelen); + new_module->directory_strent = strtabadd (strtab, directory, dirlen); + } +} + + +/* Add new module. */ +static void +internal_function +add_module (char *rp, const char *directory) +{ + /* We expect now + 1. `from' name + 2. `to' name + 3. filename of the module + 4. an optional cost value + */ + char *from; + char *to; + char *module; + char *wp; + int need_ext; + int cost; + + while (isspace (*rp)) + ++rp; + from = rp; + while (*rp != '\0' && !isspace (*rp)) + { + *rp = toupper (*rp); + ++rp; + } + if (*rp == '\0') + return; + *rp++ = '\0'; + to = wp = rp; + while (isspace (*rp)) + ++rp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = toupper (*rp++); + if (*rp == '\0') + return; + *wp++ = '\0'; + do + ++rp; + while (isspace (*rp)); + module = wp; + while (*rp != '\0' && !isspace (*rp)) + *wp++ = *rp++; + if (*rp == '\0') + { + /* There is no cost, use one by default. */ + *wp++ = '\0'; + cost = 1; + } + else + { + /* There might be a cost value. */ + char *endp; + + *wp++ = '\0'; + cost = strtol (rp, &endp, 10); + if (rp == endp || cost < 1) + /* No useful information. */ + cost = 1; + } + + if (module[0] == '\0') + /* No module name given. */ + return; + + /* See whether we must add the ending. */ + need_ext = 0; + if ((size_t) (wp - module) < sizeof (gconv_module_ext) + || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, + sizeof (gconv_module_ext)) != 0) + /* We must add the module extension. 
*/ + need_ext = sizeof (gconv_module_ext) - 1; + + assert (strlen (from) + 1 == (size_t) (to - from)); + assert (strlen (to) + 1 == (size_t) (module - to)); + assert (strlen (module) + 1 == (size_t) (wp - module)); + + new_module (from, to - from, to, module - to, directory, module, wp - module, + cost, need_ext); +} + + +/* Read the config file and add the data for this directory to that. */ +static int +handle_dir (const char *dir) +{ + char *cp; + FILE *fp; + char *line = NULL; + size_t linelen = 0; + size_t dirlen = strlen (dir); + + if (dir[dirlen - 1] != '/') + { + char *newp = (char *) xmalloc (dirlen + 2); + dir = memcpy (newp, dir, dirlen); + newp[dirlen++] = '/'; + newp[dirlen] = '\0'; + } + + char infile[prefix_len + dirlen + sizeof "gconv-modules"]; + cp = infile; + if (dir[0] == '/') + cp = mempcpy (cp, prefix, prefix_len); + strcpy (mempcpy (cp, dir, dirlen), "gconv-modules"); + + fp = fopen (infile, "r"); + if (fp == NULL) + { + error (0, errno, "cannot open `%s'", infile); + return 1; + } + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (!feof_unlocked (fp)) + { + char *rp, *endp, *word; + ssize_t n = __getdelim (&line, &linelen, '\n', fp); + + if (n < 0) + /* An error occurred. */ + break; + + rp = line; + /* Terminate the line (excluding comments or newline) with a NUL + byte to simplify the following code. */ + endp = strchr (rp, '#'); + if (endp != NULL) + *endp = '\0'; + else + if (rp[n - 1] == '\n') + rp[n - 1] = '\0'; + + while (isspace (*rp)) + ++rp; + + /* If this is an empty line go on with the next one. */ + if (rp == endp) + continue; + + word = rp; + while (*rp != '\0' && !isspace (*rp)) + ++rp; + + if (rp - word == sizeof ("alias") - 1 + && memcmp (word, "alias", sizeof ("alias") - 1) == 0) + add_alias (rp); + else if (rp - word == sizeof ("module") - 1 + && memcmp (word, "module", sizeof ("module") - 1) == 0) + add_module (rp, dir); + /* else */ + /* Otherwise ignore the line. 
*/ + } + + free (line); + + fclose (fp); + + return 0; +} + + +static void +append_module (const void *nodep, VISIT value, int level) +{ + struct module *mo; + + if (value != leaf && value != postorder) + return; + + mo = *(struct module **) nodep; + + if (nmodule_list > 0 + && strcmp (module_list[nmodule_list - 1]->fromname, mo->fromname) == 0) + { + /* Same name. */ + mo->next = module_list[nmodule_list - 1]; + module_list[nmodule_list - 1] = mo; + + return; + } + + if (nmodule_list_max == nmodule_list) + { + nmodule_list_max += 50; + module_list = (struct module **) xrealloc (module_list, + (nmodule_list_max + * sizeof (struct module *))); + } + + module_list[nmodule_list++] = mo; +} + + +static void +get_modules (void) +{ + twalk (modules, append_module); +} + + +static void +add_builtins (void) +{ + size_t cnt; + + /* Add all aliases. */ + for (cnt = 0; cnt < nbuiltin_alias; ++cnt) + new_alias (builtin_alias[cnt].from, + strlen (builtin_alias[cnt].from) + 1, + builtin_alias[cnt].to, + strlen (builtin_alias[cnt].to) + 1); + + /* add the builtin transformations. */ + for (cnt = 0; cnt < nbuiltin_trans; ++cnt) + new_module (builtin_trans[cnt].from, + strlen (builtin_trans[cnt].from) + 1, + builtin_trans[cnt].to, + strlen (builtin_trans[cnt].to) + 1, + "", builtin_trans[cnt].module, + strlen (builtin_trans[cnt].module) + 1, + builtin_trans[cnt].cost, 0); +} + + +static int +name_compare (const void *p1, const void *p2) +{ + const struct name *n1 = (const struct name *) p1; + const struct name *n2 = (const struct name *) p2; + + return strcmp (n1->name, n2->name); +} + + +static struct name * +new_name (const char *str, struct Strent *strent) +{ + struct name *newp = (struct name *) xmalloc (sizeof (struct name)); + + newp->name = str; + newp->strent = strent; + newp->module_idx = -1; + newp->hashval = __hash_string (str); + + ++nnames; + + return newp; +} + + +static void +generate_name_list (void) +{ + size_t i; + + /* A name we always need. 
*/ + tsearch (new_name ("INTERNAL", strtabadd (strtab, "INTERNAL", + sizeof ("INTERNAL"))), + &names, name_compare); + + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + if (strcmp (module_list[i]->fromname, "INTERNAL") != 0) + tsearch (new_name (module_list[i]->fromname, + module_list[i]->fromname_strent), + &names, name_compare); + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->toname, "INTERNAL") != 0) + tsearch (new_name (runp->toname, runp->toname_strent), + &names, name_compare); + } +} + + +static int +name_to_module_idx (const char *name, int add) +{ + struct name **res; + struct name fake_name = { .name = name }; + int idx; + + res = (struct name **) tfind (&fake_name, &names, name_compare); + if (res == NULL) + abort (); + + idx = (*res)->module_idx; + if (idx == -1 && add) + /* No module index assigned yet. */ + idx = (*res)->module_idx = nname_info++; + + return idx; +} + + +static void +generate_name_info (void) +{ + size_t i; + int idx; + + name_info = (struct name_info *) xcalloc (nmodule_list + 1, + sizeof (struct name_info)); + + /* First add a special entry for the INTERNAL name. This must have + index zero. 
*/ + idx = name_to_module_idx ("INTERNAL", 1); + name_info[0].canonical_name = "INTERNAL"; + name_info[0].canonical_strent = strtabadd (strtab, "INTERNAL", + sizeof ("INTERNAL")); + assert (nname_info == 1); + + for (i = 0; i < nmodule_list; ++i) + { + struct module *runp; + + for (runp = module_list[i]; runp != NULL; runp = runp->next) + if (strcmp (runp->fromname, "INTERNAL") == 0) + { + idx = name_to_module_idx (runp->toname, 1); + name_info[idx].from_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->toname) == 0); + name_info[idx].canonical_name = runp->toname; + name_info[idx].canonical_strent = runp->toname_strent; + } + else if (strcmp (runp->toname, "INTERNAL") == 0) + { + idx = name_to_module_idx (runp->fromname, 1); + name_info[idx].to_internal = runp; + assert (name_info[idx].canonical_name == NULL + || strcmp (name_info[idx].canonical_name, + runp->fromname) == 0); + name_info[idx].canonical_name = runp->fromname; + name_info[idx].canonical_strent = runp->fromname_strent; + } + else + { + /* This is a transformation not to or from the INTERNAL + encoding. */ + int from_idx = name_to_module_idx (runp->fromname, 1); + int to_idx = name_to_module_idx (runp->toname, 1); + struct other_conv_list *newp; + + newp = (struct other_conv_list *) + xmalloc (sizeof (struct other_conv_list)); + newp->other_conv.module_idx = to_idx; + newp->other_conv.module = runp; + newp->other_conv.next = NULL; /* XXX Allow multiple module sequence */ + newp->dest_idx = to_idx; + newp->next = name_info[from_idx].other_conv_list; + name_info[from_idx].other_conv_list = newp; + assert (name_info[from_idx].canonical_name == NULL + || strcmp (name_info[from_idx].canonical_name, + runp->fromname) == 0); + name_info[from_idx].canonical_name = runp->fromname; + name_info[from_idx].canonical_strent = runp->fromname_strent; + + ++nextra_modules; + } + } + + /* Now add the module index information for all the aliases. 
*/ + for (i = 0; i < nalias_list; ++i) + { + struct name fake_name = { .name = alias_list[i]->toname }; + struct name **tonamep; + + tonamep = (struct name **) tfind (&fake_name, &names, name_compare); + if (tonamep != NULL) + { + struct name *newp = new_name (alias_list[i]->fromname, + alias_list[i]->froment); + newp->module_idx = (*tonamep)->module_idx; + tsearch (newp, &names, name_compare); + } + } +} + + +static int +is_prime (unsigned long int candidate) +{ + /* No even number and none less than 10 will be passed here. */ + unsigned long int divn = 3; + unsigned long int sq = divn * divn; + + while (sq < candidate && candidate % divn != 0) + { + ++divn; + sq += 4 * divn; + ++divn; + } + + return candidate % divn != 0; +} + + +static uint32_t +next_prime (uint32_t seed) +{ + /* Make it definitely odd. */ + seed |= 1; + + while (!is_prime (seed)) + seed += 2; + + return seed; +} + + +/* Format of the output file. + + Offset Length Description + 0000 4 Magic header bytes + 0004 2 Offset of string table (stoff) + 0006 2 Offset of name hashing table (hoff) + 0008 2 Hashing table size (hsize) + 000A 2 Offset of module table (moff) + 000C 2 Offset of other conversion module table (ooff) + + stoff ??? String table + + hoff 8*hsize Array of tuples + string table offset + module index + + moff ??? Array of tuples + canonical name offset + from-internal module dir name offset + from-internal module name off + to-internal module dir name offset + to-internal module name offset + offset into other conversion table + + ooff ??? One or more of + number of steps/modules + one or more of tuple + canonical name offset for output + module dir name offset + module name offset + (following last entry with step count 0) +*/ + +static struct hash_entry *hash_table; +static size_t hash_size; + +/* Function to insert the names. 
*/ /* Called through twalk for each node of the name tree; only the leaf and postorder visits do any work. The name is placed in hash_table starting at slot hashval % hash_size and, while that slot is taken (string_offset != 0), the probe advances by 1 + hashval % (hash_size - 2) slots with wrap-around. */ +static void name_insert (const void *nodep, VISIT value, int level) +{ + struct name *name; + unsigned int idx; + unsigned int hval2; + + if (value != leaf && value != postorder) + return; + + name = *(struct name **) nodep; + idx = name->hashval % hash_size; + hval2 = 1 + name->hashval % (hash_size - 2); + + while (hash_table[idx].string_offset != 0) + if ((idx += hval2) >= hash_size) + idx -= hash_size; + + hash_table[idx].string_offset = strtaboffset (name->strent); + + assert (name->module_idx != -1); + hash_table[idx].module_idx = name->module_idx; +} + /* Write the cache file. The image consists of the header, the string table (padded to a multiple of sizeof (gidx_t)), the name hash table, the module table and the table of non-INTERNAL conversions. It is written with a single writev call to a mkstemp temporary, which is then given mode 0644 and renamed to the final name. Returns 1 if the temporary file cannot be created and 0 on success; a failing writev, fchmod or rename removes the temporary file and terminates through error (EXIT_FAILURE, ...). NOTE(review): every offset stored in the header and the tables is a gidx_t and no range check on the sizes is visible in this function -- confirm that the data always fits that type. */ +static int +write_output (void) +{ + int fd; + char *string_table; + size_t string_table_size; + struct gconvcache_header header; + struct module_entry *module_table; + char *extra_table; + char *cur_extra_table; + size_t n; + int idx; + struct iovec iov[6]; + static const gidx_t null_word; + size_t total; + char finalname[prefix_len + sizeof GCONV_MODULES_CACHE]; + char tmpfname[(output_file == NULL ? sizeof finalname : output_file_len + 1) + + strlen (".XXXXXX")]; + + /* Open the output file. */ + if (output_file == NULL) + { + assert (GCONV_MODULES_CACHE[0] == '/'); + strcpy (stpcpy (mempcpy (tmpfname, prefix, prefix_len), + GCONV_MODULES_CACHE), + ".XXXXXX"); + strcpy (mempcpy (finalname, prefix, prefix_len), GCONV_MODULES_CACHE); + } + else + strcpy (mempcpy (tmpfname, output_file, output_file_len), ".XXXXXX"); + fd = mkstemp (tmpfname); + if (fd == -1) + return 1; + + /* Create the string table. */ + string_table = strtabfinalize (strtab, &string_table_size); + + /* Create the hashing table. We know how many strings we have. + Creating a perfect hash table is not reasonable here. Therefore + we use open hashing and a table size which is the next prime 40% + larger than the number of strings. */ + hash_size = next_prime (nnames * 1.4); + hash_table = (struct hash_entry *) xcalloc (hash_size, + sizeof (struct hash_entry)); + /* Fill the hash table.
*/ + twalk (names, name_insert); + + /* Create the section for the module list. */ + module_table = (struct module_entry *) xcalloc (sizeof (struct module_entry), + nname_info); + + /* Allocate memory for the non-INTERNAL conversions. The allocated + memory can be more than is actually needed. */ + extra_table = (char *) xcalloc (sizeof (struct extra_entry) + + sizeof (gidx_t) + + sizeof (struct extra_entry_module), + nextra_modules); + cur_extra_table = extra_table; + + /* Fill in the module information. */ + for (n = 0; n < nname_info; ++n) + { + module_table[n].canonname_offset = + strtaboffset (name_info[n].canonical_strent); + + if (name_info[n].from_internal == NULL) + { + module_table[n].fromdir_offset = 0; + module_table[n].fromname_offset = 0; + } + else + { + module_table[n].fromdir_offset = + strtaboffset (name_info[n].from_internal->directory_strent); + module_table[n].fromname_offset = + strtaboffset (name_info[n].from_internal->filename_strent); + } + + if (name_info[n].to_internal == NULL) + { + module_table[n].todir_offset = 0; + module_table[n].toname_offset = 0; + } + else + { + module_table[n].todir_offset = + strtaboffset (name_info[n].to_internal->directory_strent); + module_table[n].toname_offset = + strtaboffset (name_info[n].to_internal->filename_strent); + } + + if (name_info[n].other_conv_list != NULL) + { + struct other_conv_list *other = name_info[n].other_conv_list; + + /* Store the reference. We add 1 to distinguish the entry + at offset zero from the case where no extra modules are + available. The file reader has to account for the + offset. */ + module_table[n].extra_offset = 1 + cur_extra_table - extra_table; + + do + { + struct other_conv *runp; + struct extra_entry *extra; + + /* Allocate new entry.
*/ + extra = (struct extra_entry *) cur_extra_table; + cur_extra_table += sizeof (struct extra_entry); + extra->module_cnt = 0; + + runp = &other->other_conv; + do + { + cur_extra_table += sizeof (struct extra_entry_module); + extra->module[extra->module_cnt].outname_offset = + runp->next == NULL + ? other->dest_idx : runp->next->module_idx; + extra->module[extra->module_cnt].dir_offset = + strtaboffset (runp->module->directory_strent); + extra->module[extra->module_cnt].name_offset = + strtaboffset (runp->module->filename_strent); + ++extra->module_cnt; + + runp = runp->next; + } + while (runp != NULL); + + other = other->next; + } + while (other != NULL); + + /* Final module_cnt is zero. */ + *((gidx_t *) cur_extra_table) = 0; + cur_extra_table += sizeof (gidx_t); + } + } + + /* Clear padding. */ + memset (&header, 0, sizeof (struct gconvcache_header)); + + header.magic = GCONVCACHE_MAGIC; + + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct gconvcache_header); + total = iov[0].iov_len; + + header.string_offset = total; + iov[1].iov_base = string_table; + iov[1].iov_len = string_table_size; + total += iov[1].iov_len; + + idx = 2; + if ((string_table_size & (sizeof (gidx_t) - 1)) != 0) + { + iov[2].iov_base = (void *) &null_word; + iov[2].iov_len = (sizeof (gidx_t) + - (string_table_size & (sizeof (gidx_t) - 1))); + total += iov[2].iov_len; + ++idx; + } + + header.hash_offset = total; + header.hash_size = hash_size; + iov[idx].iov_base = hash_table; + iov[idx].iov_len = hash_size * sizeof (struct hash_entry); + total += iov[idx].iov_len; + ++idx; + + header.module_offset = total; + iov[idx].iov_base = module_table; + iov[idx].iov_len = nname_info * sizeof (struct module_entry); + total += iov[idx].iov_len; + ++idx; + + assert ((size_t) (cur_extra_table - extra_table) + <= ((sizeof (struct extra_entry) + sizeof (gidx_t) + + sizeof (struct extra_entry_module)) + * nextra_modules)); + header.otherconv_offset = total; + iov[idx].iov_base = extra_table; +
iov[idx].iov_len = cur_extra_table - extra_table; + total += iov[idx].iov_len; + ++idx; + + if ((size_t) TEMP_FAILURE_RETRY (writev (fd, iov, idx)) != total + /* The file was created with mode 0600. Make it world-readable. */ + || fchmod (fd, 0644) != 0 + /* Rename the file, possibly replacing an old one. */ + || rename (tmpfname, output_file ?: finalname) != 0) + { + int save_errno = errno; + close (fd); + unlink (tmpfname); + error (EXIT_FAILURE, save_errno, + gettext ("cannot generate output file")); + } + + close (fd); + + return 0; +} diff --git a/REORG.TODO/iconv/iconvconfig.h b/REORG.TODO/iconv/iconvconfig.h new file mode 100644 index 0000000000..3f9fbdb1f4 --- /dev/null +++ b/REORG.TODO/iconv/iconvconfig.h @@ -0,0 +1,66 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>.
*/ /* Record layouts of the gconv module cache file as written by write_output in iconvconfig.c. */ + +#include <stdint.h> + + /* Type of every offset, index and count stored in the cache file. */ +typedef uint16_t gidx_t; + + /* File header; the *_offset members are byte offsets from the start of the file. */ +struct gconvcache_header +{ + uint32_t magic; /* GCONVCACHE_MAGIC. */ + gidx_t string_offset; /* Start of the string table. */ + gidx_t hash_offset; /* Start of the name hashing table. */ + gidx_t hash_size; /* Number of hash_entry slots. */ + gidx_t module_offset; /* Start of the module table. */ + gidx_t otherconv_offset; /* Start of the other conversion table. */ +}; + /* One slot of the name hashing table; string_offset == 0 marks a free slot. */ +struct hash_entry +{ + gidx_t string_offset; /* String table offset of the name. */ + gidx_t module_idx; /* Index into the module table. */ +}; + /* One module table record; all members except extra_offset are string table offsets. */ +struct module_entry +{ + gidx_t canonname_offset; /* Canonical name. */ + gidx_t fromdir_offset; /* Directory of the from-INTERNAL module, 0 if none. */ + gidx_t fromname_offset; /* File name of the from-INTERNAL module, 0 if none. */ + gidx_t todir_offset; /* Directory of the to-INTERNAL module, 0 if none. */ + gidx_t toname_offset; /* File name of the to-INTERNAL module, 0 if none. */ + gidx_t extra_offset; /* 1 + byte offset into the other conversion table, 0 if none. */ +}; + /* One conversion in the other conversion table: module_cnt steps follow; a module_cnt of zero ends a list. */ +struct extra_entry +{ + gidx_t module_cnt; + struct extra_entry_module + { + gidx_t outname_offset; /* NOTE(review): write_output stores a name index (dest_idx/module_idx) here, not a string table offset -- confirm against the reader. */ + gidx_t dir_offset; /* String table offset of the module directory. */ + gidx_t name_offset; /* String table offset of the module file name. */ + } module[0]; +}; + + +#define GCONVCACHE_MAGIC 0x20010324 + + +#define GCONV_MODULES_CACHE GCONV_DIR "/gconv-modules.cache" diff --git a/REORG.TODO/iconv/loop.c b/REORG.TODO/iconv/loop.c new file mode 100644 index 0000000000..0160f72cd6 --- /dev/null +++ b/REORG.TODO/iconv/loop.c @@ -0,0 +1,523 @@ +/* Conversion loop frame work. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This file provides a frame for the reader loop in all conversion modules.
+ The actual code must (of course) be provided in the actual module source + code but certain actions can be written down generically, with some + customization options which are these: + + MIN_NEEDED_INPUT minimal number of input bytes needed for the next + conversion. + MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round + of conversion. + + MAX_NEEDED_INPUT you guess it, this is the maximal number of input + bytes needed. It defaults to MIN_NEEDED_INPUT + MAX_NEEDED_OUTPUT likewise for output bytes. + + LOOPFCT name of the function created. If not specified + the name is `loop' but this prevents the use + of multiple functions in the same file. + + BODY this is supposed to expand to the body of the loop. + The user must provide this. + + EXTRA_LOOP_DECLS extra arguments passed from conversion loop call. + + INIT_PARAMS code to define and initialize variables from params. + UPDATE_PARAMS code to store result in params. + + ONEBYTE_BODY body of the specialized conversion function for a + single byte from the current character set to INTERNAL. +*/ + +#include <assert.h> +#include <endian.h> +#include <gconv.h> +#include <stdint.h> +#include <string.h> +#include <wchar.h> +#include <sys/param.h> /* For MIN. */ +#define __need_size_t +#include <stddef.h> +#include <libc-diag.h> + +/* We have to provide support for machines which are not able to handle + unaligned memory accesses. Some of the character encodings have + representations with a fixed width of 2 or 4 bytes. But if we cannot + access unaligned memory we still have to read byte-wise. */ +#undef FCTNAME2 +#if _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED +/* We can handle unaligned memory access. */ +# define get16(addr) *((const uint16_t *) (addr)) +# define get32(addr) *((const uint32_t *) (addr)) + +/* We need no special support for writing values either.
*/ +# define put16(addr, val) *((uint16_t *) (addr)) = (val) +# define put32(addr, val) *((uint32_t *) (addr)) = (val) + +# define FCTNAME2(name) name +#else +/* Distinguish between big endian and little endian. */ +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define get16(addr) \ + (((const unsigned char *) (addr))[1] << 8 \ + | ((const unsigned char *) (addr))[0]) +# define get32(addr) \ + (((((const unsigned char *) (addr))[3] << 8 \ + | ((const unsigned char *) (addr))[2]) << 8 \ + | ((const unsigned char *) (addr))[1]) << 8 \ + | ((const unsigned char *) (addr))[0]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + ((unsigned char *) (addr))[1] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[3] = __val; \ + (void) 0; }) +# else +# define get16(addr) \ + (((const unsigned char *) (addr))[0] << 8 \ + | ((const unsigned char *) (addr))[1]) +# define get32(addr) \ + (((((const unsigned char *) (addr))[0] << 8 \ + | ((const unsigned char *) (addr))[1]) << 8 \ + | ((const unsigned char *) (addr))[2]) << 8 \ + | ((const unsigned char *) (addr))[3]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[1] = __val; \ + ((unsigned char *) (addr))[0] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[3] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[0] = __val; \ + (void) 0; }) +# endif + +# define FCTNAME2(name) name##_unaligned +#endif +#define FCTNAME(name) FCTNAME2(name) + + +/* We need at least one byte for the next round.
*/ +#ifndef MIN_NEEDED_INPUT +# error "MIN_NEEDED_INPUT definition missing" +#elif MIN_NEEDED_INPUT < 1 +# error "MIN_NEEDED_INPUT must be >= 1" +#endif + +/* Let's see how many bytes we need at most; the default is the minimum. */ +#ifndef MAX_NEEDED_INPUT +# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT +#endif + +/* We produce at least one byte in the next round. */ +#ifndef MIN_NEEDED_OUTPUT +# error "MIN_NEEDED_OUTPUT definition missing" +#elif MIN_NEEDED_OUTPUT < 1 +# error "MIN_NEEDED_OUTPUT must be >= 1" +#endif + +/* Let's see how many bytes we produce. */ +#ifndef MAX_NEEDED_OUTPUT +# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT +#endif + +/* Default name for the function. */ +#ifndef LOOPFCT +# define LOOPFCT loop +#endif + +/* Make sure we have a loop body. */ +#ifndef BODY +# error "Definition of BODY missing for function" LOOPFCT +#endif + + +/* If no arguments have to be passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_DECLS +# define EXTRA_LOOP_DECLS +#endif + +/* Allow using UPDATE_PARAMS in macros where #ifdef UPDATE_PARAMS test + isn't possible. */ +#ifndef UPDATE_PARAMS +# define UPDATE_PARAMS do { } while (0) +#endif +#ifndef REINIT_PARAMS +# define REINIT_PARAMS do { } while (0) +#endif + + +/* To make it easier for the writers of the modules, we define a macro + to test whether we have to ignore errors. */ +#define ignore_errors_p() \ + (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS)) + + +/* Error handling for the FROM_LOOP direction, with ignoring of errors. + Note that we cannot use the do while (0) trick since `break' and + `continue' must reach certain points. */ +#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + \ + if (! ignore_errors_p ()) \ + break; \ + \ + /* We ignore the invalid input byte sequence. */ \ + inptr += (Incr); \ + ++*irreversible; \ + /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \ + that "iconv -c" must give the same exitcode as "iconv".
*/ \ + continue; \ + } + +/* Error handling for the TO_LOOP direction, with use of transliteration/ + transcription functions and ignoring of errors. Note that we cannot use + the do while (0) trick since `break' and `continue' must reach certain + points. */ +#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + \ + if (irreversible == NULL) \ + /* This means we are in call from __gconv_transliterate. In this \ + case we are not doing any error recovery ourselves. */ \ + break; \ + \ + /* If needed, flush any conversion state, so that __gconv_transliterate \ + starts with current shift state. */ \ + UPDATE_PARAMS; \ + \ + /* First try the transliteration methods. */ \ + if ((step_data->__flags & __GCONV_TRANSLIT) != 0) \ + result = __gconv_transliterate \ + (step, step_data, *inptrp, \ + &inptr, inend, &outptr, irreversible); \ + \ + REINIT_PARAMS; \ + \ + /* If any of them recognized the input continue with the loop. */ \ + if (result != __GCONV_ILLEGAL_INPUT) \ + { \ + if (__glibc_unlikely (result == __GCONV_FULL_OUTPUT)) \ + break; \ + \ + continue; \ + } \ + \ + /* Next see whether we have to ignore the error. If not, stop. */ \ + if (! ignore_errors_p ()) \ + break; \ + \ + /* When we come here it means we ignore the character. */ \ + ++*irreversible; \ + inptr += Incr; \ + /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \ + that "iconv -c" must give the same exitcode as "iconv". */ \ + continue; \ + } + + +/* Handling of Unicode 3.1 TAG characters. Unicode recommends + "If language codes are not relevant to the particular processing + operation, then they should be ignored." This macro is usually + called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr). */ +#define UNICODE_TAG_HANDLER(Character, Incr) \ + { \ + /* TAG characters are those in the range U+E0000..U+E007F.
*/ \ + if (((Character) >> 7) == (0xe0000 >> 7)) \ + { \ + inptr += Incr; \ + continue; \ + } \ + } + + +/* Run BODY repeatedly over the input between *INPTRP and INEND, writing to + the buffer between *OUTPTRP and OUTEND, then store the advanced pointers + back. RESULT starts as __GCONV_EMPTY_INPUT and is changed by the + pre-checks below or by BODY. The function returns the status, as + defined in gconv.h. */ +static inline int +__attribute ((always_inline)) +FCTNAME (LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, const unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ +#ifdef LOOP_NEED_STATE + mbstate_t *state = step_data->__statep; +#endif +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif + int result = __GCONV_EMPTY_INPUT; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + +#ifdef INIT_PARAMS + INIT_PARAMS; +#endif + + while (inptr != inend) + { + /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are there to help the + compiler generate better code. They will be optimized away + since MIN_NEEDED_OUTPUT is always a constant. */ + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) + { + /* We don't have enough input for another complete input + character. */ + result = __GCONV_INCOMPLETE_INPUT; + break; + } + if ((MIN_NEEDED_OUTPUT != 1 + && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) + || (MIN_NEEDED_OUTPUT == 1 + && __builtin_expect (outptr >= outend, 0))) + { + /* Overflow in the output buffer. */ + result = __GCONV_FULL_OUTPUT; + break; + } + + /* Here comes the body the user provides. It can stop with + RESULT set to __GCONV_INCOMPLETE_INPUT (if the input + characters vary in size), __GCONV_ILLEGAL_INPUT, or + __GCONV_FULL_OUTPUT (if the output characters vary in size). */ + BODY + } + + /* Update the pointers pointed to by the parameters. */ + *inptrp = inptr; + *outptrp = outptr; + UPDATE_PARAMS; + + return result; +} + + +/* Include the file a second time to define the function to handle + unaligned access.
*/ +#if !defined DEFINE_UNALIGNED && !_STRING_ARCH_unaligned \ + && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \ + && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0 +# undef get16 +# undef get32 +# undef put16 +# undef put32 +# undef unaligned + +# define DEFINE_UNALIGNED +# include "loop.c" +# undef DEFINE_UNALIGNED +#else +# if MAX_NEEDED_INPUT > 1 +# define SINGLE(fct) SINGLE2 (fct) +# define SINGLE2(fct) fct##_single +static inline int +__attribute ((always_inline)) +SINGLE(LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ + mbstate_t *state = step_data->__statep; +# ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +# endif +# ifdef LOOP_NEED_DATA + void *data = step->__data; +# endif + int result = __GCONV_OK; + unsigned char bytebuf[MAX_NEEDED_INPUT]; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t inlen; + +# ifdef INIT_PARAMS + INIT_PARAMS; +# endif + +# ifdef UNPACK_BYTES + UNPACK_BYTES +# else + /* Add the bytes from the state to the input buffer. */ + assert ((state->__count & 7) <= sizeof (state->__value)); + for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen) + bytebuf[inlen] = state->__value.__wchb[inlen]; +# endif + + /* Are there enough bytes in the input buffer? */ + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0)) + { + *inptrp = inend; +# ifdef STORE_REST + + /* Building with -O3 GCC emits a `array subscript is above array + bounds' warning. GCC BZ #64739 has been opened for this.
*/ + DIAG_PUSH_NEEDS_COMMENT; + DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Warray-bounds"); + while (inptr < inend) + bytebuf[inlen++] = *inptr++; + DIAG_POP_NEEDS_COMMENT; + + inptr = bytebuf; + inptrp = &inptr; + inend = &bytebuf[inlen]; + + STORE_REST +# else + /* We don't have enough input for another complete input + character. */ + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +# endif + + return __GCONV_INCOMPLETE_INPUT; + } + + /* Is there enough space in the output buffer? */ + if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) + || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) + /* Overflow in the output buffer. */ + return __GCONV_FULL_OUTPUT; + + /* Now add characters from the normal input buffer until bytebuf holds + MAX_NEEDED_INPUT bytes or the input is exhausted. */ + do + bytebuf[inlen++] = *inptr++; + while (inlen < MAX_NEEDED_INPUT && inptr < inend); + + inptr = bytebuf; + inend = &bytebuf[inlen]; + + do + { + BODY + } + while (0); + + /* Now we either have produced an output character and consumed all the + bytes from the state and at least one more, or the character is still + incomplete, or we have some other error (like illegal input character, + no space in output buffer). */ + if (__glibc_likely (inptr != bytebuf)) + { + /* We found a new character. */ + assert (inptr - bytebuf > (state->__count & 7)); + + *inptrp += inptr - bytebuf - (state->__count & 7); + *outptrp = outptr; + + result = __GCONV_OK; + + /* Clear the state buffer. */ +# ifdef CLEAR_STATE + CLEAR_STATE; +# else + state->__count &= ~7; +# endif + } + else if (result == __GCONV_INCOMPLETE_INPUT) + { + /* This can only happen if we have less than MAX_NEEDED_INPUT bytes + available. */ + assert (inend != &bytebuf[MAX_NEEDED_INPUT]); + + *inptrp += inend - bytebuf - (state->__count & 7); +# ifdef STORE_REST + inptrp = &inptr; + + STORE_REST +# else + /* We don't have enough input for another complete input + character.
*/ + assert (inend - inptr > (state->__count & ~7)); + assert (inend - inptr <= sizeof (state->__value)); + state->__count = (state->__count & ~7) | (inend - inptr); + inlen = 0; + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +# endif + } + + return result; +} +# undef SINGLE +# undef SINGLE2 +# endif + + +# ifdef ONEBYTE_BODY +/* Define the shortcut function for btowc. */ +static wint_t +gconv_btowc (struct __gconv_step *step, unsigned char c) + ONEBYTE_BODY +# define FROM_ONEBYTE gconv_btowc +# endif + +#endif + +/* We remove the macro definitions so that we can include this file again + for the definition of another function. */ +#undef MIN_NEEDED_INPUT +#undef MAX_NEEDED_INPUT +#undef MIN_NEEDED_OUTPUT +#undef MAX_NEEDED_OUTPUT +#undef LOOPFCT +#undef BODY +#undef LOOPFCT +#undef EXTRA_LOOP_DECLS +#undef INIT_PARAMS +#undef UPDATE_PARAMS +#undef REINIT_PARAMS +#undef ONEBYTE_BODY +#undef UNPACK_BYTES +#undef CLEAR_STATE +#undef LOOP_NEED_STATE +#undef LOOP_NEED_FLAGS +#undef LOOP_NEED_DATA +#undef get16 +#undef get32 +#undef put16 +#undef put32 +#undef unaligned diff --git a/REORG.TODO/iconv/skeleton.c b/REORG.TODO/iconv/skeleton.c new file mode 100644 index 0000000000..a12119dc20 --- /dev/null +++ b/REORG.TODO/iconv/skeleton.c @@ -0,0 +1,821 @@ +/* Skeleton for a conversion module. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This file can be included to provide definitions of several things + many modules have in common. It can be customized using the following + macros: + + DEFINE_INIT define the default initializer. This requires the + following symbol to be defined. + + CHARSET_NAME string with official name of the coded character + set (in all-caps) + + DEFINE_FINI define the default destructor function. + + MIN_NEEDED_FROM minimal number of bytes needed for the from-charset. + MIN_NEEDED_TO likewise for the to-charset. + + MAX_NEEDED_FROM maximal number of bytes needed for the from-charset. + This macro is optional, it defaults to MIN_NEEDED_FROM. + MAX_NEEDED_TO likewise for the to-charset. + + FROM_LOOP_MIN_NEEDED_FROM + FROM_LOOP_MAX_NEEDED_FROM + minimal/maximal number of bytes needed on input + of one round through the FROM_LOOP. Defaults + to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively. + FROM_LOOP_MIN_NEEDED_TO + FROM_LOOP_MAX_NEEDED_TO + minimal/maximal number of bytes needed on output + of one round through the FROM_LOOP. Defaults + to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively. + TO_LOOP_MIN_NEEDED_FROM + TO_LOOP_MAX_NEEDED_FROM + minimal/maximal number of bytes needed on input + of one round through the TO_LOOP. Defaults + to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively. + TO_LOOP_MIN_NEEDED_TO + TO_LOOP_MAX_NEEDED_TO + minimal/maximal number of bytes needed on output + of one round through the TO_LOOP. Defaults + to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively. + + FROM_DIRECTION this macro is supposed to return a value != 0 + if we convert from the current character set, + otherwise it returns 0. + + EMIT_SHIFT_TO_INIT this symbol is optional. 
If it is defined it + defines some code which writes out a sequence + of bytes which bring the current state into + the initial state. + + FROM_LOOP name of the function implementing the conversion + from the current character set. + TO_LOOP likewise for the other direction + + ONE_DIRECTION optional. If defined to 1, only one conversion + direction is defined instead of two. In this + case, FROM_DIRECTION should be defined to 1, and + FROM_LOOP and TO_LOOP should have the same value. + + SAVE_RESET_STATE in case of an error we must reset the state for + the rerun so this macro must be defined for + stateful encodings. It takes an argument which + is nonzero when saving. + + RESET_INPUT_BUFFER If the input character sets allow this the macro + can be defined to reset the input buffer pointers + to cover only those characters up to the error. + + FUNCTION_NAME if not set the conversion function is named `gconv'. + + PREPARE_LOOP optional code preparing the conversion loop. Can + contain variable definitions. + END_LOOP also optional, may be used to store information + + EXTRA_LOOP_ARGS optional macro specifying extra arguments passed + to loop function. + + STORE_REST optional, needed only when MAX_NEEDED_FROM > 4. + This macro stores the seen but unconverted input bytes + in the state. + + FROM_ONEBYTE optional. If defined, should be the name of a + specialized conversion function for a single byte + from the current character set to INTERNAL. This + function has prototype + wint_t + FROM_ONEBYTE (struct __gconv_step *, unsigned char); + and does a special conversion: + - The input is a single byte. + - The output is a single uint32_t. + - The state before the conversion is the initial state; + the state after the conversion is irrelevant. + - No transliteration. + - __invocation_counter = 0. + - __internal_use = 1. + - do_flush = 0. 
+ + Modules can use mbstate_t to store conversion state as follows: + + * Bits 2..0 of '__count' contain the number of lookahead input bytes + stored in __value.__wchb. Always zero if the converter never + returns __GCONV_INCOMPLETE_INPUT. + + * Bits 31..3 of '__count' are module dependent shift state. + + * __value: When STORE_REST/UNPACK_BYTES aren't defined and when the + converter has returned __GCONV_INCOMPLETE_INPUT, this contains + at most 4 lookahead bytes. Converters with an mb_cur_max > 4 + (currently only UTF-8) must find a way to store their state + in __value.__wch and define STORE_REST/UNPACK_BYTES appropriately. + + When __value contains lookahead, __count must not be zero, because + the converter is not in the initial state then, and mbsinit() -- + defined as a (__count == 0) test -- must reflect this. + */ + +#include <assert.h> +#include <gconv.h> +#include <string.h> +#define __need_size_t +#define __need_NULL +#include <stddef.h> + +#ifndef STATIC_GCONV +# include <dlfcn.h> +#endif + +#include <sysdep.h> +#include <stdint.h> + +#ifndef DL_CALL_FCT +# define DL_CALL_FCT(fct, args) fct args +#endif + +/* The direction objects. */ +#if DEFINE_INIT +# ifndef FROM_DIRECTION +# define FROM_DIRECTION_VAL NULL +# define TO_DIRECTION_VAL ((void *) ~((uintptr_t) 0)) +# define FROM_DIRECTION (step->__data == FROM_DIRECTION_VAL) +# endif +#else +# ifndef FROM_DIRECTION +# error "FROM_DIRECTION must be provided if non-default init is used" +# endif +#endif + +/* How many bytes are needed at most for the from-charset. */ +#ifndef MAX_NEEDED_FROM +# define MAX_NEEDED_FROM MIN_NEEDED_FROM +#endif + +/* Same for the to-charset. */ +#ifndef MAX_NEEDED_TO +# define MAX_NEEDED_TO MIN_NEEDED_TO +#endif + +/* Defaults for the per-direction min/max constants. 
*/ +#ifndef FROM_LOOP_MIN_NEEDED_FROM +# define FROM_LOOP_MIN_NEEDED_FROM MIN_NEEDED_FROM +#endif +#ifndef FROM_LOOP_MAX_NEEDED_FROM +# define FROM_LOOP_MAX_NEEDED_FROM MAX_NEEDED_FROM +#endif +#ifndef FROM_LOOP_MIN_NEEDED_TO +# define FROM_LOOP_MIN_NEEDED_TO MIN_NEEDED_TO +#endif +#ifndef FROM_LOOP_MAX_NEEDED_TO +# define FROM_LOOP_MAX_NEEDED_TO MAX_NEEDED_TO +#endif +#ifndef TO_LOOP_MIN_NEEDED_FROM +# define TO_LOOP_MIN_NEEDED_FROM MIN_NEEDED_TO +#endif +#ifndef TO_LOOP_MAX_NEEDED_FROM +# define TO_LOOP_MAX_NEEDED_FROM MAX_NEEDED_TO +#endif +#ifndef TO_LOOP_MIN_NEEDED_TO +# define TO_LOOP_MIN_NEEDED_TO MIN_NEEDED_FROM +#endif +#ifndef TO_LOOP_MAX_NEEDED_TO +# define TO_LOOP_MAX_NEEDED_TO MAX_NEEDED_FROM +#endif + + +/* Define macros which can access unaligned buffers. These macros are + supposed to be used only in code outside the inner loops. For the inner + loops we have other definitions which allow optimized access. */ +#if _STRING_ARCH_unaligned +/* We can handle unaligned memory access. */ +# define get16u(addr) *((const uint16_t *) (addr)) +# define get32u(addr) *((const uint32_t *) (addr)) + +/* We need no special support for writing values either. */ +# define put16u(addr, val) *((uint16_t *) (addr)) = (val) +# define put32u(addr, val) *((uint32_t *) (addr)) = (val) +#else +/* Distinguish between big endian and little endian. 
*/ +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define get16u(addr) \ + (((const unsigned char *) (addr))[1] << 8 \ + | ((const unsigned char *) (addr))[0]) +# define get32u(addr) \ + (((((const unsigned char *) (addr))[3] << 8 \ + | ((const unsigned char *) (addr))[2]) << 8 \ + | ((const unsigned char *) (addr))[1]) << 8 \ + | ((const unsigned char *) (addr))[0]) + +# define put16u(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + ((unsigned char *) (addr))[1] = __val >> 8; \ + (void) 0; }) +# define put32u(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[3] = __val; \ + (void) 0; }) +# else +# define get16u(addr) \ + (((const unsigned char *) (addr))[0] << 8 \ + | ((const unsigned char *) (addr))[1]) +# define get32u(addr) \ + (((((const unsigned char *) (addr))[0] << 8 \ + | ((const unsigned char *) (addr))[1]) << 8 \ + | ((const unsigned char *) (addr))[2]) << 8 \ + | ((const unsigned char *) (addr))[3]) + +# define put16u(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[1] = __val; \ + ((unsigned char *) (addr))[0] = __val >> 8; \ + (void) 0; }) +# define put32u(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[3] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[0] = __val; \ + (void) 0; }) +# endif +#endif + + +/* For conversions from a fixed width character set to another fixed width + character set we can define RESET_INPUT_BUFFER in a very fast way. 
*/ +#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE +# if FROM_LOOP_MIN_NEEDED_FROM == FROM_LOOP_MAX_NEEDED_FROM \ + && FROM_LOOP_MIN_NEEDED_TO == FROM_LOOP_MAX_NEEDED_TO \ + && TO_LOOP_MIN_NEEDED_FROM == TO_LOOP_MAX_NEEDED_FROM \ + && TO_LOOP_MIN_NEEDED_TO == TO_LOOP_MAX_NEEDED_TO +/* We have to use these `if's here since the compiler cannot know that + (outbuf - outerr) is always divisible by FROM/TO_LOOP_MIN_NEEDED_TO. + The ?:1 avoids division by zero warnings that gcc 3.2 emits even for + obviously unreachable code. */ +# define RESET_INPUT_BUFFER \ + if (FROM_DIRECTION) \ + { \ + if (FROM_LOOP_MIN_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_TO == 0) \ + *inptrp -= (outbuf - outerr) \ + * (FROM_LOOP_MIN_NEEDED_FROM / FROM_LOOP_MIN_NEEDED_TO); \ + else if (FROM_LOOP_MIN_NEEDED_TO % FROM_LOOP_MIN_NEEDED_FROM == 0) \ + *inptrp -= (outbuf - outerr) \ + / (FROM_LOOP_MIN_NEEDED_TO / FROM_LOOP_MIN_NEEDED_FROM \ + ? : 1); \ + else \ + *inptrp -= ((outbuf - outerr) / FROM_LOOP_MIN_NEEDED_TO) \ + * FROM_LOOP_MIN_NEEDED_FROM; \ + } \ + else \ + { \ + if (TO_LOOP_MIN_NEEDED_FROM % TO_LOOP_MIN_NEEDED_TO == 0) \ + *inptrp -= (outbuf - outerr) \ + * (TO_LOOP_MIN_NEEDED_FROM / TO_LOOP_MIN_NEEDED_TO); \ + else if (TO_LOOP_MIN_NEEDED_TO % TO_LOOP_MIN_NEEDED_FROM == 0) \ + *inptrp -= (outbuf - outerr) \ + / (TO_LOOP_MIN_NEEDED_TO / TO_LOOP_MIN_NEEDED_FROM ? : 1); \ + else \ + *inptrp -= ((outbuf - outerr) / TO_LOOP_MIN_NEEDED_TO) \ + * TO_LOOP_MIN_NEEDED_FROM; \ + } +# endif +#endif + + +/* The default init function. It simply matches the name and initializes + the step data to point to one of the objects above. */ +#if DEFINE_INIT +# ifndef CHARSET_NAME +# error "CHARSET_NAME not defined" +# endif + +extern int gconv_init (struct __gconv_step *step); +int +gconv_init (struct __gconv_step *step) +{ + /* Determine which direction. 
*/ + if (strcmp (step->__from_name, CHARSET_NAME) == 0) + { + step->__data = FROM_DIRECTION_VAL; + + step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM; + step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM; + step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO; + step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO; + +#ifdef FROM_ONEBYTE + step->__btowc_fct = FROM_ONEBYTE; +#endif + } + else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0) + { + step->__data = TO_DIRECTION_VAL; + + step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM; + step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM; + step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO; + step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO; + } + else + return __GCONV_NOCONV; + +#ifdef SAVE_RESET_STATE + step->__stateful = 1; +#else + step->__stateful = 0; +#endif + + return __GCONV_OK; +} +#endif + + +/* The default destructor function does nothing in the moment and so + we don't define it at all. But we still provide the macro just in + case we need it some day. */ +#if DEFINE_FINI +#endif + + +/* If no arguments have to passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_ARGS +# define EXTRA_LOOP_ARGS +#endif + + +/* This is the actual conversion function. */ +#ifndef FUNCTION_NAME +# define FUNCTION_NAME gconv +#endif + +/* The macros are used to access the function to convert single characters. 
*/ +#define SINGLE(fct) SINGLE2 (fct) +#define SINGLE2(fct) fct##_single + + +extern int FUNCTION_NAME (struct __gconv_step *step, + struct __gconv_step_data *data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outbufstart, size_t *irreversible, + int do_flush, int consume_incomplete); +int +FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outbufstart, size_t *irreversible, int do_flush, + int consume_incomplete) +{ + struct __gconv_step *next_step = step + 1; + struct __gconv_step_data *next_data = data + 1; + __gconv_fct fct = NULL; + int status; + + if ((data->__flags & __GCONV_IS_LAST) == 0) + { + fct = next_step->__fct; +#ifdef PTR_DEMANGLE + if (next_step->__shlib_handle != NULL) + PTR_DEMANGLE (fct); +#endif + } + + /* If the function is called with no input this means we have to reset + to the initial state. The possibly partly converted input is + dropped. */ + if (__glibc_unlikely (do_flush)) + { + /* This should never happen during error handling. */ + assert (outbufstart == NULL); + + status = __GCONV_OK; + +#ifdef EMIT_SHIFT_TO_INIT + if (do_flush == 1) + { + /* We preserve the initial values of the pointer variables. */ + unsigned char *outbuf = data->__outbuf; + unsigned char *outstart = outbuf; + unsigned char *outend = data->__outbufend; + +# ifdef PREPARE_LOOP + PREPARE_LOOP +# endif + +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (1); +# endif + + /* Emit the escape sequence to reset the state. */ + EMIT_SHIFT_TO_INIT; + + /* Call the steps down the chain if there are any but only if we + successfully emitted the escape sequence. This should only + fail if the output buffer is full. If the input is invalid + it should be discarded since the user wants to start from a + clean state. */ + if (status == __GCONV_OK) + { + if (data->__flags & __GCONV_IS_LAST) + /* Store information about how many bytes are available. 
*/ + data->__outbuf = outbuf; + else + { + /* Write out all output which was produced. */ + if (outbuf > outstart) + { + const unsigned char *outerr = outstart; + int result; + + result = DL_CALL_FCT (fct, (next_step, next_data, + &outerr, outbuf, NULL, + irreversible, 0, + consume_incomplete)); + + if (result != __GCONV_EMPTY_INPUT) + { + if (__glibc_unlikely (outerr != outbuf)) + { + /* We have a problem. Undo the conversion. */ + outbuf = outstart; + + /* Restore the state. */ +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (0); +# endif + } + + /* Change the status. */ + status = result; + } + } + + if (status == __GCONV_OK) + /* Now flush the remaining steps. */ + status = DL_CALL_FCT (fct, (next_step, next_data, NULL, + NULL, NULL, irreversible, 1, + consume_incomplete)); + } + } + } + else +#endif + { + /* Clear the state object. There might be bytes in there from + previous calls with CONSUME_INCOMPLETE == 1. But don't emit + escape sequences. */ + memset (data->__statep, '\0', sizeof (*data->__statep)); + + if (! (data->__flags & __GCONV_IS_LAST)) + /* Now flush the remaining steps. */ + status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, + NULL, irreversible, do_flush, + consume_incomplete)); + } + } + else + { + /* We preserve the initial values of the pointer variables, + but only some conversion modules need it. */ + const unsigned char *inptr __attribute__ ((__unused__)) = *inptrp; + unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1) + ? data->__outbuf : *outbufstart); + unsigned char *outend = data->__outbufend; + unsigned char *outstart; + /* This variable is used to count the number of characters we + actually converted. */ + size_t lirreversible = 0; + size_t *lirreversiblep = irreversible ? 
&lirreversible : NULL; + + /* The following assumes that encodings, which have a variable length + what might unalign a buffer even though it is an aligned in the + beginning, either don't have the minimal number of bytes as a divisor + of the maximum length or have a minimum length of 1. This is true + for all known and supported encodings. + We use && instead of || to combine the subexpression for the FROM + encoding and for the TO encoding, because usually one of them is + INTERNAL, for which the subexpression evaluates to 1, but INTERNAL + buffers are always aligned correctly. */ +#define POSSIBLY_UNALIGNED \ + (!_STRING_ARCH_unaligned \ + && (((FROM_LOOP_MIN_NEEDED_FROM != 1 \ + && FROM_LOOP_MAX_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_FROM == 0) \ + && (FROM_LOOP_MIN_NEEDED_TO != 1 \ + && FROM_LOOP_MAX_NEEDED_TO % FROM_LOOP_MIN_NEEDED_TO == 0)) \ + || ((TO_LOOP_MIN_NEEDED_FROM != 1 \ + && TO_LOOP_MAX_NEEDED_FROM % TO_LOOP_MIN_NEEDED_FROM == 0) \ + && (TO_LOOP_MIN_NEEDED_TO != 1 \ + && TO_LOOP_MAX_NEEDED_TO % TO_LOOP_MIN_NEEDED_TO == 0)))) +#if POSSIBLY_UNALIGNED + int unaligned; +# define GEN_unaligned(name) GEN_unaligned2 (name) +# define GEN_unaligned2(name) name##_unaligned +#else +# define unaligned 0 +#endif + +#ifdef PREPARE_LOOP + PREPARE_LOOP +#endif + +#if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1 + /* If the function is used to implement the mb*towc*() or wc*tomb*() + functions we must test whether any bytes from the last call are + stored in the `state' object. */ + if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1) + || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION)) + && consume_incomplete && (data->__statep->__count & 7) != 0) + { + /* Yep, we have some bytes left over. Process them now. + But this must not happen while we are called from an + error handler. 
*/ + assert (outbufstart == NULL); + +# if FROM_LOOP_MAX_NEEDED_FROM > 1 + if (TO_LOOP_MAX_NEEDED_FROM == 1 || FROM_DIRECTION) + status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf, + outend, lirreversiblep + EXTRA_LOOP_ARGS); +# endif +# if !ONE_DIRECTION +# if FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1 + else +# endif +# if TO_LOOP_MAX_NEEDED_FROM > 1 + status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf, + outend, lirreversiblep EXTRA_LOOP_ARGS); +# endif +# endif + + if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) + return status; + } +#endif + +#if POSSIBLY_UNALIGNED + unaligned = + ((FROM_DIRECTION + && ((uintptr_t) inptr % FROM_LOOP_MIN_NEEDED_FROM != 0 + || ((data->__flags & __GCONV_IS_LAST) + && (uintptr_t) outbuf % FROM_LOOP_MIN_NEEDED_TO != 0))) + || (!FROM_DIRECTION + && (((data->__flags & __GCONV_IS_LAST) + && (uintptr_t) outbuf % TO_LOOP_MIN_NEEDED_TO != 0) + || (uintptr_t) inptr % TO_LOOP_MIN_NEEDED_FROM != 0))); +#endif + + while (1) + { + /* Remember the start value for this round. */ + inptr = *inptrp; + /* The outbuf buffer is empty. */ + outstart = outbuf; + +#ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (1); +#endif + + if (__glibc_likely (!unaligned)) + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend, + lirreversiblep EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend, + lirreversiblep EXTRA_LOOP_ARGS); + } +#if POSSIBLY_UNALIGNED + else + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend, + &outbuf, outend, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. 
*/ + status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend, + &outbuf, outend, + lirreversiblep + EXTRA_LOOP_ARGS); + } +#endif + + /* If we were called as part of an error handling module we + don't do anything else here. */ + if (__glibc_unlikely (outbufstart != NULL)) + { + *outbufstart = outbuf; + return status; + } + + /* We finished one use of the loops. */ + ++data->__invocation_counter; + + /* If this is the last step leave the loop, there is nothing + we can do. */ + if (__glibc_unlikely (data->__flags & __GCONV_IS_LAST)) + { + /* Store information about how many bytes are available. */ + data->__outbuf = outbuf; + + /* Remember how many non-identical characters we + converted in an irreversible way. */ + *irreversible += lirreversible; + + break; + } + + /* Write out all output which was produced. */ + if (__glibc_likely (outbuf > outstart)) + { + const unsigned char *outerr = data->__outbuf; + int result; + + result = DL_CALL_FCT (fct, (next_step, next_data, &outerr, + outbuf, NULL, irreversible, 0, + consume_incomplete)); + + if (result != __GCONV_EMPTY_INPUT) + { + if (__glibc_unlikely (outerr != outbuf)) + { +#ifdef RESET_INPUT_BUFFER + RESET_INPUT_BUFFER; +#else + /* We have a problem in one of the functions below. + Undo the conversion upto the error point. */ + size_t nstatus __attribute__ ((unused)); + + /* Reload the pointers. */ + *inptrp = inptr; + outbuf = outstart; + + /* Restore the state. */ +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (0); +# endif + + if (__glibc_likely (!unaligned)) + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + nstatus = FROM_LOOP (step, data, inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + nstatus = TO_LOOP (step, data, inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + } +# if POSSIBLY_UNALIGNED + else + { + if (FROM_DIRECTION) + /* Run the conversion loop. 
*/ + nstatus = GEN_unaligned (FROM_LOOP) (step, data, + inptrp, inend, + &outbuf, + outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + nstatus = GEN_unaligned (TO_LOOP) (step, data, + inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + } +# endif + + /* We must run out of output buffer space in this + rerun. */ + assert (outbuf == outerr); + assert (nstatus == __GCONV_FULL_OUTPUT); + + /* If we haven't consumed a single byte decrement + the invocation counter. */ + if (__glibc_unlikely (outbuf == outstart)) + --data->__invocation_counter; +#endif /* reset input buffer */ + } + + /* Change the status. */ + status = result; + } + else + /* All the output is consumed, we can make another run + if everything was ok. */ + if (status == __GCONV_FULL_OUTPUT) + { + status = __GCONV_OK; + outbuf = data->__outbuf; + } + } + + if (status != __GCONV_OK) + break; + + /* Reset the output buffer pointer for the next round. */ + outbuf = data->__outbuf; + } + +#ifdef END_LOOP + END_LOOP +#endif + + /* If we are supposed to consume all character store now all of the + remaining characters in the `state' object. */ +#if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1 + if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1) + || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION)) + && __builtin_expect (consume_incomplete, 0) + && status == __GCONV_INCOMPLETE_INPUT) + { +# ifdef STORE_REST + mbstate_t *state = data->__statep; + + STORE_REST +# else + /* Make sure the remaining bytes fit into the state objects + buffer. 
*/ + assert (inend - *inptrp < 4); + + size_t cnt; + for (cnt = 0; *inptrp < inend; ++cnt) + data->__statep->__value.__wchb[cnt] = *(*inptrp)++; + data->__statep->__count &= ~7; + data->__statep->__count |= cnt; +# endif + } +#endif +#undef unaligned +#undef POSSIBLY_UNALIGNED + } + + return status; +} + +#undef DEFINE_INIT +#undef CHARSET_NAME +#undef DEFINE_FINI +#undef MIN_NEEDED_FROM +#undef MIN_NEEDED_TO +#undef MAX_NEEDED_FROM +#undef MAX_NEEDED_TO +#undef FROM_LOOP_MIN_NEEDED_FROM +#undef FROM_LOOP_MAX_NEEDED_FROM +#undef FROM_LOOP_MIN_NEEDED_TO +#undef FROM_LOOP_MAX_NEEDED_TO +#undef TO_LOOP_MIN_NEEDED_FROM +#undef TO_LOOP_MAX_NEEDED_FROM +#undef TO_LOOP_MIN_NEEDED_TO +#undef TO_LOOP_MAX_NEEDED_TO +#undef FROM_DIRECTION +#undef EMIT_SHIFT_TO_INIT +#undef FROM_LOOP +#undef TO_LOOP +#undef ONE_DIRECTION +#undef SAVE_RESET_STATE +#undef RESET_INPUT_BUFFER +#undef FUNCTION_NAME +#undef PREPARE_LOOP +#undef END_LOOP +#undef EXTRA_LOOP_ARGS +#undef STORE_REST +#undef FROM_ONEBYTE diff --git a/REORG.TODO/iconv/strtab.c b/REORG.TODO/iconv/strtab.c new file mode 100644 index 0000000000..89b28c5dde --- /dev/null +++ b/REORG.TODO/iconv/strtab.c @@ -0,0 +1,339 @@ +/* C string table handling. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + Written by Ulrich Drepper <drepper@redhat.com>, 2000. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <assert.h> +#include <inttypes.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/cdefs.h> +#include <sys/param.h> + + +struct Strent +{ + const char *string; + size_t len; + struct Strent *next; + struct Strent *left; + struct Strent *right; + size_t offset; + char reverse[0]; +}; + + +struct memoryblock +{ + struct memoryblock *next; + char memory[0]; +}; + + +struct Strtab +{ + struct Strent *root; + struct memoryblock *memory; + char *backp; + size_t left; + size_t total; + + struct Strent null; +}; + + +/* Cache for the pagesize. We correct this value a bit so that `malloc' + is not allocating more than a page. */ +static size_t ps; + + +#include <programs/xmalloc.h> + +/* Prototypes for our functions that are used from iconvconfig.c. If + you change these, change also iconvconfig.c. */ +/* Create new C string table object in memory. */ +extern struct Strtab *strtabinit (void); + +/* Free resources allocated for C string table ST. */ +extern void strtabfree (struct Strtab *st); + +/* Add string STR (length LEN is != 0) to C string table ST. */ +extern struct Strent *strtabadd (struct Strtab *st, const char *str, + size_t len); + +/* Finalize string table ST and store size in *SIZE and return a pointer. */ +extern void *strtabfinalize (struct Strtab *st, size_t *size); + +/* Get offset in string table for string associated with SE. 
*/ +extern size_t strtaboffset (struct Strent *se); + + +struct Strtab * +strtabinit (void) +{ + struct Strtab *ret; + + if (ps == 0) + { + ps = sysconf (_SC_PAGESIZE) - 2 * sizeof (void *); + assert (sizeof (struct memoryblock) < ps); + } + + ret = (struct Strtab *) calloc (1, sizeof (struct Strtab)); + if (ret != NULL) + { + ret->null.len = 1; + ret->null.string = ""; + } + return ret; +} + + +static void +morememory (struct Strtab *st, size_t len) +{ + struct memoryblock *newmem; + + if (len < ps) + len = ps; + newmem = (struct memoryblock *) malloc (len); + if (newmem == NULL) + abort (); + + newmem->next = st->memory; + st->memory = newmem; + st->backp = newmem->memory; + st->left = len - offsetof (struct memoryblock, memory); +} + + +void +strtabfree (struct Strtab *st) +{ + struct memoryblock *mb = st->memory; + + while (mb != NULL) + { + void *old = mb; + mb = mb->next; + free (old); + } + + free (st); +} + + +static struct Strent * +newstring (struct Strtab *st, const char *str, size_t len) +{ + struct Strent *newstr; + size_t align; + int i; + + /* Compute the amount of padding needed to make the structure aligned. */ + align = ((__alignof__ (struct Strent) + - (((uintptr_t) st->backp) + & (__alignof__ (struct Strent) - 1))) + & (__alignof__ (struct Strent) - 1)); + + /* Make sure there is enough room in the memory block. */ + if (st->left < align + sizeof (struct Strent) + len) + { + morememory (st, sizeof (struct Strent) + len); + align = 0; + } + + /* Create the reserved string. 
*/ + newstr = (struct Strent *) (st->backp + align); + newstr->string = str; + newstr->len = len; + newstr->next = NULL; + newstr->left = NULL; + newstr->right = NULL; + newstr->offset = 0; + for (i = len - 2; i >= 0; --i) + newstr->reverse[i] = str[len - 2 - i]; + newstr->reverse[len - 1] = '\0'; + st->backp += align + sizeof (struct Strent) + len; + st->left -= align + sizeof (struct Strent) + len; + + return newstr; +} + + +/* XXX This function should definitely be rewritten to use a balancing + tree algorithm (AVL, red-black trees). For now a simple, correct + implementation is enough. */ +static struct Strent ** +searchstring (struct Strent **sep, struct Strent *newstr) +{ + int cmpres; + + /* More strings? */ + if (*sep == NULL) + { + *sep = newstr; + return sep; + } + + /* Compare the strings. */ + cmpres = memcmp ((*sep)->reverse, newstr->reverse, + MIN ((*sep)->len, newstr->len) - 1); + if (cmpres == 0) + /* We found a matching string. */ + return sep; + else if (cmpres > 0) + return searchstring (&(*sep)->left, newstr); + else + return searchstring (&(*sep)->right, newstr); +} + + +/* Add new string. The actual string is assumed to be permanent. */ +struct Strent * +strtabadd (struct Strtab *st, const char *str, size_t len) +{ + struct Strent *newstr; + struct Strent **sep; + + /* Compute the string length if the caller doesn't know it. */ + if (len == 0) + len = strlen (str) + 1; + + /* Make sure all "" strings get offset 0. */ + if (len == 1) + return &st->null; + + /* Allocate memory for the new string and its associated information. */ + newstr = newstring (st, str, len); + + /* Search in the array for the place to insert the string. If there + is no string with matching prefix and no string with matching + leading substring, create a new entry. */ + sep = searchstring (&st->root, newstr); + if (*sep != newstr) + { + /* This is not the same entry. This means we have a prefix match. 
*/ + if ((*sep)->len > newstr->len) + { + struct Strent *subs; + + for (subs = (*sep)->next; subs; subs = subs->next) + if (subs->len == newstr->len) + { + /* We have an exact match with a substring. Free the memory + we allocated. */ + st->left += st->backp - (char *) newstr; + st->backp = (char *) newstr; + + return subs; + } + + /* We have a new substring. This means we don't need the reverse + string of this entry anymore. */ + st->backp -= newstr->len; + st->left += newstr->len; + + newstr->next = (*sep)->next; + (*sep)->next = newstr; + } + else if ((*sep)->len != newstr->len) + { + /* When we get here it means that the string we are about to + add has a common prefix with a string we already have but + it is longer. In this case we have to put it first. */ + st->total += newstr->len - (*sep)->len; + newstr->next = *sep; + newstr->left = (*sep)->left; + newstr->right = (*sep)->right; + *sep = newstr; + } + else + { + /* We have an exact match. Free the memory we allocated. */ + st->left += st->backp - (char *) newstr; + st->backp = (char *) newstr; + + newstr = *sep; + } + } + else + st->total += newstr->len; + + return newstr; +} + + +static void +copystrings (struct Strent *nodep, char **freep, size_t *offsetp) +{ + struct Strent *subs; + + if (nodep->left != NULL) + copystrings (nodep->left, freep, offsetp); + + /* Process the current node. */ + nodep->offset = *offsetp; + *freep = (char *) mempcpy (*freep, nodep->string, nodep->len); + *offsetp += nodep->len; + + for (subs = nodep->next; subs != NULL; subs = subs->next) + { + assert (subs->len < nodep->len); + subs->offset = nodep->offset + nodep->len - subs->len; + } + + if (nodep->right != NULL) + copystrings (nodep->right, freep, offsetp); +} + + +void * +strtabfinalize (struct Strtab *st, size_t *size) +{ + size_t copylen; + char *endp; + char *retval; + + /* Fill in the information. 
*/ + endp = retval = (char *) xmalloc (st->total + 1); + + /* Always put an empty string at the beginning so that a zero offset + can mean error. */ + *endp++ = '\0'; + + /* Now run through the tree and add all the string while also updating + the offset members of the elfstrent records. */ + copylen = 1; + copystrings (st->root, &endp, &copylen); + assert (copylen == st->total + 1); + assert (endp == retval + st->total + 1); + *size = copylen; + + return retval; +} + + +size_t +strtaboffset (struct Strent *se) +{ + return se->offset; +} diff --git a/REORG.TODO/iconv/tst-iconv1.c b/REORG.TODO/iconv/tst-iconv1.c new file mode 100644 index 0000000000..0609f50e50 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv1.c @@ -0,0 +1,47 @@ +/* Test case by yaoz@nih.gov. */ + +#include <iconv.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +static int +do_test (void) +{ + char utf8[5]; + wchar_t ucs4[5]; + iconv_t cd; + char *inbuf; + char *outbuf; + size_t inbytes; + size_t outbytes; + size_t n; + + strcpy (utf8, "abcd"); + + /* From UTF8 to UCS4. */ + cd = iconv_open ("UCS4", "UTF8"); + if (cd == (iconv_t) -1) + { + perror ("iconv_open"); + return 1; + } + + inbuf = utf8; + inbytes = 4; + outbuf = (char *) ucs4; + outbytes = 4 * sizeof (wchar_t); /* "Argument list too long" error. */ + n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes); + if (n == (size_t) -1) + { + printf ("iconv: %m\n"); + iconv_close (cd); + return 1; + } + iconv_close (cd); + + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv2.c b/REORG.TODO/iconv/tst-iconv2.c new file mode 100644 index 0000000000..af78d78350 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv2.c @@ -0,0 +1,102 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 2001. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <iconv.h> +#include <mcheck.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +static int +do_test (void) +{ + char buf[3]; + const wchar_t wc[1] = L"a"; + iconv_t cd; + char *inptr; + size_t inlen; + char *outptr; + size_t outlen; + size_t n; + int e; + int result = 0; + + mtrace (); + + cd = iconv_open ("UCS4", "WCHAR_T"); + if (cd == (iconv_t) -1) + { + printf ("cannot convert from wchar_t to UCS4: %m\n"); + exit (1); + } + + inptr = (char *) wc; + inlen = sizeof (wchar_t); + outptr = buf; + outlen = 3; + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + e = errno; + + if (n != (size_t) -1) + { + printf ("incorrect iconv() return value: %zd, expected -1\n", n); + result = 1; + } + + if (e != E2BIG) + { + printf ("incorrect error value: %s, expected %s\n", + strerror (e), strerror (E2BIG)); + result = 1; + } + + if (inptr != (char *) wc) + { + puts ("inptr changed"); + result = 1; + } + + if (inlen != sizeof (wchar_t)) + { + puts ("inlen changed"); + result = 1; + } + + if (outptr != buf) + { + puts ("outptr changed"); + result = 1; + } + + if (outlen != 3) + { + puts ("outlen changed"); + result = 1; + } + + iconv_close (cd); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git 
a/REORG.TODO/iconv/tst-iconv3.c b/REORG.TODO/iconv/tst-iconv3.c new file mode 100644 index 0000000000..b06f75f0bc --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv3.c @@ -0,0 +1,56 @@ +/* Contributed by Owen Taylor <otaylor@redhat.com>. */ + +#include <iconv.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> + +#define BUFSIZE 10000 + +static int +do_test (void) +{ + char inbuf[BUFSIZE]; + wchar_t outbuf[BUFSIZE]; + + iconv_t cd; + int i; + char *inptr; + char *outptr; + size_t inbytes_left, outbytes_left; + int count; + int result = 0; + + for (i=0; i < BUFSIZE; i++) + inbuf[i] = 'a'; + + cd = iconv_open ("UCS-4LE", "UTF-8"); + + inbytes_left = BUFSIZE; + outbytes_left = BUFSIZE * 4; + inptr = inbuf; + outptr = (char *) outbuf; + + count = iconv (cd, &inptr, &inbytes_left, &outptr, &outbytes_left); + + if (count < 0) + { + if (errno == E2BIG) + printf ("Received E2BIG\n"); + else + printf ("Received something else\n"); + + printf ("inptr change: %td\n", inptr - inbuf); + printf ("inlen change: %zd\n", BUFSIZE - inbytes_left); + printf ("outptr change: %td\n", outptr - (char *) outbuf); + printf ("outlen change: %zd\n", BUFSIZE * 4 - outbytes_left); + result = 1; + } + else + printf ("Succeeded\n"); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv4.c b/REORG.TODO/iconv/tst-iconv4.c new file mode 100644 index 0000000000..b5ff39306c --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv4.c @@ -0,0 +1,65 @@ +// Derived from BZ #9793 +#include <errno.h> +#include <iconv.h> +#include <stdio.h> + + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); + if (cd == (iconv_t) -1) + { + puts ("iconv_open failed"); + return 1; + } + + char input[2] = { 0xc2, 0xae }; /* Registered trademark */ + char *inptr = input; + size_t insize = sizeof (input); + char output[2]; /* Too short to contain "(R)". 
*/ + char *outptr = output; + size_t outsize = sizeof (output); + + size_t ret = iconv (cd, &inptr, &insize, &outptr, &outsize); + if (ret != (size_t) -1) + { + puts ("iconv succeeded"); + return 1; + } + if (errno != E2BIG) + { + puts ("iconv did not set errno to E2BIG"); + return 1; + } + int res = 0; + if (inptr != input) + { + puts ("inptr changed"); + res = 1; + } + if (insize != sizeof (input)) + { + puts ("insize changed"); + res = 1; + } + if (outptr != output) + { + puts ("outptr changed"); + res = 1; + } + if (outsize != sizeof (output)) + { + puts ("outsize changed"); + res = 1; + } + if (iconv_close (cd) == -1) + { + puts ("iconv_close failed"); + res = 1; + } + return res; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv5.c b/REORG.TODO/iconv/tst-iconv5.c new file mode 100644 index 0000000000..52f93d6695 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv5.c @@ -0,0 +1,161 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by GOTO Masanori <gotom@debian.or.jp>, 2004 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <iconv.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> + +#define SIZE 256 /* enough room for conversion */ +#define SAMPLESTR "abc" + +struct unalign +{ + char str1[1]; + char str2[SIZE]; +}; + +struct convcode +{ + const char *tocode; + const char *fromcode; +}; + +/* test builtin transformation */ +static const struct convcode testcode[] = { + {"ASCII", "ASCII"}, + {"UTF-8", "ASCII"}, + {"UCS-2BE", "ASCII"}, + {"UCS-2LE", "ASCII"}, + {"UCS-4BE", "ASCII"}, + {"UCS-4LE", "ASCII"}, +}; + +static const int number = (int) sizeof (testcode) / sizeof (struct convcode); + +static int +convert (const char *tocode, const char *fromcode, char *inbufp, + size_t inbytesleft, char *outbufp, size_t outbytesleft) +{ + iconv_t *ic; + size_t outbytes = outbytesleft; + int ret; + + ic = iconv_open (tocode, fromcode); + if (ic == (iconv_t *) - 1) + { + printf ("iconv_open failed: from: %s, to: %s: %s", + fromcode, tocode, strerror (errno)); + return -1; + } + + while (inbytesleft > 0) + { + ret = iconv (ic, &inbufp, &inbytesleft, &outbufp, &outbytes); + if (ret == -1) + { + printf ("iconv failed: from: %s, to: %s: %s", + fromcode, tocode, strerror (errno)); + return -1; + } + } + + ret = iconv_close (ic); + if (ret == -1) + { + printf ("iconv_close failed: from: %s, to: %s: %s", + fromcode, tocode, strerror (errno)); + return -1; + } + + return outbytesleft - outbytes; +} + + +static int +test_unalign (const struct convcode *codes, const char *str, int len) +{ + struct unalign *inbufp, *outbufp; + char *inbuf, *outbuf; + size_t inbytesleft, outbytesleft; + int retlen; + + /* allocating unaligned buffer for both inbuf and outbuf */ + inbufp = (struct unalign *) malloc (sizeof (struct unalign)); + if (!inbufp) + { + printf ("no memory available\n"); + exit (1); + } + inbuf = inbufp->str2; + + outbufp = (struct unalign *) malloc (sizeof (struct unalign)); + if (!outbufp) + { + printf ("no memory available\n"); + exit (1); + } + 
outbuf = outbufp->str2; + + /* first iconv phase */ + memcpy (inbuf, str, len); + inbytesleft = len; + outbytesleft = sizeof (struct unalign); + retlen = convert (codes->tocode, codes->fromcode, inbuf, inbytesleft, + outbuf, outbytesleft); + if (retlen == -1) /* failed */ + return 1; + + /* second round trip iconv phase */ + memcpy (inbuf, outbuf, retlen); + inbytesleft = retlen; + outbytesleft = sizeof (struct unalign); + retlen = convert (codes->fromcode, codes->tocode, inbuf, inbytesleft, + outbuf, outbytesleft); + if (retlen == -1) /* failed */ + return 1; + + free (inbufp); + free (outbufp); + + return 0; +} + +static int +do_test (void) +{ + int i; + int ret = 0; + + for (i = 0; i < number; i++) + { + ret = test_unalign (&testcode[i], (char *) SAMPLESTR, sizeof (SAMPLESTR)); + if (ret) + break; + printf ("iconv: %s <-> %s: ok\n", + testcode[i].fromcode, testcode[i].tocode); + } + if (ret == 0) + printf ("Succeeded.\n"); + + return ret; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/iconv/tst-iconv6.c b/REORG.TODO/iconv/tst-iconv6.c new file mode 100644 index 0000000000..ace7dc68b2 --- /dev/null +++ b/REORG.TODO/iconv/tst-iconv6.c @@ -0,0 +1,118 @@ +/* Testing ucs4le_internal_loop() in gconv_simple.c. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <inttypes.h> +#include <iconv.h> +#include <byteswap.h> +#include <endian.h> + +static int +do_test (void) +{ + iconv_t cd; + char *inptr; + size_t inlen; + char *outptr; + size_t outlen; + size_t n; + int e; + int result = 0; + +#if __BYTE_ORDER == __BIG_ENDIAN + /* On big-endian machines, ucs4le_internal_loop() swaps the bytes before + error checking. Thus the input values has to be swapped. */ +# define VALUE(val) bswap_32 (val) +#else +# define VALUE(val) val +#endif + uint32_t inbuf[3] = { VALUE (0x41), VALUE (0x80000000), VALUE (0x42) }; + uint32_t outbuf[3] = { 0, 0, 0 }; + + cd = iconv_open ("WCHAR_T", "UCS-4LE"); + if (cd == (iconv_t) -1) + { + printf ("cannot convert from UCS4LE to wchar_t: %m\n"); + return 1; + } + + inptr = (char *) inbuf; + inlen = sizeof (inbuf); + outptr = (char *) outbuf; + outlen = sizeof (outbuf); + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + e = errno; + + if (n != (size_t) -1) + { + printf ("incorrect iconv() return value: %zd, expected -1\n", n); + result = 1; + } + + if (e != EILSEQ) + { + printf ("incorrect error value: %s, expected %s\n", + strerror (e), strerror (EILSEQ)); + result = 1; + } + + if (inptr != (char *) &inbuf[1]) + { + printf ("inptr=0x%p does not point to invalid character! Expected=0x%p\n" + , inptr, &inbuf[1]); + result = 1; + } + + if (inlen != sizeof (inbuf) - sizeof (uint32_t)) + { + printf ("inlen=%zd != %zd\n" + , inlen, sizeof (inbuf) - sizeof (uint32_t)); + result = 1; + } + + if (outptr != (char *) &outbuf[1]) + { + printf ("outptr=0x%p does not point to invalid character in inbuf! 
" + "Expected=0x%p\n" + , outptr, &outbuf[1]); + result = 1; + } + + if (outlen != sizeof (inbuf) - sizeof (uint32_t)) + { + printf ("outlen=%zd != %zd\n" + , outlen, sizeof (outbuf) - sizeof (uint32_t)); + result = 1; + } + + if (outbuf[0] != 0x41 || outbuf[1] != 0 || outbuf[2] != 0) + { + puts ("Characters conversion is incorrect!"); + result = 1; + } + + iconv_close (cd); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" |