diff options
Diffstat (limited to 'iconv/iconv_prog.c')
-rw-r--r-- | iconv/iconv_prog.c | 803 |
1 files changed, 0 insertions, 803 deletions
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c deleted file mode 100644 index 1397d2e9bd..0000000000 --- a/iconv/iconv_prog.c +++ /dev/null @@ -1,803 +0,0 @@ -/* Convert text in given files from the specified from-set to the to-set. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. */ - -#include <argp.h> -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <error.h> -#include <fcntl.h> -#include <iconv.h> -#include <langinfo.h> -#include <locale.h> -#include <search.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <libintl.h> -#ifdef _POSIX_MAPPED_FILES -# include <sys/mman.h> -#endif -#include <charmap.h> -#include <gconv_int.h> -#include "iconv_prog.h" -#include "iconvconfig.h" - -/* Get libc version number. */ -#include "../version.h" - -#define PACKAGE _libc_intl_domainname - - -/* Name and version of program. */ -static void print_version (FILE *stream, struct argp_state *state); -void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; - -#define OPT_VERBOSE 1000 -#define OPT_LIST 'l' - -/* Definitions of arguments for argp functions. */ -static const struct argp_option options[] = -{ - { NULL, 0, NULL, 0, N_("Input/Output format specification:") }, - { "from-code", 'f', N_("NAME"), 0, N_("encoding of original text") }, - { "to-code", 't', N_("NAME"), 0, N_("encoding for output") }, - { NULL, 0, NULL, 0, N_("Information:") }, - { "list", 'l', NULL, 0, N_("list all known coded character sets") }, - { NULL, 0, NULL, 0, N_("Output control:") }, - { NULL, 'c', NULL, 0, N_("omit invalid characters from output") }, - { "output", 'o', N_("FILE"), 0, N_("output file") }, - { "silent", 's', NULL, 0, N_("suppress warnings") }, - { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, - { NULL, 0, NULL, 0, NULL } -}; - -/* Short description of program. */ -static const char doc[] = N_("\ -Convert encoding of given files from one encoding to another."); - -/* Strings for arguments in help texts. */ -static const char args_doc[] = N_("[FILE...]"); - -/* Prototype for option handler. */ -static error_t parse_opt (int key, char *arg, struct argp_state *state); - -/* Function to print some extra text in the help message. */ -static char *more_help (int key, const char *text, void *input); - -/* Data structure to communicate with argp functions. */ -static struct argp argp = -{ - options, parse_opt, args_doc, doc, NULL, more_help -}; - -/* Code sets to convert from and to respectively. An empty string as the - default causes the 'iconv_open' function to look up the charset of the - currently selected locale and use it. */ -static const char *from_code = ""; -static const char *to_code = ""; - -/* File to write output to. If NULL write to stdout. */ -static const char *output_file; - -/* Nonzero if verbose ouput is wanted. */ -int verbose; - -/* Nonzero if list of all coded character sets is wanted. */ -static int list; - -/* If nonzero omit invalid character from output. */ -int omit_invalid; - -/* Prototypes for the functions doing the actual work. */ -static int process_block (iconv_t cd, char *addr, size_t len, FILE **output, - const char *output_file); -static int process_fd (iconv_t cd, int fd, FILE **output, - const char *output_file); -static int process_file (iconv_t cd, FILE *input, FILE **output, - const char *output_file); -static void print_known_names (void) internal_function; - - -int -main (int argc, char *argv[]) -{ - int status = EXIT_SUCCESS; - int remaining; - iconv_t cd; - const char *orig_to_code; - struct charmap_t *from_charmap = NULL; - struct charmap_t *to_charmap = NULL; - - /* Set locale via LC_ALL. */ - setlocale (LC_ALL, ""); - - /* Set the text message domain. */ - textdomain (_libc_intl_domainname); - - /* Parse and process arguments. */ - argp_parse (&argp, argc, argv, 0, &remaining, NULL); - - /* List all coded character sets if wanted. */ - if (list) - { - print_known_names (); - exit (EXIT_SUCCESS); - } - - /* If we have to ignore errors make sure we use the appropriate name for - the to-character-set. */ - orig_to_code = to_code; - if (omit_invalid) - { - const char *errhand = strchrnul (to_code, '/'); - int nslash = 2; - char *newp; - char *cp; - - if (*errhand == '/') - { - --nslash; - errhand = strchrnul (errhand + 1, '/'); - - if (*errhand == '/') - { - --nslash; - errhand = strchr (errhand, '\0'); - } - } - - newp = (char *) alloca (errhand - to_code + nslash + 7 + 1); - cp = mempcpy (newp, to_code, errhand - to_code); - while (nslash-- > 0) - *cp++ = '/'; - if (cp[-1] != '/') - *cp++ = ','; - memcpy (cp, "IGNORE", sizeof ("IGNORE")); - - to_code = newp; - } - - /* POSIX 1003.2b introduces a silly thing: the arguments to -t anf -f - can be file names of charmaps. In this case iconv will have to read - those charmaps and use them to do the conversion. But there are - holes in the specification. There is nothing said that if -f is a - charmap filename that -t must be, too. And vice versa. There is - also no word about the symbolic names used. What if they don't - match? */ - if (strchr (from_code, '/') != NULL) - /* The from-name might be a charmap file name. Try reading the - file. */ - from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0); - - if (strchr (orig_to_code, '/') != NULL) - /* The to-name might be a charmap file name. Try reading the - file. */ - to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0); - - - /* At this point we have to handle two cases. The first one is - where a charmap is used for the from- or to-charset, or both. We - handle this special since it is very different from the sane way of - doing things. The other case allows converting using the iconv() - function. */ - if (from_charmap != NULL || to_charmap != NULL) - /* Construct the conversion table and do the conversion. */ - status = charmap_conversion (from_code, from_charmap, to_code, to_charmap, - argc, remaining, argv, output_file); - else - { - /* Let's see whether we have these coded character sets. */ - cd = iconv_open (to_code, from_code); - if (cd == (iconv_t) -1) - { - if (errno == EINVAL) - { - /* Try to be nice with the user and tell her which of the - two encoding names is wrong. This is possible because - all supported encodings can be converted from/to Unicode, - in other words, because the graph of encodings is - connected. */ - bool from_wrong = - (iconv_open ("UTF-8", from_code) == (iconv_t) -1 - && errno == EINVAL); - bool to_wrong = - (iconv_open (to_code, "UTF-8") == (iconv_t) -1 - && errno == EINVAL); - const char *from_pretty = - (from_code[0] ? from_code : nl_langinfo (CODESET)); - const char *to_pretty = - (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET)); - - if (from_wrong) - { - if (to_wrong) - error (0, 0, - _("\ -conversions from `%s' and to `%s' are not supported"), - from_pretty, to_pretty); - else - error (0, 0, - _("conversion from `%s' is not supported"), - from_pretty); - } - else - { - if (to_wrong) - error (0, 0, - _("conversion to `%s' is not supported"), - to_pretty); - else - error (0, 0, - _("conversion from `%s' to `%s' is not supported"), - from_pretty, to_pretty); - } - - argp_help (&argp, stderr, ARGP_HELP_SEE, - program_invocation_short_name); - exit (1); - } - else - error (EXIT_FAILURE, errno, - _("failed to start conversion processing")); - } - - /* The output file. Will be opened when we are ready to produce - output. */ - FILE *output = NULL; - - /* Now process the remaining files. Write them to stdout or the file - specified with the `-o' parameter. If we have no file given as - the parameter process all from stdin. */ - if (remaining == argc) - { - if (process_file (cd, stdin, &output, output_file) != 0) - status = EXIT_FAILURE; - } - else - do - { -#ifdef _POSIX_MAPPED_FILES - struct stat64 st; - char *addr; -#endif - int fd, ret; - - if (verbose) - fprintf (stderr, "%s:\n", argv[remaining]); - if (strcmp (argv[remaining], "-") == 0) - fd = 0; - else - { - fd = open (argv[remaining], O_RDONLY); - - if (fd == -1) - { - error (0, errno, _("cannot open input file `%s'"), - argv[remaining]); - status = EXIT_FAILURE; - continue; - } - } - -#ifdef _POSIX_MAPPED_FILES - /* We have possibilities for reading the input file. First try - to mmap() it since this will provide the fastest solution. */ - if (fstat64 (fd, &st) == 0 - && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, - fd, 0)) != MAP_FAILED)) - { - /* Yes, we can use mmap(). The descriptor is not needed - anymore. */ - if (close (fd) != 0) - error (EXIT_FAILURE, errno, - _("error while closing input `%s'"), - argv[remaining]); - - ret = process_block (cd, addr, st.st_size, &output, - output_file); - - /* We don't need the input data anymore. */ - munmap ((void *) addr, st.st_size); - - if (ret != 0) - { - status = EXIT_FAILURE; - - if (ret < 0) - /* We cannot go on with producing output since it might - lead to problem because the last output might leave - the output stream in an undefined state. */ - break; - } - } - else -#endif /* _POSIX_MAPPED_FILES */ - { - /* Read the file in pieces. */ - ret = process_fd (cd, fd, &output, output_file); - - /* Now close the file. */ - close (fd); - - if (ret != 0) - { - /* Something went wrong. */ - status = EXIT_FAILURE; - - if (ret < 0) - /* We cannot go on with producing output since it might - lead to problem because the last output might leave - the output stream in an undefined state. */ - break; - } - } - } - while (++remaining < argc); - - /* Close the output file now. */ - if (output != NULL && fclose (output)) - error (EXIT_FAILURE, errno, _("error while closing output file")); - } - - return status; -} - - -/* Handle program arguments. */ -static error_t -parse_opt (int key, char *arg, struct argp_state *state) -{ - switch (key) - { - case 'f': - from_code = arg; - break; - case 't': - to_code = arg; - break; - case 'o': - output_file = arg; - break; - case 's': - /* Nothing, for now at least. We are not giving out any information - about missing character or so. */ - break; - case 'c': - /* Omit invalid characters from output. */ - omit_invalid = 1; - break; - case OPT_VERBOSE: - verbose = 1; - break; - case OPT_LIST: - list = 1; - break; - default: - return ARGP_ERR_UNKNOWN; - } - return 0; -} - - -static char * -more_help (int key, const char *text, void *input) -{ - char *tp = NULL; - switch (key) - { - case ARGP_KEY_HELP_EXTRA: - /* We print some extra information. */ - if (asprintf (&tp, gettext ("\ -For bug reporting instructions, please see:\n\ -%s.\n"), REPORT_BUGS_TO) < 0) - return NULL; - return tp; - default: - break; - } - return (char *) text; -} - - -/* Print the version information. */ -static void -print_version (FILE *stream, struct argp_state *state) -{ - fprintf (stream, "iconv %s%s\n", PKGVERSION, VERSION); - fprintf (stream, gettext ("\ -Copyright (C) %s Free Software Foundation, Inc.\n\ -This is free software; see the source for copying conditions. There is NO\n\ -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ -"), "2017"); - fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); -} - - -static int -write_output (const char *outbuf, const char *outptr, FILE **output, - const char *output_file) -{ - /* We have something to write out. */ - int errno_save = errno; - - if (*output == NULL) - { - /* Determine output file. */ - if (output_file != NULL && strcmp (output_file, "-") != 0) - { - *output = fopen (output_file, "w"); - if (*output == NULL) - error (EXIT_FAILURE, errno, _("cannot open output file")); - } - else - *output = stdout; - } - - if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf) - || ferror (*output)) - { - /* Error occurred while printing the result. */ - error (0, 0, _("\ -conversion stopped due to problem in writing the output")); - return -1; - } - - errno = errno_save; - - return 0; -} - - -static int -process_block (iconv_t cd, char *addr, size_t len, FILE **output, - const char *output_file) -{ -#define OUTBUF_SIZE 32768 - const char *start = addr; - char outbuf[OUTBUF_SIZE]; - char *outptr; - size_t outlen; - size_t n; - int ret = 0; - - while (len > 0) - { - outptr = outbuf; - outlen = OUTBUF_SIZE; - n = iconv (cd, &addr, &len, &outptr, &outlen); - - if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) - { - ret = 1; - if (len == 0) - n = 0; - else - errno = E2BIG; - } - - if (outptr != outbuf) - { - ret = write_output (outbuf, outptr, output, output_file); - if (ret != 0) - break; - } - - if (n != (size_t) -1) - { - /* All the input test is processed. For state-dependent - character sets we have to flush the state now. */ - outptr = outbuf; - outlen = OUTBUF_SIZE; - n = iconv (cd, NULL, NULL, &outptr, &outlen); - - if (outptr != outbuf) - { - ret = write_output (outbuf, outptr, output, output_file); - if (ret != 0) - break; - } - - if (n != (size_t) -1) - break; - - if (omit_invalid && errno == EILSEQ) - { - ret = 1; - break; - } - } - - if (errno != E2BIG) - { - /* iconv() ran into a problem. */ - switch (errno) - { - case EILSEQ: - if (! omit_invalid) - error (0, 0, _("illegal input sequence at position %ld"), - (long int) (addr - start)); - break; - case EINVAL: - error (0, 0, _("\ -incomplete character or shift sequence at end of buffer")); - break; - case EBADF: - error (0, 0, _("internal error (illegal descriptor)")); - break; - default: - error (0, 0, _("unknown iconv() error %d"), errno); - break; - } - - return -1; - } - } - - return ret; -} - - -static int -process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) -{ - /* we have a problem with reading from a desriptor since we must not - provide the iconv() function an incomplete character or shift - sequence at the end of the buffer. Since we have to deal with - arbitrary encodings we must read the whole text in a buffer and - process it in one step. */ - static char *inbuf = NULL; - static size_t maxlen = 0; - char *inptr = NULL; - size_t actlen = 0; - - while (actlen < maxlen) - { - ssize_t n = read (fd, inptr, maxlen - actlen); - - if (n == 0) - /* No more text to read. */ - break; - - if (n == -1) - { - /* Error while reading. */ - error (0, errno, _("error while reading the input")); - return -1; - } - - inptr += n; - actlen += n; - } - - if (actlen == maxlen) - while (1) - { - ssize_t n; - char *new_inbuf; - - /* Increase the buffer. */ - new_inbuf = (char *) realloc (inbuf, maxlen + 32768); - if (new_inbuf == NULL) - { - error (0, errno, _("unable to allocate buffer for input")); - return -1; - } - inbuf = new_inbuf; - maxlen += 32768; - inptr = inbuf + actlen; - - do - { - n = read (fd, inptr, maxlen - actlen); - - if (n == 0) - /* No more text to read. */ - break; - - if (n == -1) - { - /* Error while reading. */ - error (0, errno, _("error while reading the input")); - return -1; - } - - inptr += n; - actlen += n; - } - while (actlen < maxlen); - - if (n == 0) - /* Break again so we leave both loops. */ - break; - } - - /* Now we have all the input in the buffer. Process it in one run. */ - return process_block (cd, inbuf, actlen, output, output_file); -} - - -static int -process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file) -{ - /* This should be safe since we use this function only for `stdin' and - we haven't read anything so far. */ - return process_fd (cd, fileno (input), output, output_file); -} - - -/* Print all known character sets/encodings. */ -static void *printlist; -static size_t column; -static int not_first; - -static void -insert_print_list (const void *nodep, VISIT value, int level) -{ - if (value == leaf || value == postorder) - { - const struct gconv_alias *s = *(const struct gconv_alias **) nodep; - tsearch (s->fromname, &printlist, (__compar_fn_t) strverscmp); - } -} - -static void -do_print_human (const void *nodep, VISIT value, int level) -{ - if (value == leaf || value == postorder) - { - const char *s = *(const char **) nodep; - size_t len = strlen (s); - size_t cnt; - - while (len > 0 && s[len - 1] == '/') - --len; - - for (cnt = 0; cnt < len; ++cnt) - if (isalnum (s[cnt])) - break; - if (cnt == len) - return; - - if (not_first) - { - putchar (','); - ++column; - - if (column > 2 && column + len > 77) - { - fputs ("\n ", stdout); - column = 2; - } - else - { - putchar (' '); - ++column; - } - } - else - not_first = 1; - - fwrite (s, len, 1, stdout); - column += len; - } -} - -static void -do_print (const void *nodep, VISIT value, int level) -{ - if (value == leaf || value == postorder) - { - const char *s = *(const char **) nodep; - - puts (s); - } -} - -static void -internal_function -add_known_names (struct gconv_module *node) -{ - if (node->left != NULL) - add_known_names (node->left); - if (node->right != NULL) - add_known_names (node->right); - do - { - if (strcmp (node->from_string, "INTERNAL") != 0) - tsearch (node->from_string, &printlist, (__compar_fn_t) strverscmp); - if (strcmp (node->to_string, "INTERNAL") != 0) - tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp); - - node = node->same; - } - while (node != NULL); -} - - -static void -insert_cache (void) -{ - const struct gconvcache_header *header; - const char *strtab; - const struct hash_entry *hashtab; - size_t cnt; - - header = (const struct gconvcache_header *) __gconv_get_cache (); - strtab = (char *) header + header->string_offset; - hashtab = (struct hash_entry *) ((char *) header + header->hash_offset); - - for (cnt = 0; cnt < header->hash_size; ++cnt) - if (hashtab[cnt].string_offset != 0) - { - const char *str = strtab + hashtab[cnt].string_offset; - - if (strcmp (str, "INTERNAL") != 0) - tsearch (str, &printlist, (__compar_fn_t) strverscmp); - } -} - - -static void -internal_function -print_known_names (void) -{ - iconv_t h; - void *cache; - - /* We must initialize the internal databases first. */ - h = iconv_open ("L1", "L1"); - iconv_close (h); - - /* See whether we have a cache. */ - cache = __gconv_get_cache (); - if (cache != NULL) - /* Yep, use only this information. */ - insert_cache (); - else - { - struct gconv_module *modules; - - /* No, then use the information read from the gconv-modules file. - First add the aliases. */ - twalk (__gconv_get_alias_db (), insert_print_list); - - /* Add the from- and to-names from the known modules. */ - modules = __gconv_get_modules_db (); - if (modules != NULL) - add_known_names (modules); - } - - bool human_readable = isatty (fileno (stdout)); - - if (human_readable) - fputs (_("\ -The following list contains all the coded character sets known. This does\n\ -not necessarily mean that all combinations of these names can be used for\n\ -the FROM and TO command line parameters. One coded character set can be\n\ -listed with several different names (aliases).\n\n "), stdout); - - /* Now print the collected names. */ - column = 2; - twalk (printlist, human_readable ? do_print_human : do_print); - - if (human_readable && column != 0) - puts (""); -} |