aboutsummaryrefslogtreecommitdiff
path: root/iconv/iconv_prog.c
diff options
context:
space:
mode:
Diffstat (limited to 'iconv/iconv_prog.c')
-rw-r--r--iconv/iconv_prog.c803
1 files changed, 0 insertions, 803 deletions
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
deleted file mode 100644
index 1397d2e9bd..0000000000
--- a/iconv/iconv_prog.c
+++ /dev/null
@@ -1,803 +0,0 @@
-/* Convert text in given files from the specified from-set to the to-set.
- Copyright (C) 1998-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, see <http://www.gnu.org/licenses/>. */
-
-#include <argp.h>
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <error.h>
-#include <fcntl.h>
-#include <iconv.h>
-#include <langinfo.h>
-#include <locale.h>
-#include <search.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <libintl.h>
-#ifdef _POSIX_MAPPED_FILES
-# include <sys/mman.h>
-#endif
-#include <charmap.h>
-#include <gconv_int.h>
-#include "iconv_prog.h"
-#include "iconvconfig.h"
-
-/* Get libc version number. */
-#include "../version.h"
-
-#define PACKAGE _libc_intl_domainname
-
-
-/* Name and version of program. */
-static void print_version (FILE *stream, struct argp_state *state);
-void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
-
-#define OPT_VERBOSE 1000
-#define OPT_LIST 'l'
-
-/* Definitions of arguments for argp functions. */
-static const struct argp_option options[] =
-{
- { NULL, 0, NULL, 0, N_("Input/Output format specification:") },
- { "from-code", 'f', N_("NAME"), 0, N_("encoding of original text") },
- { "to-code", 't', N_("NAME"), 0, N_("encoding for output") },
- { NULL, 0, NULL, 0, N_("Information:") },
- { "list", 'l', NULL, 0, N_("list all known coded character sets") },
- { NULL, 0, NULL, 0, N_("Output control:") },
- { NULL, 'c', NULL, 0, N_("omit invalid characters from output") },
- { "output", 'o', N_("FILE"), 0, N_("output file") },
- { "silent", 's', NULL, 0, N_("suppress warnings") },
- { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
- { NULL, 0, NULL, 0, NULL }
-};
-
-/* Short description of program. */
-static const char doc[] = N_("\
-Convert encoding of given files from one encoding to another.");
-
-/* Strings for arguments in help texts. */
-static const char args_doc[] = N_("[FILE...]");
-
-/* Prototype for option handler. */
-static error_t parse_opt (int key, char *arg, struct argp_state *state);
-
-/* Function to print some extra text in the help message. */
-static char *more_help (int key, const char *text, void *input);
-
-/* Data structure to communicate with argp functions. */
-static struct argp argp =
-{
- options, parse_opt, args_doc, doc, NULL, more_help
-};
-
-/* Code sets to convert from and to respectively. An empty string as the
- default causes the 'iconv_open' function to look up the charset of the
- currently selected locale and use it. */
-static const char *from_code = "";
-static const char *to_code = "";
-
-/* File to write output to. If NULL write to stdout. */
-static const char *output_file;
-
-/* Nonzero if verbose ouput is wanted. */
-int verbose;
-
-/* Nonzero if list of all coded character sets is wanted. */
-static int list;
-
-/* If nonzero omit invalid character from output. */
-int omit_invalid;
-
-/* Prototypes for the functions doing the actual work. */
-static int process_block (iconv_t cd, char *addr, size_t len, FILE **output,
- const char *output_file);
-static int process_fd (iconv_t cd, int fd, FILE **output,
- const char *output_file);
-static int process_file (iconv_t cd, FILE *input, FILE **output,
- const char *output_file);
-static void print_known_names (void) internal_function;
-
-
-int
-main (int argc, char *argv[])
-{
- int status = EXIT_SUCCESS;
- int remaining;
- iconv_t cd;
- const char *orig_to_code;
- struct charmap_t *from_charmap = NULL;
- struct charmap_t *to_charmap = NULL;
-
- /* Set locale via LC_ALL. */
- setlocale (LC_ALL, "");
-
- /* Set the text message domain. */
- textdomain (_libc_intl_domainname);
-
- /* Parse and process arguments. */
- argp_parse (&argp, argc, argv, 0, &remaining, NULL);
-
- /* List all coded character sets if wanted. */
- if (list)
- {
- print_known_names ();
- exit (EXIT_SUCCESS);
- }
-
- /* If we have to ignore errors make sure we use the appropriate name for
- the to-character-set. */
- orig_to_code = to_code;
- if (omit_invalid)
- {
- const char *errhand = strchrnul (to_code, '/');
- int nslash = 2;
- char *newp;
- char *cp;
-
- if (*errhand == '/')
- {
- --nslash;
- errhand = strchrnul (errhand + 1, '/');
-
- if (*errhand == '/')
- {
- --nslash;
- errhand = strchr (errhand, '\0');
- }
- }
-
- newp = (char *) alloca (errhand - to_code + nslash + 7 + 1);
- cp = mempcpy (newp, to_code, errhand - to_code);
- while (nslash-- > 0)
- *cp++ = '/';
- if (cp[-1] != '/')
- *cp++ = ',';
- memcpy (cp, "IGNORE", sizeof ("IGNORE"));
-
- to_code = newp;
- }
-
- /* POSIX 1003.2b introduces a silly thing: the arguments to -t anf -f
- can be file names of charmaps. In this case iconv will have to read
- those charmaps and use them to do the conversion. But there are
- holes in the specification. There is nothing said that if -f is a
- charmap filename that -t must be, too. And vice versa. There is
- also no word about the symbolic names used. What if they don't
- match? */
- if (strchr (from_code, '/') != NULL)
- /* The from-name might be a charmap file name. Try reading the
- file. */
- from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0);
-
- if (strchr (orig_to_code, '/') != NULL)
- /* The to-name might be a charmap file name. Try reading the
- file. */
- to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0);
-
-
- /* At this point we have to handle two cases. The first one is
- where a charmap is used for the from- or to-charset, or both. We
- handle this special since it is very different from the sane way of
- doing things. The other case allows converting using the iconv()
- function. */
- if (from_charmap != NULL || to_charmap != NULL)
- /* Construct the conversion table and do the conversion. */
- status = charmap_conversion (from_code, from_charmap, to_code, to_charmap,
- argc, remaining, argv, output_file);
- else
- {
- /* Let's see whether we have these coded character sets. */
- cd = iconv_open (to_code, from_code);
- if (cd == (iconv_t) -1)
- {
- if (errno == EINVAL)
- {
- /* Try to be nice with the user and tell her which of the
- two encoding names is wrong. This is possible because
- all supported encodings can be converted from/to Unicode,
- in other words, because the graph of encodings is
- connected. */
- bool from_wrong =
- (iconv_open ("UTF-8", from_code) == (iconv_t) -1
- && errno == EINVAL);
- bool to_wrong =
- (iconv_open (to_code, "UTF-8") == (iconv_t) -1
- && errno == EINVAL);
- const char *from_pretty =
- (from_code[0] ? from_code : nl_langinfo (CODESET));
- const char *to_pretty =
- (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET));
-
- if (from_wrong)
- {
- if (to_wrong)
- error (0, 0,
- _("\
-conversions from `%s' and to `%s' are not supported"),
- from_pretty, to_pretty);
- else
- error (0, 0,
- _("conversion from `%s' is not supported"),
- from_pretty);
- }
- else
- {
- if (to_wrong)
- error (0, 0,
- _("conversion to `%s' is not supported"),
- to_pretty);
- else
- error (0, 0,
- _("conversion from `%s' to `%s' is not supported"),
- from_pretty, to_pretty);
- }
-
- argp_help (&argp, stderr, ARGP_HELP_SEE,
- program_invocation_short_name);
- exit (1);
- }
- else
- error (EXIT_FAILURE, errno,
- _("failed to start conversion processing"));
- }
-
- /* The output file. Will be opened when we are ready to produce
- output. */
- FILE *output = NULL;
-
- /* Now process the remaining files. Write them to stdout or the file
- specified with the `-o' parameter. If we have no file given as
- the parameter process all from stdin. */
- if (remaining == argc)
- {
- if (process_file (cd, stdin, &output, output_file) != 0)
- status = EXIT_FAILURE;
- }
- else
- do
- {
-#ifdef _POSIX_MAPPED_FILES
- struct stat64 st;
- char *addr;
-#endif
- int fd, ret;
-
- if (verbose)
- fprintf (stderr, "%s:\n", argv[remaining]);
- if (strcmp (argv[remaining], "-") == 0)
- fd = 0;
- else
- {
- fd = open (argv[remaining], O_RDONLY);
-
- if (fd == -1)
- {
- error (0, errno, _("cannot open input file `%s'"),
- argv[remaining]);
- status = EXIT_FAILURE;
- continue;
- }
- }
-
-#ifdef _POSIX_MAPPED_FILES
- /* We have possibilities for reading the input file. First try
- to mmap() it since this will provide the fastest solution. */
- if (fstat64 (fd, &st) == 0
- && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
- fd, 0)) != MAP_FAILED))
- {
- /* Yes, we can use mmap(). The descriptor is not needed
- anymore. */
- if (close (fd) != 0)
- error (EXIT_FAILURE, errno,
- _("error while closing input `%s'"),
- argv[remaining]);
-
- ret = process_block (cd, addr, st.st_size, &output,
- output_file);
-
- /* We don't need the input data anymore. */
- munmap ((void *) addr, st.st_size);
-
- if (ret != 0)
- {
- status = EXIT_FAILURE;
-
- if (ret < 0)
- /* We cannot go on with producing output since it might
- lead to problem because the last output might leave
- the output stream in an undefined state. */
- break;
- }
- }
- else
-#endif /* _POSIX_MAPPED_FILES */
- {
- /* Read the file in pieces. */
- ret = process_fd (cd, fd, &output, output_file);
-
- /* Now close the file. */
- close (fd);
-
- if (ret != 0)
- {
- /* Something went wrong. */
- status = EXIT_FAILURE;
-
- if (ret < 0)
- /* We cannot go on with producing output since it might
- lead to problem because the last output might leave
- the output stream in an undefined state. */
- break;
- }
- }
- }
- while (++remaining < argc);
-
- /* Close the output file now. */
- if (output != NULL && fclose (output))
- error (EXIT_FAILURE, errno, _("error while closing output file"));
- }
-
- return status;
-}
-
-
-/* Handle program arguments. */
-static error_t
-parse_opt (int key, char *arg, struct argp_state *state)
-{
- switch (key)
- {
- case 'f':
- from_code = arg;
- break;
- case 't':
- to_code = arg;
- break;
- case 'o':
- output_file = arg;
- break;
- case 's':
- /* Nothing, for now at least. We are not giving out any information
- about missing character or so. */
- break;
- case 'c':
- /* Omit invalid characters from output. */
- omit_invalid = 1;
- break;
- case OPT_VERBOSE:
- verbose = 1;
- break;
- case OPT_LIST:
- list = 1;
- break;
- default:
- return ARGP_ERR_UNKNOWN;
- }
- return 0;
-}
-
-
-static char *
-more_help (int key, const char *text, void *input)
-{
- char *tp = NULL;
- switch (key)
- {
- case ARGP_KEY_HELP_EXTRA:
- /* We print some extra information. */
- if (asprintf (&tp, gettext ("\
-For bug reporting instructions, please see:\n\
-%s.\n"), REPORT_BUGS_TO) < 0)
- return NULL;
- return tp;
- default:
- break;
- }
- return (char *) text;
-}
-
-
-/* Print the version information. */
-static void
-print_version (FILE *stream, struct argp_state *state)
-{
- fprintf (stream, "iconv %s%s\n", PKGVERSION, VERSION);
- fprintf (stream, gettext ("\
-Copyright (C) %s Free Software Foundation, Inc.\n\
-This is free software; see the source for copying conditions. There is NO\n\
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2017");
- fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
-}
-
-
/* Write the bytes from OUTBUF up to, but not including, OUTPTR to
   *OUTPUT.  If *OUTPUT is still NULL the stream is set up first: stdout
   when OUTPUT_FILE is NULL or "-", otherwise OUTPUT_FILE opened for
   writing (the program exits if that fails).  Returns 0 on success with
   errno as it was on entry, -1 after a write error.  */
static int
write_output (const char *outbuf, const char *outptr, FILE **output,
              const char *output_file)
{
  /* The caller still wants to inspect the errno set by iconv().  */
  const int saved_errno = errno;
  const size_t pending = (size_t) (outptr - outbuf);

  if (*output == NULL)
    {
      /* Determine output file.  */
      if (output_file == NULL || strcmp (output_file, "-") == 0)
        *output = stdout;
      else
        {
          *output = fopen (output_file, "w");
          if (*output == NULL)
            error (EXIT_FAILURE, errno, _("cannot open output file"));
        }
    }

  if (fwrite (outbuf, 1, pending, *output) < pending || ferror (*output))
    {
      /* Error occurred while printing the result.  */
      error (0, 0,
             _("conversion stopped due to problem in writing the output"));
      return -1;
    }

  errno = saved_errno;
  return 0;
}
-
-
-static int
-process_block (iconv_t cd, char *addr, size_t len, FILE **output,
- const char *output_file)
-{
-#define OUTBUF_SIZE 32768
- const char *start = addr;
- char outbuf[OUTBUF_SIZE];
- char *outptr;
- size_t outlen;
- size_t n;
- int ret = 0;
-
- while (len > 0)
- {
- outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, &addr, &len, &outptr, &outlen);
-
- if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
- {
- ret = 1;
- if (len == 0)
- n = 0;
- else
- errno = E2BIG;
- }
-
- if (outptr != outbuf)
- {
- ret = write_output (outbuf, outptr, output, output_file);
- if (ret != 0)
- break;
- }
-
- if (n != (size_t) -1)
- {
- /* All the input test is processed. For state-dependent
- character sets we have to flush the state now. */
- outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, NULL, NULL, &outptr, &outlen);
-
- if (outptr != outbuf)
- {
- ret = write_output (outbuf, outptr, output, output_file);
- if (ret != 0)
- break;
- }
-
- if (n != (size_t) -1)
- break;
-
- if (omit_invalid && errno == EILSEQ)
- {
- ret = 1;
- break;
- }
- }
-
- if (errno != E2BIG)
- {
- /* iconv() ran into a problem. */
- switch (errno)
- {
- case EILSEQ:
- if (! omit_invalid)
- error (0, 0, _("illegal input sequence at position %ld"),
- (long int) (addr - start));
- break;
- case EINVAL:
- error (0, 0, _("\
-incomplete character or shift sequence at end of buffer"));
- break;
- case EBADF:
- error (0, 0, _("internal error (illegal descriptor)"));
- break;
- default:
- error (0, 0, _("unknown iconv() error %d"), errno);
- break;
- }
-
- return -1;
- }
- }
-
- return ret;
-}
-
-
/* Read everything that can be read from the descriptor FD into memory
   and convert it with process_block.  Returns the value of
   process_block, or -1 (after printing a diagnostic) if reading fails
   or the buffer cannot be enlarged.  */
static int
process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
{
  /* We have a problem with reading from a descriptor since we must not
     provide the iconv() function an incomplete character or shift
     sequence at the end of the buffer.  Since we have to deal with
     arbitrary encodings we must read the whole text in a buffer and
     process it in one step.  */
  enum { INBUF_CHUNK = 32768 };
  /* The buffer is static so that it is reused by later calls.  Every
     read therefore has to be aimed at INBUF + ACTLEN; a read position
     that starts out as NULL is wrong as soon as a buffer already
     exists from an earlier call.  */
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  while (1)
    {
      ssize_t n;

      if (actlen == maxlen)
        {
          /* The buffer is full, or none was allocated so far.
             Increase it.  */
          char *new_inbuf = realloc (inbuf, maxlen + INBUF_CHUNK);
          if (new_inbuf == NULL)
            {
              error (0, errno, _("unable to allocate buffer for input"));
              return -1;
            }
          inbuf = new_inbuf;
          maxlen += INBUF_CHUNK;
        }

      n = read (fd, inbuf + actlen, maxlen - actlen);

      if (n == 0)
        /* No more text to read.  */
        break;

      if (n == -1)
        {
          /* Error while reading.  */
          error (0, errno, _("error while reading the input"));
          return -1;
        }

      actlen += n;
    }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (cd, inbuf, actlen, output, output_file);
}
-
-
/* Convert everything readable from the stream INPUT by handing its
   descriptor to process_fd; the result of process_fd is returned.  */
static int
process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file)
{
  /* Going below the stream level should be safe: the function is used
     for `stdin' only and nothing has been read from it so far.  */
  const int fd = fileno (input);

  return process_fd (cd, fd, output, output_file);
}
-
-
-/* Print all known character sets/encodings. */
-static void *printlist;
-static size_t column;
-static int not_first;
-
-static void
-insert_print_list (const void *nodep, VISIT value, int level)
-{
- if (value == leaf || value == postorder)
- {
- const struct gconv_alias *s = *(const struct gconv_alias **) nodep;
- tsearch (s->fromname, &printlist, (__compar_fn_t) strverscmp);
- }
-}
-
-static void
-do_print_human (const void *nodep, VISIT value, int level)
-{
- if (value == leaf || value == postorder)
- {
- const char *s = *(const char **) nodep;
- size_t len = strlen (s);
- size_t cnt;
-
- while (len > 0 && s[len - 1] == '/')
- --len;
-
- for (cnt = 0; cnt < len; ++cnt)
- if (isalnum (s[cnt]))
- break;
- if (cnt == len)
- return;
-
- if (not_first)
- {
- putchar (',');
- ++column;
-
- if (column > 2 && column + len > 77)
- {
- fputs ("\n ", stdout);
- column = 2;
- }
- else
- {
- putchar (' ');
- ++column;
- }
- }
- else
- not_first = 1;
-
- fwrite (s, len, 1, stdout);
- column += len;
- }
-}
-
/* twalk callback printing the visited name on a line of its own.  Only
   the `leaf' and `postorder' visits produce output.  */
static void
do_print (const void *nodep, VISIT value, int level)
{
  const char *const *entry = nodep;

  if (value != leaf && value != postorder)
    return;

  puts (*entry);
}
-
-static void
-internal_function
-add_known_names (struct gconv_module *node)
-{
- if (node->left != NULL)
- add_known_names (node->left);
- if (node->right != NULL)
- add_known_names (node->right);
- do
- {
- if (strcmp (node->from_string, "INTERNAL") != 0)
- tsearch (node->from_string, &printlist, (__compar_fn_t) strverscmp);
- if (strcmp (node->to_string, "INTERNAL") != 0)
- tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
-
- node = node->same;
- }
- while (node != NULL);
-}
-
-
-static void
-insert_cache (void)
-{
- const struct gconvcache_header *header;
- const char *strtab;
- const struct hash_entry *hashtab;
- size_t cnt;
-
- header = (const struct gconvcache_header *) __gconv_get_cache ();
- strtab = (char *) header + header->string_offset;
- hashtab = (struct hash_entry *) ((char *) header + header->hash_offset);
-
- for (cnt = 0; cnt < header->hash_size; ++cnt)
- if (hashtab[cnt].string_offset != 0)
- {
- const char *str = strtab + hashtab[cnt].string_offset;
-
- if (strcmp (str, "INTERNAL") != 0)
- tsearch (str, &printlist, (__compar_fn_t) strverscmp);
- }
-}
-
-
-static void
-internal_function
-print_known_names (void)
-{
- iconv_t h;
- void *cache;
-
- /* We must initialize the internal databases first. */
- h = iconv_open ("L1", "L1");
- iconv_close (h);
-
- /* See whether we have a cache. */
- cache = __gconv_get_cache ();
- if (cache != NULL)
- /* Yep, use only this information. */
- insert_cache ();
- else
- {
- struct gconv_module *modules;
-
- /* No, then use the information read from the gconv-modules file.
- First add the aliases. */
- twalk (__gconv_get_alias_db (), insert_print_list);
-
- /* Add the from- and to-names from the known modules. */
- modules = __gconv_get_modules_db ();
- if (modules != NULL)
- add_known_names (modules);
- }
-
- bool human_readable = isatty (fileno (stdout));
-
- if (human_readable)
- fputs (_("\
-The following list contains all the coded character sets known. This does\n\
-not necessarily mean that all combinations of these names can be used for\n\
-the FROM and TO command line parameters. One coded character set can be\n\
-listed with several different names (aliases).\n\n "), stdout);
-
- /* Now print the collected names. */
- column = 2;
- twalk (printlist, human_readable ? do_print_human : do_print);
-
- if (human_readable && column != 0)
- puts ("");
-}