From b79f74cd622578ce5eea1a3ed5840ac53d6b6d93 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 5 Sep 2000 02:41:25 +0000 Subject: Update. 2000-09-03 Bruno Haible * charmaps/EUC-TW: Add commented non-reversible mappings. 2000-09-03 Bruno Haible * charmaps/CP949: New file. 2000-09-03 Bruno Haible * charmaps/GB2312: Remove 0x80..0xA0, 0xAA..0xAF, 0xF8..FF. 2000-09-03 Bruno Haible * charmaps/EUC-JP: Nonreversibly map 0xA1C0 to U+005C and 0x8FA2B7 to U+007E. --- iconvdata/tst-table-from.c | 225 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 iconvdata/tst-table-from.c (limited to 'iconvdata/tst-table-from.c') diff --git a/iconvdata/tst-table-from.c b/iconvdata/tst-table-from.c new file mode 100644 index 0000000000..bd2647eae0 --- /dev/null +++ b/iconvdata/tst-table-from.c @@ -0,0 +1,225 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible , 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Create a table from CHARSET to Unicode. + This is a good test for CHARSET's iconv() module, in particular the + FROM_LOOP BODY macro. */ + +#include +#include +#include +#include +#include + +/* Converts a byte buffer to a hexadecimal string. */ +static const char* +hexbuf (unsigned char buf[], unsigned int buflen) +{ + static char msg[50]; + + switch (buflen) + { + case 1: + sprintf (msg, "0x%02X", buf[0]); + break; + case 2: + sprintf (msg, "0x%02X%02X", buf[0], buf[1]); + break; + case 3: + sprintf (msg, "0x%02X%02X%02X", buf[0], buf[1], buf[2]); + break; + case 4: + sprintf (msg, "0x%02X%02X%02X%02X", buf[0], buf[1], buf[2], buf[3]); + break; + default: + abort (); + } + return msg; +} + +/* Attempts to convert a byte buffer BUF (BUFLEN bytes) to OUT (6 bytes) + using the conversion descriptor CD. Returns the number of written bytes, + or 0 if ambiguous, or -1 if invalid. */ +static int +try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out) +{ + const char *inbuf = (const char *) buf; + size_t inbytesleft = buflen; + char *outbuf = (char *) out; + size_t outbytesleft = 6; + size_t result = iconv (cd, + (char *) &inbuf, &inbytesleft, + &outbuf, &outbytesleft); + if (result == (size_t)(-1)) + { + if (errno == EILSEQ) + { + return -1; + } + else if (errno == EINVAL) + { + return 0; + } + else + { + int saved_errno = errno; + fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen)); + errno = saved_errno; + perror (""); + exit (1); + } + } + else + { + if (inbytesleft != 0) + { + fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n", + hexbuf (buf, buflen), + (long) (buflen - inbytesleft), + (long) (6 - outbytesleft)); + exit (1); + } + return 6 - outbytesleft; + } +} + +/* Returns the out[] buffer as a Unicode value. */ +static unsigned int +utf8_decode (const unsigned char *out, unsigned int outlen) +{ + return (outlen==1 ? out[0] : + outlen==2 ? ((out[0] & 0x1f) << 6) + (out[1] & 0x3f) : + outlen==3 ? ((out[0] & 0x0f) << 12) + ((out[1] & 0x3f) << 6) + (out[2] & 0x3f) : + outlen==4 ? ((out[0] & 0x07) << 18) + ((out[1] & 0x3f) << 12) + ((out[2] & 0x3f) << 6) + (out[3] & 0x3f) : + outlen==5 ? ((out[0] & 0x03) << 24) + ((out[1] & 0x3f) << 18) + ((out[2] & 0x3f) << 12) + ((out[3] & 0x3f) << 6) + (out[4] & 0x3f) : + outlen==6 ? ((out[0] & 0x01) << 30) + ((out[1] & 0x3f) << 24) + ((out[2] & 0x3f) << 18) + ((out[3] & 0x3f) << 12) + ((out[4] & 0x3f) << 6) + (out[5] & 0x3f) : + 0xfffd); +} + +int +main (int argc, char *argv[]) +{ + const char *charset; + iconv_t cd; + + if (argc != 2) + { + fprintf (stderr, "Usage: tst-table-to charset\n"); + exit (1); + } + charset = argv[1]; + + cd = iconv_open ("UTF-8", charset); + if (cd == (iconv_t)(-1)) + { + perror ("iconv_open"); + exit (1); + } + + { + unsigned char out[6]; + unsigned char buf[4]; + unsigned int i0, i1, i2, i3; + int result; + + for (i0 = 0; i0 < 0x100; i0++) + { + buf[0] = i0; + result = try (cd, buf, 1, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X\t0x%04X\n", + i0, utf8_decode (out, result)); + } + else + { + for (i1 = 0; i1 < 0x100; i1++) + { + buf[1] = i1; + result = try (cd, buf, 2, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X%02X\t0x%04X\n", + i0, i1, utf8_decode (out, result)); + } + else + { + for (i2 = 0; i2 < 0x100; i2++) + { + buf[2] = i2; + result = try (cd, buf, 3, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X%02X%02X\t0x%04X\n", + i0, i1, i2, utf8_decode (out, result)); + } + else if (strcmp (charset, "UTF-8")) + { + for (i3 = 0; i3 < 0x100; i3++) + { + buf[3] = i3; + result = try (cd, buf, 4, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X%02X%02X%02X\t0x%04X\n", + i0, i1, i2, i3, + utf8_decode (out, result)); + } + else + { + fprintf (stderr, + "%s: incomplete byte sequence\n", + hexbuf (buf, 4)); + exit (1); + } + } + } + } + } + } + } + } + } + + if (iconv_close (cd) < 0) + { + perror ("iconv_close"); + exit (1); + } + + if (ferror (stdin) || ferror (stdout)) + { + fprintf (stderr, "I/O error\n"); + exit (1); + } + + exit (0); +} -- cgit v1.2.3