aboutsummaryrefslogtreecommitdiff
path: root/REORG.TODO/iconvdata/euc-kr.c
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/iconvdata/euc-kr.c')
-rw-r--r--REORG.TODO/iconvdata/euc-kr.c165
1 files changed, 165 insertions, 0 deletions
diff --git a/REORG.TODO/iconvdata/euc-kr.c b/REORG.TODO/iconvdata/euc-kr.c
new file mode 100644
index 0000000000..736de90429
--- /dev/null
+++ b/REORG.TODO/iconvdata/euc-kr.c
@@ -0,0 +1,165 @@
+/* Mapping tables for EUC-KR handling.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
+ and Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <ksc5601.h>
+
+
+/* Convert UCS4 code point CH to EUC-KR in CP[0..1]: one byte plus '\0' for
+   CH <= 0x9f, two bytes otherwise, or two '\0' bytes if CH is unmappable.  */
+static inline void
+__attribute ((always_inline))
+euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
+{
+  if (ch <= 0x9f)
+    {
+      /* Pass the value through unchanged.  There is no mapping for U+005C
+         but we nevertheless map it to \x5c.  */
+      cp[0] = (unsigned char) ch;
+      cp[1] = '\0';
+    }
+  else if (__builtin_expect (ch == 0x20a9, 0))
+    {
+      /* Half-width Korean Currency WON sign.  There is no equivalent in
+         EUC-KR.  Some mappings use \x5c because this is what some old
+         Korean ASCII variants used but this is causing problems.  We map
+         it to the FULL WIDTH WON SIGN.  */
+      cp[0] = '\xa3';
+      cp[1] = '\xdc';
+    }
+  else if (__builtin_expect (ucs4_to_ksc5601 (ch, cp, 2)
+                             != __UNKNOWN_10646_CHAR, 1))
+    {
+      /* Set the high bit of both bytes (presumably filled in by the call).  */
+      cp[0] |= 0x80;
+      cp[1] |= 0x80;
+    }
+  else
+    cp[0] = cp[1] = '\0';
+}
+
+
+/* Definitions used in the body of the `gconv' function. An EUC-KR character takes 1 or 2 bytes; the UCS4 side uses 4-byte units. */
+#define CHARSET_NAME "EUC-KR//"
+#define FROM_LOOP from_euc_kr
+#define TO_LOOP to_euc_kr
+#define DEFINE_INIT 1
+#define DEFINE_FINI 1
+#define MIN_NEEDED_FROM 1
+#define MAX_NEEDED_FROM 2
+#define MIN_NEEDED_TO 4
+#define ONE_DIRECTION 0
+
+
+/* First define the conversion function from EUC-KR to UCS4. */
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+#define BODY \
+ { \
+ uint32_t ch = *inptr; \
+ \
+ if (ch <= 0x9f) \
+ ++inptr; /* Single-byte character, passed through unchanged. */ \
+ /* 0xfe(->0x7e : row 94) and 0xc9(->0x49 : row 41) are \
+ user-defined areas. Lead bytes 0xa0, 0xc9 and those above 0xfe are rejected here. */ \
+ else if (__builtin_expect (ch == 0xa0, 0) \
+ || __builtin_expect (ch > 0xfe, 0) \
+ || __builtin_expect (ch == 0xc9, 0)) \
+ { \
+ /* This is illegal. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ } \
+ else \
+ { \
+ /* Two-byte character. First test whether the next byte \
+ is also available; ksc5601_to_ucs4 returns 0 if it is not. */ \
+ ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0x80); \
+ if (__glibc_unlikely (ch == 0)) \
+ { \
+ /* The second byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
+ /* This is an illegal character. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
+ } \
+ \
+ put32 (outptr, ch); \
+ outptr += 4; \
+ }
+#define LOOP_NEED_FLAGS
+#define ONEBYTE_BODY \
+ { /* Values up to 0x9f convert to themselves; anything else yields WEOF. */ \
+ if (c <= 0x9f) \
+ return c; \
+ else \
+ return WEOF; \
+ }
+#include <iconv/loop.c>
+
+
+/* Next, define the other direction: UCS4 (4-byte units) to EUC-KR. */
+#define MIN_NEEDED_INPUT MIN_NEEDED_TO
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
+#define LOOPFCT TO_LOOP
+#define BODY \
+  { \
+    uint32_t ch = get32 (inptr); \
+    unsigned char cp[2]; \
+ \
+    /* Decomposing Hangul syllables not available in KS C 5601 into \
+       Jamos should be considered either here or in euckr_from_ucs4() */ \
+    euckr_from_ucs4 (ch, cp); \
+ \
+    if (__glibc_unlikely (cp[0] == '\0') && ch != 0) \
+      { \
+        UNICODE_TAG_HANDLER (ch, 4); \
+ \
+        /* Illegal character: a zero first byte for a nonzero CH. */ \
+        STANDARD_TO_LOOP_ERR_HANDLER (4); \
+      } \
+ \
+    *outptr++ = cp[0]; \
+    /* Now test for a possible second byte and write this if possible. */ \
+    if (cp[1] != '\0') \
+      { \
+        if (__glibc_unlikely (outptr >= outend)) \
+          { \
+            /* The result does not fit into the buffer; take back the first byte. */ \
+            --outptr; \
+            result = __GCONV_FULL_OUTPUT; \
+            break; \
+          } \
+        *outptr++ = cp[1]; \
+      } \
+ \
+    inptr += 4; \
+  }
+#define LOOP_NEED_FLAGS
+#include <iconv/loop.c>
+
+
+/* Now define the toplevel functions. */
+#include <iconv/skeleton.c>