From ee25ee65c50800db31d0fde1db70338e3c44d8a7 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 28 Nov 2000 13:54:25 +0000 Subject: (normalize_line): Take extra parameter with escape character. Change callers. (open_conversion): Determine mapping of 0x5c as wchar_t value. --- catgets/gencat.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/catgets/gencat.c b/catgets/gencat.c index 0200ca44ef..49b63363ba 100644 --- a/catgets/gencat.c +++ b/catgets/gencat.c @@ -152,10 +152,11 @@ static void write_out (struct catalog *result, const char *output_name, const char *header_name); static struct set_list *find_set (struct catalog *current, int number); static void normalize_line (const char *fname, size_t line, iconv_t cd, - wchar_t *string, wchar_t quote_char); + wchar_t *string, wchar_t quote_char, + wchar_t escape_char); static void read_old (struct catalog *catalog, const char *file_name); static int open_conversion (const char *codesetp, iconv_t *cd_towcp, - iconv_t *cd_tombp); + iconv_t *cd_tombp, wchar_t *escape_charp); int @@ -272,6 +273,7 @@ read_input_file (struct catalog *current, const char *fname) size_t wbufsize; iconv_t cd_towc = (iconv_t) -1; iconv_t cd_tomb = (iconv_t) -1; + wchar_t escape_char = L'\\'; char *codeset = NULL; if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0) @@ -522,7 +524,8 @@ this is the first definition")); /* We need the conversion. */ if (cd_towc == (iconv_t) -1 - && open_conversion (codeset, &cd_towc, &cd_tomb) != 0) + && open_conversion (codeset, &cd_towc, &cd_tomb, + &escape_char) != 0) /* Something is wrong. */ goto out; @@ -648,7 +651,8 @@ duplicated message identifier")); /* We need the conversion. */ if (cd_towc == (iconv_t) -1 - && open_conversion (codeset, &cd_towc, &cd_tomb) != 0) + && open_conversion (codeset, &cd_towc, &cd_tomb, + &escape_char) != 0) /* Something is wrong. */ goto out; @@ -694,7 +698,7 @@ invalid character: message ignored")); /* Strip quote characters, change escape sequences into correct characters etc. */ normalize_line (fname, start_line, cd_towc, wbuf, - current->quote_char); + current->quote_char, escape_char); /* Now the string is free of escape sequences. Convert it back into a multibyte character string. First free the @@ -1053,7 +1057,7 @@ find_set (struct catalog *current, int number) and quote characters. */ static void normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string, - wchar_t quote_char) + wchar_t quote_char, wchar_t escape_char) { int is_quoted; wchar_t *rp = string; @@ -1072,7 +1076,7 @@ normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string, /* We simply end the string when we find the first time an not-escaped quote character. */ break; - else if (*rp == L'\\') + else if (*rp == escape_char) { ++rp; if (quote_char != L'\0' && *rp == quote_char) @@ -1106,10 +1110,6 @@ normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string, *wp++ = L'\f'; ++rp; break; - case L'\\': - *wp++ = L'\\'; - ++rp; - break; case L'0' ... L'7': { int number; @@ -1147,7 +1147,13 @@ normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string, } break; default: - /* Simply ignore the backslash character. */ + if (*rp == escape_char) + { + *wp++ = escape_char; + ++rp; + } + else + /* Simply ignore the backslash character. */; break; } } @@ -1255,8 +1261,16 @@ read_old (struct catalog *catalog, const char *file_name) static int -open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp) +open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp, + wchar_t *escape_charp) { + char buf[2]; + char *bufptr; + size_t bufsize; + wchar_t wbuf[2]; + char *wbufptr; + size_t wbufsize; + /* If the input file does not specify the codeset use the locale's. */ if (codeset == NULL) { @@ -1277,5 +1291,30 @@ open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp) return 1; } + /* One special case for historical reasons is the backslash + character. In some codesets the byte value 0x5c is not mapped to + U005c in Unicode. These charsets then don't have a backslash + character at all. Therefore we have to live with whatever the + codeset provides and recognize, instead of the U005c, the character + the byte value 0x5c is mapped to. */ + buf[0] = '\\'; + buf[1] = '\0'; + bufptr = buf; + bufsize = 2; + + wbufptr = (char *) wbuf; + wbufsize = sizeof (wbuf); + + iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize); + if (bufsize != 0 || wbufsize != 0) + { + /* Something went wrong, we couldn't convert the byte 0x5c. Go + on with using U005c. */ + error (0, 0, gettext ("cannot determine escape character")); + *escape_charp = L'\\'; + } + else + *escape_charp = wbuf[0]; + return 0; } -- cgit v1.2.3