summary | refs | log | tree | commit | diff
path: root/wcsmbs/c16rtomb.c
diff options
context:
space:
mode:
author: Joseph Myers <joseph@codesourcery.com> 2018-10-19 16:31:29 +0000
committer: Joseph Myers <joseph@codesourcery.com> 2018-10-19 16:31:29 +0000
commit: d0a74159792477e5922f53fa2aa6d58eb8265a14 (patch)
tree: fcce06511d2a91afa649f629cde15875b6fe6f6a /wcsmbs/c16rtomb.c
parent: f997b4be18f7e57d757d39e42f7715db26528aa0 (diff)
download: glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.tar
glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.tar.gz
glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.tar.bz2
glibc-d0a74159792477e5922f53fa2aa6d58eb8265a14.zip
Handle surrogate pairs in c16rtomb (bug 23794, DR#488, C2X).
The c16rtomb implementation has:

    // XXX The ISO C 11 spec I have does not say anything about handling
    // XXX surrogates in this interface.

The DR#488 resolution, as applied to C2X, requires surrogate pairs to be handled here (so the first call returns 0 and stores the high surrogate in the mbstate_t, while the second call combines the surrogates, produces a multibyte character and returns the number of bytes written). This patch implements that. (mbrtoc16 already handled producing surrogates as output.)

Tested for x86_64.

[BZ #23794]
* wcsmbs/c16rtomb.c (c16rtomb): Save first character of surrogate pair and return 0 in that case, and use saved character to interpret following character.
* wcsmbs/tst-c16-surrogate.c: New file.
* wcsmbs/Makefile (tests): Add tst-c16-surrogate.c.
[$(run-built-tests) = yes] ($(objpfx)tst-c16-surrogate.out): Depend on $(gen-locales).
Diffstat (limited to 'wcsmbs/c16rtomb.c')
-rw-r--r-- wcsmbs/c16rtomb.c 41
1 files changed, 38 insertions, 3 deletions
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
index 48a63d067b..74950d8173 100644
--- a/wcsmbs/c16rtomb.c
+++ b/wcsmbs/c16rtomb.c
@@ -26,7 +26,42 @@ static mbstate_t state;
/* Convert the UTF-16 code unit C16 to a multibyte sequence stored at S,
   keeping surrogate-pair state in *PS (the file-static `state' when PS
   is NULL).  A high surrogate (0xd800-0xdbff) is only saved in PS and 0
   is returned; on the following call it is combined with a low
   surrogate (0xdc00-0xdfff) into a single code point, which is handed
   to wcrtomb and whose result is returned.  Lines marked `-' are the
   previous implementation removed by this patch; lines marked `+' are
   the replacement.  */
size_t
c16rtomb (char *s, char16_t c16, mbstate_t *ps)
{
- // XXX The ISO C 11 spec I have does not say anything about handling
- // XXX surrogates in this interface.
- return wcrtomb (s, c16, ps ?: &state);
+ wchar_t wc = c16;
+
+ if (ps == NULL)
+ ps = &state;
+
+ if (s == NULL)
+ {
+ /* Reset any state relating to surrogate pairs. */
+ ps->__count &= 0x7fffffff;
+ ps->__value.__wch = 0;
+ wc = 0;
+ }
+
/* The top bit of __count flags a pending high surrogate; the surrogate
   itself is kept in __value.__wch (see the `else if' branch below).  */
+ if (ps->__count & 0x80000000)
+ {
+ /* The previous call passed in the first surrogate of a
+ surrogate pair. */
+ ps->__count &= 0x7fffffff;
+ if (wc >= 0xdc00 && wc < 0xe000)
+ wc = (0x10000
+ + ((ps->__value.__wch & 0x3ff) << 10)
+ + (wc & 0x3ff));
+ else
+ /* This is not a low surrogate; ensure an EILSEQ error by
+ trying to decode the high surrogate as a wide character on
+ its own. */
+ wc = ps->__value.__wch;
/* Not part of the `else': the saved surrogate is cleared on both
   paths, since it has been consumed either way.  */
+ ps->__value.__wch = 0;
+ }
+ else if (wc >= 0xd800 && wc < 0xdc00)
+ {
+ /* The high part of a surrogate pair. */
+ ps->__count |= 0x80000000;
+ ps->__value.__wch = wc;
+ return 0;
+ }
+
/* Every remaining case, including the S == NULL reset (where WC was
   forced to 0 above), is delegated to wcrtomb.  */
+ return wcrtomb (s, wc, ps);
}