aboutsummaryrefslogtreecommitdiff
path: root/locale/programs/charmap.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-12-27 23:22:23 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-12-27 23:22:23 +0000
commit 2d05bb3556b808a605342d80602f7d4ae5262b02 (patch)
tree e2fa8c2ec3dfbf7124ec60a0bed9e132699590ea /locale/programs/charmap.c
parent c41041bc2f84eb8f44ff36c3d14e55944185e665 (diff)
download glibc-2d05bb3556b808a605342d80602f7d4ae5262b02.tar
glibc-2d05bb3556b808a605342d80602f7d4ae5262b02.tar.gz
glibc-2d05bb3556b808a605342d80602f7d4ae5262b02.tar.bz2
glibc-2d05bb3556b808a605342d80602f7d4ae5262b02.zip
Update.
1999-12-27  Ulrich Drepper  <drepper@cygnus.com>

	* locale/programs/charmap.c (charmap_new_char): Recognize special
	character names Uxxxx and Pxxxx and initialize the UCS4 value from
	the value of xxxx.
Diffstat (limited to 'locale/programs/charmap.c')
-rw-r--r-- locale/programs/charmap.c 53
1 file changed, 52 insertions, 1 deletion
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c
index 6db2b420a6..9bf0b6a1b6 100644
--- a/locale/programs/charmap.c
+++ b/locale/programs/charmap.c
@@ -800,7 +800,32 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm,
newp->nbytes = nbytes;
memcpy (newp->bytes, bytes, nbytes);
newp->name = obstack_copy (ob, from, len1 + 1);
+
newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9))
+ {
+ /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
+ xxxx and xxxxxxxx are hexadecimal numbers. In this case
+ we use the value of xxxx or xxxxxxxx as the UCS4 value of
+ this character and we don't have to consult the repertoire
+ map.
+
+ If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
+ and xxxxxxxx also give the code point in UCS4 but this must
+ be in the private, i.e., unassigned, area. This should be
+ used for characters which do not (yet) have an equivalent
+ in ISO 10646 and Unicode. */
+ char *endp;
+
+ errno = 0;
+ newp->ucs4 = strtoul (from, &endp, 16);
+ if (endp - from != len1
+ || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
+ || newp->ucs4 >= 0x80000000)
+ /* This wasn't successful. Signal this name cannot be a
+ correct UCS value. */
+ newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ }
insert_entry (ht, from, len1, newp);
insert_entry (bt, newp->bytes, nbytes, newp);
@@ -847,7 +872,7 @@ hexadecimal range format should use only capital characters"));
&& errno == ERANGE)
|| *to_end != '\0')
{
- lr_error (lr, _("<%s> and <%s> are illegal names for range"));
+ lr_error (lr, _("<%s> and <%s> are illegal names for range"), from, to);
return;
}
@@ -868,7 +893,33 @@ hexadecimal range format should use only capital characters"));
newp->nbytes = nbytes;
memcpy (newp->bytes, bytes, nbytes);
newp->name = name_end;
+
newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ if ((name_end[0] == 'U' || name_end[0] == 'P')
+ && (len1 == 5 || len1 == 9))
+ {
+ /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
+ xxxx and xxxxxxxx are hexadecimal numbers. In this case
+ we use the value of xxxx or xxxxxxxx as the UCS4 value of
+ this character and we don't have to consult the repertoire
+ map.
+
+ If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
+ and xxxxxxxx also give the code point in UCS4 but this must
+ be in the private, i.e., unassigned, area. This should be
+ used for characters which do not (yet) have an equivalent
+ in ISO 10646 and Unicode. */
+ char *endp;
+
+ errno = 0;
+ newp->ucs4 = strtoul (name_end, &endp, 16);
+ if (endp - name_end != len1
+ || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
+ || newp->ucs4 >= 0x80000000)
+ /* This wasn't successful. Signal this name cannot be a
+ correct UCS value. */
+ newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ }
insert_entry (ht, name_end, len1, newp);
insert_entry (bt, newp->bytes, nbytes, newp);