aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--locale/programs/charmap.c53
2 files changed, 58 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 1dff3ed602..2f4c0f03ac 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+1999-12-27 Ulrich Drepper <drepper@cygnus.com>
+
+ * locale/programs/charmap.c (charmap_new_char): Recognize the special
+ character names Uxxxx/Uxxxxxxxx and Pxxxx/Pxxxxxxxx and initialize
+ the UCS4 value from the hexadecimal number xxxx or xxxxxxxx.
+
1999-12-17 Andreas Jaeger <aj@suse.de>
* stdlib/Versions: Export __cxa_atexit and __cxa_finalize.
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c
index 6db2b420a6..9bf0b6a1b6 100644
--- a/locale/programs/charmap.c
+++ b/locale/programs/charmap.c
@@ -800,7 +800,32 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm,
newp->nbytes = nbytes;
memcpy (newp->bytes, bytes, nbytes);
newp->name = obstack_copy (ob, from, len1 + 1);
+
newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9))
+ {
+ /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
+ xxxx and xxxxxxxx are hexadecimal numbers. In this case
+ we use the value of xxxx or xxxxxxxx as the UCS4 value of
+ this character and we don't have to consult the repertoire
+ map.
+
+ If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
+ and xxxxxxxx also give the code point in UCS4 but this must
+ be in the private, i.e., unassigned, area. This should be
+ used for characters which do not (yet) have an equivalent
+ in ISO 10646 and Unicode. */
+ char *endp;
+
+ errno = 0;
+ newp->ucs4 = strtoul (from, &endp, 16);
+ if (endp - from != len1
+ || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
+ || newp->ucs4 >= 0x80000000)
+ /* The conversion failed. Signal that this name cannot denote
+ a valid UCS value. */
+ newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ }
insert_entry (ht, from, len1, newp);
insert_entry (bt, newp->bytes, nbytes, newp);
@@ -847,7 +872,7 @@ hexadecimal range format should use only capital characters"));
&& errno == ERANGE)
|| *to_end != '\0')
{
- lr_error (lr, _("<%s> and <%s> are illegal names for range"));
+ lr_error (lr, _("<%s> and <%s> are illegal names for range"), from, to);
return;
}
@@ -868,7 +893,33 @@ hexadecimal range format should use only capital characters"));
newp->nbytes = nbytes;
memcpy (newp->bytes, bytes, nbytes);
newp->name = name_end;
+
newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ if ((name_end[0] == 'U' || name_end[0] == 'P')
+ && (len1 == 5 || len1 == 9))
+ {
+ /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
+ xxxx and xxxxxxxx are hexadecimal numbers. In this case
+ we use the value of xxxx or xxxxxxxx as the UCS4 value of
+ this character and we don't have to consult the repertoire
+ map.
+
+ If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
+ and xxxxxxxx also give the code point in UCS4 but this must
+ be in the private, i.e., unassigned, area. This should be
+ used for characters which do not (yet) have an equivalent
+ in ISO 10646 and Unicode. */
+ char *endp;
+
+ errno = 0;
+ newp->ucs4 = strtoul (name_end, &endp, 16);
+ if (endp - name_end != len1
+ || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
+ || newp->ucs4 >= 0x80000000)
+ /* The conversion failed. Signal that this name cannot denote
+ a valid UCS value. */
+ newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
+ }
insert_entry (ht, name_end, len1, newp);
insert_entry (bt, newp->bytes, nbytes, newp);