diff options
author | Roland McGrath <roland@gnu.org> | 1995-10-16 01:37:51 +0000 |
---|---|---|
committer | Roland McGrath <roland@gnu.org> | 1995-10-16 01:37:51 +0000 |
commit | 8f5ca04bc7fd53741d80117df992995ace8f6d2d (patch) | |
tree | e39c13fc198b22ec55647259a8080051988e8c69 /stdio/_itoa.c | |
parent | 5d82cf5c55f56ae10d3b0a205d1fcc7de1cf56a0 (diff) | |
download | glibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.tar glibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.tar.gz glibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.tar.bz2 glibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.zip |
Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>
* malloc/malloc.c (_malloc_internal): Performance fix. Move
if statement out of loop.
* stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster
implementation using GMP functions. Contributed by
Torbjorn Granlund and Ulrich Drepper.
* stdio/test_rdwr.c: Include <errno.h>.
* sysdeps/i386/i586/Implies: New file.
New highly optimized string functions for i[345]86.
* sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files.
* sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files.
* sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files.
* sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files.
* sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files.
* sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files.
* sysdeps/i386/i586/strlen.S: New file.
* sysdeps/i386/memchr.c: Removed. There is now an assembler version.
* sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did
not correspond to used values.
* sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper
around a kernel header file.
* sysdeps/unix/sysv/linux/Dist: Add it.
* sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers):
Likewise.
* sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of
defining the values ourselves, we use a kernel header file.
* sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system
call handler for i586.
* sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>
* malloc/malloc.c (_malloc_internal): Performance fix. Move
if statement out of loop.
* stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster
implementation using GMP functions. Contributed by
Torbjorn Granlund and Ulrich Drepper.
* stdio/test_rdwr.c: Include <errno.h>.
* sysdeps/i386/i586/Implies: New file.
New highly optimized string functions for i[345]86.
* sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files.
* sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files.
* sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files.
* sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files.
* sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files.
* sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files.
* sysdeps/i386/i586/strlen.S: New file.
* sysdeps/i386/memchr.c: Removed. There is now an assembler version.
* sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did
not correspond to used values.
* sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper
around a kernel header file.
* sysdeps/unix/sysv/linux/Dist: Add it.
* sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers):
Likewise.
* sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of
defining the values ourselves, we use a kernel header file.
* sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system
call handler for i586.
* sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
Diffstat (limited to 'stdio/_itoa.c')
-rw-r--r-- | stdio/_itoa.c | 401 |
1 files changed, 395 insertions, 6 deletions
diff --git a/stdio/_itoa.c b/stdio/_itoa.c index 19e732dcfe..caa8179624 100644 --- a/stdio/_itoa.c +++ b/stdio/_itoa.c @@ -1,6 +1,8 @@ /* Internal function for converting integers to ASCII. Copyright (C) 1994, 1995 Free Software Foundation, Inc. This file is part of the GNU C Library. +Contributed by Torbjorn Granlund <tege@matematik.su.se> +and Ulrich Drepper <drepper@gnu.ai.mit.edu>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -17,13 +19,400 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include <gmp-mparam.h> +#include "../stdlib/gmp.h" +#include "../stdlib/gmp-impl.h" +#include "../stdlib/longlong.h" + +#include "_itoa.h" + + +/* Canonize environment. For some architectures not all values might + be defined in the GMP header files. */ +#ifndef UMUL_TIME +# define UMUL_TIME 1 +#endif +#ifndef UDIV_TIME +# define UDIV_TIME 1 +#endif + +/* Control memory layout. */ +#ifdef PACK +# undef PACK +# define PACK __attribute__ ((packed)) +#else +# define PACK +#endif + + +/* Declare local types. */ +struct base_table_t +{ +#if (UDIV_TIME > 2 * UMUL_TIME) + mp_limb base_multiplier; +#endif + char flag; + char post_shift; +#if BITS_PER_MP_LIMB == 32 + struct + { + char normalization_steps; + char ndigits; + mp_limb base PACK; +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb base_ninv PACK; +#endif + } big; +#endif +}; + +/* To reduce the memory needed we include some fields of the tables + only conditionally. */ +#if BITS_PER_MP_LIMB == 32 +# if UDIV_TIME > 2 * UMUL_TIME +# define SEL1(X) X, +# define SEL2(X) ,X +# else +# define SEL1(X) +# define SEL2(X) +# endif +#endif + + +/* Local variables. 
*/ +static const struct base_table_t base_table[] = +{ +#if BITS_PER_MP_LIMB == 64 + /* 2 */ {0ul, 1, 1}, + /* 3 */ {0xaaaaaaaaaaaaaaabul, 0, 1}, + /* 4 */ {0ul, 1, 2}, + /* 5 */ {0xcccccccccccccccdul, 0, 2}, + /* 6 */ {0xaaaaaaaaaaaaaaabul, 0, 2}, + /* 7 */ {0x2492492492492493ul, 1, 3}, + /* 8 */ {0ul, 1, 3}, + /* 9 */ {0xe38e38e38e38e38ful, 0, 3}, + /* 10 */ {0xcccccccccccccccdul, 0, 3}, + /* 11 */ {0x2e8ba2e8ba2e8ba3ul, 0, 1}, + /* 12 */ {0xaaaaaaaaaaaaaaabul, 0, 3}, + /* 13 */ {0x4ec4ec4ec4ec4ec5ul, 0, 2}, + /* 14 */ {0x2492492492492493ul, 1, 4}, + /* 15 */ {0x8888888888888889ul, 0, 3}, + /* 16 */ {0ul, 1, 4}, + /* 17 */ {0xf0f0f0f0f0f0f0f1ul, 0, 4}, + /* 18 */ {0xe38e38e38e38e38ful, 0, 4}, + /* 19 */ {0xd79435e50d79435ful, 0, 4}, + /* 20 */ {0xcccccccccccccccdul, 0, 4}, + /* 21 */ {0x8618618618618619ul, 1, 5}, + /* 22 */ {0x2e8ba2e8ba2e8ba3ul, 0, 2}, + /* 23 */ {0x642c8590b21642c9ul, 1, 5}, + /* 24 */ {0xaaaaaaaaaaaaaaabul, 0, 4}, + /* 25 */ {0x47ae147ae147ae15ul, 1, 5}, + /* 26 */ {0x4ec4ec4ec4ec4ec5ul, 0, 3}, + /* 27 */ {0x97b425ed097b425ful, 0, 4}, + /* 28 */ {0x2492492492492493ul, 1, 5}, + /* 29 */ {0x1a7b9611a7b9611bul, 1, 5}, + /* 30 */ {0x8888888888888889ul, 0, 4}, + /* 31 */ {0x0842108421084211ul, 1, 5}, + /* 32 */ {0ul, 1, 5}, + /* 33 */ {0x0f83e0f83e0f83e1ul, 0, 1}, + /* 34 */ {0xf0f0f0f0f0f0f0f1ul, 0, 5}, + /* 35 */ {0xea0ea0ea0ea0ea0ful, 0, 5}, + /* 36 */ {0xe38e38e38e38e38ful, 0, 5} +#endif +#if BITS_PER_MP_LIMB == 32 + /* 2 */ {SEL1(0ul) 1, 1, {0, 31, 0x80000000ul SEL2(0xfffffffful)}}, + /* 3 */ {SEL1(0xaaaaaaabul) 0, 1, {0, 20, 0xcfd41b91ul SEL2(0x3b563c24ul)}}, + /* 4 */ {SEL1(0ul) 1, 2, {1, 15, 0x40000000ul SEL2(0xfffffffful)}}, + /* 5 */ {SEL1(0xcccccccdul) 0, 2, {1, 13, 0x48c27395ul SEL2(0xc25c2684ul)}}, + /* 6 */ {SEL1(0xaaaaaaabul) 0, 2, {0, 12, 0x81bf1000ul SEL2(0xf91bd1b6ul)}}, + /* 7 */ {SEL1(0x24924925ul) 1, 3, {1, 11, 0x75db9c97ul SEL2(0x1607a2cbul)}}, + /* 8 */ {SEL1(0ul) 1, 3, {1, 10, 0x40000000ul SEL2(0xfffffffful)}}, + /* 9 */ 
{SEL1(0x38e38e39ul) 0, 1, {0, 10, 0xcfd41b91ul SEL2(0x3b563c24ul)}}, + /* 10 */ {SEL1(0xcccccccdul) 0, 3, {2, 9, 0x3b9aca00ul SEL2(0x12e0be82ul)}}, + /* 11 */ {SEL1(0xba2e8ba3ul) 0, 3, {0, 9, 0x8c8b6d2bul SEL2(0xd24cde04ul)}}, + /* 12 */ {SEL1(0xaaaaaaabul) 0, 3, {3, 8, 0x19a10000ul SEL2(0x3fa39ab5ul)}}, + /* 13 */ {SEL1(0x4ec4ec4ful) 0, 2, {2, 8, 0x309f1021ul SEL2(0x50f8ac5ful)}}, + /* 14 */ {SEL1(0x24924925ul) 1, 4, {1, 8, 0x57f6c100ul SEL2(0x74843b1eul)}}, + /* 15 */ {SEL1(0x88888889ul) 0, 3, {0, 8, 0x98c29b81ul SEL2(0xad0326c2ul)}}, + /* 16 */ {SEL1(0ul) 1, 4, {3, 7, 0x10000000ul SEL2(0xfffffffful)}}, + /* 17 */ {SEL1(0xf0f0f0f1ul) 0, 4, {3, 7, 0x18754571ul SEL2(0x4ef0b6bdul)}}, + /* 18 */ {SEL1(0x38e38e39ul) 0, 2, {2, 7, 0x247dbc80ul SEL2(0xc0fc48a1ul)}}, + /* 19 */ {SEL1(0xaf286bcbul) 1, 5, {2, 7, 0x3547667bul SEL2(0x33838942ul)}}, + /* 20 */ {SEL1(0xcccccccdul) 0, 4, {1, 7, 0x4c4b4000ul SEL2(0xad7f29abul)}}, + /* 21 */ {SEL1(0x86186187ul) 1, 5, {1, 7, 0x6b5a6e1dul SEL2(0x313c3d15ul)}}, + /* 22 */ {SEL1(0xba2e8ba3ul) 0, 4, {0, 7, 0x94ace180ul SEL2(0xb8cca9e0ul)}}, + /* 23 */ {SEL1(0xb21642c9ul) 0, 4, {0, 7, 0xcaf18367ul SEL2(0x42ed6de9ul)}}, + /* 24 */ {SEL1(0xaaaaaaabul) 0, 4, {4, 6, 0x0b640000ul SEL2(0x67980e0bul)}}, + /* 25 */ {SEL1(0x51eb851ful) 0, 3, {4, 6, 0x0e8d4a51ul SEL2(0x19799812ul)}}, + /* 26 */ {SEL1(0x4ec4ec4ful) 0, 3, {3, 6, 0x1269ae40ul SEL2(0xbce85396ul)}}, + /* 27 */ {SEL1(0x2f684bdbul) 1, 5, {3, 6, 0x17179149ul SEL2(0x62c103a9ul)}}, + /* 28 */ {SEL1(0x24924925ul) 1, 5, {3, 6, 0x1cb91000ul SEL2(0x1d353d43ul)}}, + /* 29 */ {SEL1(0x8d3dcb09ul) 0, 4, {2, 6, 0x23744899ul SEL2(0xce1deceaul)}}, + /* 30 */ {SEL1(0x88888889ul) 0, 4, {2, 6, 0x2b73a840ul SEL2(0x790fc511ul)}}, + /* 31 */ {SEL1(0x08421085ul) 1, 5, {2, 6, 0x34e63b41ul SEL2(0x35b865a0ul)}}, + /* 32 */ {SEL1(0ul) 1, 5, {1, 6, 0x40000000ul SEL2(0xfffffffful)}}, + /* 33 */ {SEL1(0x3e0f83e1ul) 0, 3, {1, 6, 0x4cfa3cc1ul SEL2(0xa9aed1b3ul)}}, + /* 34 */ {SEL1(0xf0f0f0f1ul) 0, 5, {1, 6, 
0x5c13d840ul SEL2(0x63dfc229ul)}}, + /* 35 */ {SEL1(0xd41d41d5ul) 1, 6, {1, 6, 0x6d91b519ul SEL2(0x2b0fee30ul)}}, + /* 36 */ {SEL1(0x38e38e39ul) 0, 3, {0, 6, 0x81bf1000ul SEL2(0xf91bd1b6ul)}} +#endif +}; + /* Lower-case digits. */ -const char _itoa_lower_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +static const char _itoa_lower_digits[] + = "0123456789abcdefghijklmnopqrstuvwxyz"; /* Upper-case digits. */ -const char _itoa_upper_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static const char _itoa_upper_digits[] + = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -/* Cause _itoa.h to define _itoa as a real function instead of an - `extern inline'. */ -#define _EXTERN_INLINE /* empty */ -#include "_itoa.h" +char * +_itoa (value, buflim, base, upper_case) + unsigned long long int value; + char *buflim; + unsigned int base; + int upper_case; +{ + const char *digits = upper_case ? _itoa_upper_digits : _itoa_lower_digits; + char *bp = buflim; + const struct base_table_t *brec = &base_table[base - 2]; + + switch (base) + { +#define RUN_2N(BITS) \ + do \ + { \ + /* `unsigned long long int' always has 64 bits. 
*/ \ + mp_limb work_hi = value >> (64 - BITS_PER_MP_LIMB); \ + \ + if (BITS_PER_MP_LIMB == 32) \ + if (work_hi != 0) \ + { \ + mp_limb work_lo; \ + int cnt; \ + \ + work_lo = value & 0xfffffffful; \ + for (cnt = BITS_PER_MP_LIMB / BITS; cnt > 0; --cnt) \ + { \ + *--bp = digits[work_lo & ((1ul << BITS) - 1)]; \ + work_lo >>= BITS; \ + } \ + if (BITS_PER_MP_LIMB % BITS != 0) \ + { \ + work_lo |= ((work_hi \ + & ((1 << BITS - BITS_PER_MP_LIMB % BITS) \ + - 1)) \ + << BITS_PER_MP_LIMB % BITS); \ + *--bp = digits[work_lo]; \ + work_hi >>= BITS - BITS_PER_MP_LIMB % BITS; \ + } \ + } \ + else \ + work_hi = value & 0xfffffffful; \ + do \ + { \ + *--bp = digits[work_hi & ((1 << BITS) - 1)]; \ + work_hi >>= BITS; \ + } \ + while (work_hi != 0); \ + } \ + while (0) + case 8: + RUN_2N (3); + break; + + case 16: + RUN_2N (4); + break; + + default: + { +#if BITS_PER_MP_LIMB == 64 + mp_limb base_multiplier = brec->base_multiplier; + if (brec->flag) + while (value != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, value, base_multiplier); + quo = (x + ((value - x) >> 1)) >> (brec->post_shift - 1); + rem = value - quo * base; + *--bp = digits[rem]; + value = quo; + } + else + while (value != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, value, base_multiplier); + quo = x >> brec->post_shift; + rem = value - quo * base; + *--bp = digits[rem]; + value = quo; + } +#endif +#if BITS_PER_MP_LIMB == 32 + mp_limb t[3]; + int n; + + /* First convert x0 to 1-3 words in base s->big.base. + Optimize for frequent cases of 32 bit numbers. */ + if ((mp_limb) (value >> 32) >= 1) + { + int big_normalization_steps = brec->big.normalization_steps; + mp_limb big_base_norm = brec->big.base << big_normalization_steps; + + if ((mp_limb) (value >> 32) >= brec->big.base) + { + mp_limb x1hi, x1lo, r; + /* If you want to optimize this, take advantage of + that the quotient in the first udiv_qrnnd will + always be very small. It might be faster just to + subtract in a tight loop. 
*/ + +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb x, xh, xl; + + if (big_normalization_steps == 0) + xh = 0; + else + xh = (mp_limb) (value >> 64 - big_normalization_steps); + xl = (mp_limb) (value >> 32 - big_normalization_steps); + udiv_qrnnd_preinv (x1hi, r, xh, xl, big_base_norm, + brec->big.base_ninv); + + xl = ((mp_limb) value) << big_normalization_steps; + udiv_qrnnd_preinv (x1lo, x, r, xl, big_base_norm, + big_normalization_steps); + t[2] = x >> big_normalization_steps; + + if (big_normalization_steps == 0) + xh = x1hi; + else + xh = ((x1hi << big_normalization_steps) + | (x1lo >> 32 - big_normalization_steps)); + xl = x1lo << big_normalization_steps; + udiv_qrnnd_preinv (t[0], x, xh, xl, big_base_norm, + big_normalization_steps); + t[1] = x >> big_normalization_steps; +#elif UDIV_NEEDS_NORMALIZATION + mp_limb x, xh, xl; + + if (big_normalization_steps == 0) + xh = 0; + else + xh = (mp_limb) (value >> 64 - big_normalization_steps); + xl = (mp_limb) (value >> 32 - big_normalization_steps); + udiv_qrnnd (x1hi, r, xh, xl, big_base_norm); + + xl = ((mp_limb) value) << big_normalization_steps; + udiv_qrnnd (x1lo, x, r, xl, big_base_norm); + t[2] = x >> big_normalization_steps; + + if (big_normalization_steps == 0) + xh = x1hi; + else + xh = ((x1hi << big_normalization_steps) + | (x1lo >> 32 - big_normalization_steps)); + xl = x1lo << big_normalization_steps; + udiv_qrnnd (t[0], x, xh, xl, big_base_norm); + t[1] = x >> big_normalization_steps; +#else + udiv_qrnnd (x1hi, r, 0, (mp_limb) (value >> 32), + brec->big.base); + udiv_qrnnd (x1lo, t[2], r, (mp_limb) value, brec->big.base); + udiv_qrnnd (t[0], t[1], x1hi, x1lo, brec->big.base); +#endif + n = 3; + } + else + { +#if (UDIV_TIME > 2 * UMUL_TIME) + mp_limb x; + + value <<= brec->big.normalization_steps; + udiv_qrnnd_preinv (t[0], x, (mp_limb) (value >> 32), + (mp_limb) value, big_base_norm, + brec->big.base_ninv); + t[1] = x >> brec->big.normalization_steps; +#elif UDIV_NEEDS_NORMALIZATION + mp_limb x; + + value 
<<= big_normalization_steps; + udiv_qrnnd (t[0], x, (mp_limb) (value >> 32), + (mp_limb) value, big_base_norm); + t[1] = x >> big_normalization_steps; +#else + udiv_qrnnd (t[0], t[1], (mp_limb) (value >> 32), + (mp_limb) value, brec->big.base); +#endif + n = 2; + } + } + else + { + t[0] = value; + n = 1; + } + + /* Convert the 1-3 words in t[], word by word, to ASCII. */ + do + { + mp_limb ti = t[--n]; + int ndig_for_this_limb = 0; + +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb base_multiplier = brec->base_multiplier; + if (brec->flag) + while (ti != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, ti, base_multiplier); + quo = (x + ((ti - x) >> 1)) >> (brec->post_shift - 1); + rem = ti - quo * base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } + else + while (ti != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, ti, base_multiplier); + quo = x >> brec->post_shift; + rem = ti - quo * base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } +#else + while (ti != 0) + { + mp_limb quo, rem; + + quo = ti / base; + rem = ti % base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } +#endif + /* If this wasn't the most significant word, pad with zeros. */ + if (n != 0) + while (ndig_for_this_limb < brec->big.ndigits) + { + *--bp = '0'; + ++ndig_for_this_limb; + } + } + while (n != 0); +#endif + } + break; + } + + return bp; +} |