author     Zack Weinberg <zackw@panix.com>   2017-06-08 15:39:03 -0400
committer  Zack Weinberg <zackw@panix.com>   2017-06-08 15:39:03 -0400
commit     5046dbb4a7eba5eccfd258f92f4735c9ffc8d069
tree       4470480d904b65cf14ca524f96f79eca818c3eaf
parent     199fc19d3aaaf57944ef036e15904febe877fc93
Prepare for radical source tree reorganization.
All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README). All old ChangeLog files are moved to the new directory OldChangeLogs instead. The generated file INSTALL is just deleted; in the new order, there will be no generated files checked into version control.
Diffstat (limited to 'REORG.TODO/sysdeps/i386')
-rw-r--r--REORG.TODO/sysdeps/i386/Implies5
-rw-r--r--REORG.TODO/sysdeps/i386/Makefile103
-rw-r--r--REORG.TODO/sysdeps/i386/Versions35
-rw-r--r--REORG.TODO/sysdeps/i386/____longjmp_chk.S1
-rw-r--r--REORG.TODO/sysdeps/i386/__longjmp.S72
-rw-r--r--REORG.TODO/sysdeps/i386/abort-instr.h2
-rw-r--r--REORG.TODO/sysdeps/i386/add_n.S111
-rw-r--r--REORG.TODO/sysdeps/i386/addmul_1.S86
-rw-r--r--REORG.TODO/sysdeps/i386/asm-syntax.h24
-rw-r--r--REORG.TODO/sysdeps/i386/atomic-machine.h545
-rw-r--r--REORG.TODO/sysdeps/i386/backtrace.c163
-rw-r--r--REORG.TODO/sysdeps/i386/bcopy.S4
-rw-r--r--REORG.TODO/sysdeps/i386/bsd-_setjmp.S56
-rw-r--r--REORG.TODO/sysdeps/i386/bsd-setjmp.S66
-rw-r--r--REORG.TODO/sysdeps/i386/bzero.S5
-rw-r--r--REORG.TODO/sysdeps/i386/cacheinfo.c3
-rw-r--r--REORG.TODO/sysdeps/i386/configure84
-rw-r--r--REORG.TODO/sysdeps/i386/configure.ac52
-rw-r--r--REORG.TODO/sysdeps/i386/crti.S84
-rw-r--r--REORG.TODO/sysdeps/i386/crtn.S47
-rw-r--r--REORG.TODO/sysdeps/i386/dl-irel.h51
-rw-r--r--REORG.TODO/sysdeps/i386/dl-lookupcfg.h32
-rw-r--r--REORG.TODO/sysdeps/i386/dl-machine.h757
-rw-r--r--REORG.TODO/sysdeps/i386/dl-procinfo.c65
-rw-r--r--REORG.TODO/sysdeps/i386/dl-tls.h61
-rw-r--r--REORG.TODO/sysdeps/i386/dl-tlsdesc.S285
-rw-r--r--REORG.TODO/sysdeps/i386/dl-tlsdesc.h61
-rw-r--r--REORG.TODO/sysdeps/i386/dl-trampoline.S215
-rw-r--r--REORG.TODO/sysdeps/i386/ffs.c50
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/Implies1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/Versions6
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/doasin.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_acos.S25
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_acosf.S24
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_acosh.S101
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_acoshf.S101
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_acoshl.S107
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_acosl.c29
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_asin.S38
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_asinf.S39
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_atan2.S30
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_atan2f.S30
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_atan2l.c19
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_atanh.S112
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_atanhf.S109
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_atanhl.S127
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_exp.S73
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_exp10.S53
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_exp10f.S53
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_exp10l.S2
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_exp2.S52
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_exp2f.S52
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_exp2l.S60
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_expf.S74
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_expl.S226
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_fmod.S18
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_fmodf.S19
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_fmodl.c23
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_hypot.S75
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_hypotf.S64
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_ilogb.S42
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_ilogbf.S42
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_ilogbl.S43
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_log.S92
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_log10.S68
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_log10f.S69
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_log10l.S71
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_log2.S69
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_log2f.S69
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_log2l.S70
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_logf.S93
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_logl.S97
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_pow.S456
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_powf.S392
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_powl.S459
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_rem_pio2.c3
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_remainder.S18
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_remainderf.S18
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_remainderl.S20
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_scalb.S100
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_scalbf.S102
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_scalbl.S90
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_sqrt.S23
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_sqrtf.S13
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/e_sqrtl.c20
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fclrexcpt.c69
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fedisblxcpt.c54
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/feenablxcpt.c54
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fegetenv.c49
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fegetexcept.c31
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fegetmode.c32
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fegetround.c33
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/feholdexcpt.c50
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fenv_private.h501
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fesetenv.c131
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fesetexcept.c31
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fesetmode.c54
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fesetround.c54
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/feupdateenv.c60
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fgetexcptflg.c57
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fraiseexcpt.c124
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/fsetexcptflg.c69
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/ftestexcept.c40
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/halfulp.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/i386-math-asm.h340
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/libm-test-ulps2202
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/libm-test-ulps-name1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/math-tests.h27
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/math_private.h7
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/mpatan.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/mpatan2.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/mpexp.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/mplog.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/mpsqrt.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_asinh.S139
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_asinhf.S139
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_asinhl.S144
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_atan.S30
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_atanf.S30
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_atanl.c22
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_cbrt.S200
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_cbrtf.S177
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_cbrtl.S229
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_ceil.S34
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_ceilf.S34
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_ceill.S40
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_copysign.S20
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_copysignf.S20
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_copysignl.S21
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_expm1.S113
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_expm1f.S113
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_expm1l.S2
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fabs.S9
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fabsf.S9
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fabsl.S9
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fdim.c50
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_finite.S17
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_finitef.S16
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_finitel.S15
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_floor.S34
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_floorf.S34
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_floorl.S40
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fmax.S43
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fmaxf.S43
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fmaxl.S71
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fmin.S43
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fminf.S43
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fminl.S71
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_fpclassifyl.c42
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_frexp.S83
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_frexpf.S80
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_frexpl.S92
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_isinfl.c32
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_isnanl.c43
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_llrint.S36
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_llrintf.S36
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_llrintl.S36
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_log1p.S67
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_log1pf.S67
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_log1pl.S76
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_logb.S16
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_logbf.S16
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_logbl.c19
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_lrint.S34
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_lrintf.S34
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_lrintl.S34
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_nearbyint.S20
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_nearbyintf.S20
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_nearbyintl.S23
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_nextafterl.c125
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_nexttoward.c93
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_nexttowardf.c77
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_remquo.S45
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_remquof.S45
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_remquol.S45
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_rint.S15
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_rintf.S15
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_rintl.c18
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_scalbln.c2
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_scalblnf.c2
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_scalblnl.c2
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_scalbn.S24
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_scalbnf.S24
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_scalbnl.S23
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_significand.S16
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_significandf.S16
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_significandl.c19
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_trunc.S37
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_truncf.S37
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/s_truncl.S40
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/slowexp.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/slowpow.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/t_exp.c1
-rw-r--r--REORG.TODO/sysdeps/i386/fpu/w_sqrt_compat.c8
-rw-r--r--REORG.TODO/sysdeps/i386/gccframe.h27
-rw-r--r--REORG.TODO/sysdeps/i386/gmp-mparam.h28
-rw-r--r--REORG.TODO/sysdeps/i386/htonl.S34
-rw-r--r--REORG.TODO/sysdeps/i386/htons.S35
-rw-r--r--REORG.TODO/sysdeps/i386/i386-mcount.S79
-rw-r--r--REORG.TODO/sysdeps/i386/i586/add_n.S143
-rw-r--r--REORG.TODO/sysdeps/i386/i586/addmul_1.S94
-rw-r--r--REORG.TODO/sysdeps/i386/i586/bzero.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i586/init-arch.h19
-rw-r--r--REORG.TODO/sysdeps/i386/i586/lshift.S255
-rw-r--r--REORG.TODO/sysdeps/i386/i586/memcopy.h95
-rw-r--r--REORG.TODO/sysdeps/i386/i586/memcpy.S124
-rw-r--r--REORG.TODO/sysdeps/i386/i586/mempcpy.S8
-rw-r--r--REORG.TODO/sysdeps/i386/i586/memset.S121
-rw-r--r--REORG.TODO/sysdeps/i386/i586/memusage.h1
-rw-r--r--REORG.TODO/sysdeps/i386/i586/mul_1.S90
-rw-r--r--REORG.TODO/sysdeps/i386/i586/rshift.S255
-rw-r--r--REORG.TODO/sysdeps/i386/i586/stpcpy.S8
-rw-r--r--REORG.TODO/sysdeps/i386/i586/strchr.S348
-rw-r--r--REORG.TODO/sysdeps/i386/i586/strcpy.S169
-rw-r--r--REORG.TODO/sysdeps/i386/i586/strlen.S182
-rw-r--r--REORG.TODO/sysdeps/i386/i586/sub_n.S143
-rw-r--r--REORG.TODO/sysdeps/i386/i586/submul_1.S94
-rw-r--r--REORG.TODO/sysdeps/i386/i686/Makefile12
-rw-r--r--REORG.TODO/sysdeps/i386/i686/add_n.S110
-rw-r--r--REORG.TODO/sysdeps/i386/i686/bcopy.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/bzero.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/dl-hash.h79
-rw-r--r--REORG.TODO/sysdeps/i386/i686/ffs.c48
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/e_log.S29
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/e_logf.S30
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/e_logl.S94
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/Makefile4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S22
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S325
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf.c37
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps2188
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name1
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S553
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf.c29
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S586
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c30
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S566
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf.c28
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/s_fmax.S39
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxf.S39
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxl.S58
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/s_fmin.S37
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/s_fminf.S37
-rw-r--r--REORG.TODO/sysdeps/i386/i686/fpu/s_fminl.S58
-rw-r--r--REORG.TODO/sysdeps/i386/i686/hp-timing.h42
-rw-r--r--REORG.TODO/sysdeps/i386/i686/init-arch.h19
-rw-r--r--REORG.TODO/sysdeps/i386/i686/memcmp.S408
-rw-r--r--REORG.TODO/sysdeps/i386/i686/memcpy.S98
-rw-r--r--REORG.TODO/sysdeps/i386/i686/memmove.S120
-rw-r--r--REORG.TODO/sysdeps/i386/i686/mempcpy.S65
-rw-r--r--REORG.TODO/sysdeps/i386/i686/memset.S100
-rw-r--r--REORG.TODO/sysdeps/i386/i686/memusage.h21
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/Makefile44
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/bcopy.S59
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/bzero.S62
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/ifunc-impl-list.c376
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/locale-defines.sym11
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S502
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2.S709
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memchr.S65
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-sse4.S1225
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-ssse3.S2157
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcmp.S62
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S681
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S1809
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3.S3162
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcpy.S78
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memcpy_chk.S50
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memmove.S89
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memmove_chk.S94
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy.S81
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy_chk.S50
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-c.c7
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S417
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2.S724
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memrchr.S45
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2-rep.S811
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2.S860
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memset.S75
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/memset_chk.S82
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr.S65
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/rtld-strnlen.c1
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/s_fma-fma.c27
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/s_fma.c34
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf-fma.c27
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf.c34
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/sched_cpucount.c1
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-sse2.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy.S9
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-sse2.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy.S8
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp-c.c12
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp.S39
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c13
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S2
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S2
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l.S7
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcat-sse2.S1245
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcat-ssse3.S572
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcat.S92
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S158
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2.S348
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strchr.S57
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-sse4.S804
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-ssse3.S2810
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcmp.S95
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-sse2.S2250
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-ssse3.S3901
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcpy.S116
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcspn-c.c2
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strcspn.S75
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S125
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2.S695
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strlen.S60
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncase-c.c8
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncase.S39
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-c.c13
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-sse4.S2
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S2
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l.S7
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncat-c.c8
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncat-sse2.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncat-ssse3.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncat.S5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-c.c8
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-sse4.S5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-ssse3.S5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncmp.S5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-c.c8
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-sse2.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-ssse3.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strncpy.S5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-c.c10
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-sse2.S3
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strnlen.S37
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk-c.c2
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk.S5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S282
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2.S708
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strrchr.S57
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strspn-c.c2
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/strspn.S56
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/test-multiarch.c1
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/varshift.c1
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/varshift.h1
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-c.c22
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-sse2.S219
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcschr.S36
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-c.c14
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-sse2.S1018
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp.S39
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-c.c5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S600
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy.S36
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-c.c9
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-sse2.S193
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcslen.S37
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-c.c5
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S354
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr.S35
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-c.c9
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S4
-rw-r--r--REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp.S40
-rw-r--r--REORG.TODO/sysdeps/i386/i686/nptl/tls.h35
-rw-r--r--REORG.TODO/sysdeps/i386/i686/pthread_spin_trylock.S20
-rw-r--r--REORG.TODO/sysdeps/i386/i686/stack-aliasing.h23
-rw-r--r--REORG.TODO/sysdeps/i386/i686/strcmp.S52
-rw-r--r--REORG.TODO/sysdeps/i386/i686/tst-stack-align.h44
-rw-r--r--REORG.TODO/sysdeps/i386/i786/Implies2
-rw-r--r--REORG.TODO/sysdeps/i386/init-arch.h19
-rw-r--r--REORG.TODO/sysdeps/i386/jmpbuf-offsets.h25
-rw-r--r--REORG.TODO/sysdeps/i386/jmpbuf-unwind.h47
-rw-r--r--REORG.TODO/sysdeps/i386/ldbl2mpn.c120
-rw-r--r--REORG.TODO/sysdeps/i386/ldsodefs.h41
-rw-r--r--REORG.TODO/sysdeps/i386/link-defines.sym20
-rw-r--r--REORG.TODO/sysdeps/i386/lshift.S103
-rw-r--r--REORG.TODO/sysdeps/i386/machine-gmon.h40
-rw-r--r--REORG.TODO/sysdeps/i386/memchr.S322
-rw-r--r--REORG.TODO/sysdeps/i386/memcmp.S73
-rw-r--r--REORG.TODO/sysdeps/i386/memcopy.h92
-rw-r--r--REORG.TODO/sysdeps/i386/memcpy.S95
-rw-r--r--REORG.TODO/sysdeps/i386/memcpy_chk.S34
-rw-r--r--REORG.TODO/sysdeps/i386/memmove.S4
-rw-r--r--REORG.TODO/sysdeps/i386/memmove_chk.S33
-rw-r--r--REORG.TODO/sysdeps/i386/mempcpy.S7
-rw-r--r--REORG.TODO/sysdeps/i386/mempcpy_chk.S33
-rw-r--r--REORG.TODO/sysdeps/i386/memset.S68
-rw-r--r--REORG.TODO/sysdeps/i386/memset_chk.S33
-rw-r--r--REORG.TODO/sysdeps/i386/memusage.h20
-rw-r--r--REORG.TODO/sysdeps/i386/mp_clz_tab.c1
-rw-r--r--REORG.TODO/sysdeps/i386/mul_1.S86
-rw-r--r--REORG.TODO/sysdeps/i386/nptl/Makefile26
-rw-r--r--REORG.TODO/sysdeps/i386/nptl/pthread_spin_init.c19
-rw-r--r--REORG.TODO/sysdeps/i386/nptl/pthread_spin_lock.S37
-rw-r--r--REORG.TODO/sysdeps/i386/nptl/pthread_spin_unlock.S31
-rw-r--r--REORG.TODO/sysdeps/i386/nptl/pthreaddef.h40
-rw-r--r--REORG.TODO/sysdeps/i386/nptl/tcb-offsets.sym17
-rw-r--r--REORG.TODO/sysdeps/i386/nptl/tls.h435
-rw-r--r--REORG.TODO/sysdeps/i386/preconfigure5
-rw-r--r--REORG.TODO/sysdeps/i386/pthread_spin_trylock.S46
-rw-r--r--REORG.TODO/sysdeps/i386/rawmemchr.S222
-rw-r--r--REORG.TODO/sysdeps/i386/rshift.S105
-rw-r--r--REORG.TODO/sysdeps/i386/setfpucw.c54
-rw-r--r--REORG.TODO/sysdeps/i386/setjmp.S58
-rw-r--r--REORG.TODO/sysdeps/i386/stackguard-macros.h12
-rw-r--r--REORG.TODO/sysdeps/i386/stackinfo.h43
-rw-r--r--REORG.TODO/sysdeps/i386/start.S139
-rw-r--r--REORG.TODO/sysdeps/i386/stpcpy.S88
-rw-r--r--REORG.TODO/sysdeps/i386/stpncpy.S147
-rw-r--r--REORG.TODO/sysdeps/i386/strcat.S265
-rw-r--r--REORG.TODO/sysdeps/i386/strchr.S290
-rw-r--r--REORG.TODO/sysdeps/i386/strchrnul.S278
-rw-r--r--REORG.TODO/sysdeps/i386/strcspn.S240
-rw-r--r--REORG.TODO/sysdeps/i386/string-inlines.c47
-rw-r--r--REORG.TODO/sysdeps/i386/strlen.S132
-rw-r--r--REORG.TODO/sysdeps/i386/strlen.c35
-rw-r--r--REORG.TODO/sysdeps/i386/strpbrk.S243
-rw-r--r--REORG.TODO/sysdeps/i386/strrchr.S334
-rw-r--r--REORG.TODO/sysdeps/i386/strspn.S240
-rw-r--r--REORG.TODO/sysdeps/i386/sub_n.S111
-rw-r--r--REORG.TODO/sysdeps/i386/submul_1.S86
-rw-r--r--REORG.TODO/sysdeps/i386/symbol-hacks.h21
-rw-r--r--REORG.TODO/sysdeps/i386/sys/ucontext.h139
-rw-r--r--REORG.TODO/sysdeps/i386/sysdep.h159
-rw-r--r--REORG.TODO/sysdeps/i386/tls-macros.h78
-rw-r--r--REORG.TODO/sysdeps/i386/tlsdesc.c268
-rw-r--r--REORG.TODO/sysdeps/i386/tlsdesc.sym17
-rw-r--r--REORG.TODO/sysdeps/i386/tst-audit.h25
-rw-r--r--REORG.TODO/sysdeps/i386/tst-audit3.c37
-rw-r--r--REORG.TODO/sysdeps/i386/tst-audit3.h20
-rw-r--r--REORG.TODO/sysdeps/i386/tst-auditmod3a.c38
-rw-r--r--REORG.TODO/sysdeps/i386/tst-auditmod3b.c186
-rwxr-xr-xREORG.TODO/sysdeps/i386/tst-ld-sse-use.sh103
-rw-r--r--REORG.TODO/sysdeps/i386/tst-stack-align.h41
450 files changed, 62011 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/i386/Implies b/REORG.TODO/sysdeps/i386/Implies
new file mode 100644
index 0000000000..20b2dffc29
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/Implies
@@ -0,0 +1,5 @@
+x86
+wordsize-32
+ieee754/ldbl-96
+ieee754/dbl-64
+ieee754/flt-32
diff --git a/REORG.TODO/sysdeps/i386/Makefile b/REORG.TODO/sysdeps/i386/Makefile
new file mode 100644
index 0000000000..e30e1339f0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/Makefile
@@ -0,0 +1,103 @@
+# The mpn functions need a #define for asm syntax flavor.
+# Every i386 port in use uses gas syntax (I think).
+asm-CPPFLAGS += -DGAS_SYNTAX
+
+# The i386 `long double' is a distinct type we support.
+long-double-fcts = yes
+
+ifeq ($(subdir),string)
+sysdep_routines += cacheinfo
+endif
+
+ifeq ($(subdir),gmon)
+sysdep_routines += i386-mcount
+endif
+
+ifeq ($(subdir),elf)
+CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused
+CFLAGS-dl-load.c += -Wno-unused
+CFLAGS-dl-reloc.c += -Wno-unused
+endif
+
+ifeq ($(subdir),debug)
+CFLAGS-backtrace.c += -fexceptions
+endif
+
+# Most glibc routines never call user-defined callbacks and don't use
+# the FPU or SSE*, so they need no bigger %esp alignment than 4 bytes.
+# Lots of routines in math do use the FPU, so make the math subdir an
+# exception here.
+# In gcc 4.6 (and maybe earlier?) giving -mpreferred-stack-boundary=2 is
+# an error, so don't try to reduce it here like we used to.  We still
+# explicitly set -mpreferred-stack-boundary=4 in the places where it
+# matters, in case an older compiler defaulted to 2.
+ifeq ($(subdir),math)
+sysdep-CFLAGS += -mpreferred-stack-boundary=4
+else
+ifeq ($(subdir),csu)
+sysdep-CFLAGS += -mpreferred-stack-boundary=4
+gen-as-const-headers += link-defines.sym
+else
+# Likewise, any function which calls user callbacks
+uses-callbacks += -mpreferred-stack-boundary=4
+# Likewise, any stack alignment tests
+stack-align-test-flags += -malign-double -mpreferred-stack-boundary=4
+endif
+endif
+
+# And a couple of other routines
+ifeq ($(subdir),stdlib)
+CFLAGS-exit.c += -mpreferred-stack-boundary=4
+CFLAGS-cxa_finalize.c += -mpreferred-stack-boundary=4
+endif
+ifeq ($(subdir),elf)
+CFLAGS-dl-init.c += -mpreferred-stack-boundary=4
+CFLAGS-dl-fini.c += -mpreferred-stack-boundary=4
+CFLAGS-dl-open.c += -mpreferred-stack-boundary=4
+CFLAGS-dl-close.c += -mpreferred-stack-boundary=4
+CFLAGS-dl-error.c += -mpreferred-stack-boundary=4
+endif
+ifeq ($(subdir),dlfcn)
+CFLAGS-dlopen.c += -mpreferred-stack-boundary=4
+CFLAGS-dlopenold.c += -mpreferred-stack-boundary=4
+CFLAGS-dlclose.c += -mpreferred-stack-boundary=4
+CFLAGS-dlerror.c += -mpreferred-stack-boundary=4
+endif
+
+ifneq (,$(filter -mno-tls-direct-seg-refs,$(CFLAGS)))
+defines += -DNO_TLS_DIRECT_SEG_REFS
+endif
+
+ifeq ($(subdir),elf)
+sysdep-dl-routines += tlsdesc dl-tlsdesc
+
+tests += tst-audit3
+modules-names += tst-auditmod3a tst-auditmod3b
+
+$(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
+$(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
+tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so
+endif
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += tlsdesc.sym
+endif
+
+# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since
+# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters
+# which must be preserved.
+# With SSE disabled, ensure -mfpmath is not set to use sse either.
+rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387
+ifeq ($(subdir),elf)
+CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
+ $(rtld-CFLAGS))
+
+tests-special += $(objpfx)tst-ld-sse-use.out
+$(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so
+ @echo "Checking ld.so for SSE register use. This will take a few seconds..."
+ $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \
+ $(evaluate-test)
+else
+CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS))
+endif
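
A note on the -mpreferred-stack-boundary=4 blocks above: the value is a
log2, so 4 means 16-byte %esp alignment.  The sketch below (hypothetical
code, not part of this commit) shows the kind of failure those flags
guard against: a caller that keeps only 4-byte alignment can hand an
unaligned stack to SSE-using code, which then faults in an aligned
vector instruction such as movaps.

    /* Hypothetical test case.  Build with something like
       gcc -m32 -msse sketch.c; the subdirs above that run user
       callbacks or FPU/SSE math keep 16-byte alignment so that
       uses_sse-style callees are safe.  */
    #include <stdio.h>

    static void __attribute__ ((noinline))
    uses_sse (void)
    {
      /* GCC may vectorize this with 16-byte-aligned SSE stores.  */
      float v[4];
      for (int i = 0; i < 4; i++)
        v[i] = i * 1.5f;
      printf ("%f\n", v[0] + v[1] + v[2] + v[3]);
    }

    static void
    run_callback (void (*cb) (void))
    {
      cb ();  /* glibc routines that take user callbacks sit here */
    }

    int
    main (void)
    {
      run_callback (uses_sse);
      return 0;
    }
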
diff --git a/REORG.TODO/sysdeps/i386/Versions b/REORG.TODO/sysdeps/i386/Versions
new file mode 100644
index 0000000000..7be44aad7a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/Versions
@@ -0,0 +1,35 @@
+ld {
+ GLIBC_2.3 {
+ # The alternative i386 runtime interface to TLS.
+ ___tls_get_addr;
+ }
+}
+libc {
+ GLIBC_2.0 {
+ # Functions from libgcc.
+ __divdi3; __moddi3; __udivdi3; __umoddi3;
+ }
+ GLIBC_2.1 {
+ # global variable
+ _fp_hw;
+ }
+ GLIBC_2.1.1 {
+ # extern inline functions used by <bits/string.h>
+ __memcpy_c; __memset_cc; __memset_cg; __memset_gg;
+ __memcpy_by2; __memcpy_by4; __memcpy_g; __mempcpy_by2; __mempcpy_by4;
+ __mempcpy_byn; __memset_ccn_by2; __memset_ccn_by4; __memset_gcn_by2;
+ __memset_gcn_by4; __stpcpy_g; __strcat_c; __strcat_g; __strchr_c;
+ __strchr_g; __strchrnul_c; __strchrnul_g; __strcmp_gg; __strcpy_g;
+ __strcspn_c1; __strcspn_cg; __strcspn_g; __strlen_g; __strncat_g;
+ __strncmp_g; __strncpy_by2; __strncpy_by4; __strncpy_byn; __strncpy_gg;
+ __strpbrk_cg; __strpbrk_g; __strrchr_c; __strrchr_g; __strspn_c1;
+ __strspn_cg; __strspn_g; __strstr_cg; __strstr_g;
+ }
+}
+libm {
+ GLIBC_2.1 {
+ # A generic bug got this omitted from other configurations' version
+ # sets, but we always had it.
+ exp2l;
+ }
+}
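
The Versions file above assigns symbols to version nodes (GLIBC_2.0,
GLIBC_2.1.1, and so on).  As a minimal sketch of the mechanism, not
code from this commit: a C implementation can be bound to one of those
nodes with the assembler's .symver directive, provided the node is
defined in the version script passed to the linker.

    /* Sketch: export __strlen_g at version GLIBC_2.1.1 from a shared
       object.  Assumes a link like
       gcc -m32 -shared -Wl,--version-script=Versions.map sketch.c  */
    int
    __strlen_g_compat (const char *s)
    {
      int n = 0;
      while (s[n] != '\0')
        n++;
      return n;
    }
    __asm__ (".symver __strlen_g_compat, __strlen_g@GLIBC_2.1.1");
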
diff --git a/REORG.TODO/sysdeps/i386/____longjmp_chk.S b/REORG.TODO/sysdeps/i386/____longjmp_chk.S
new file mode 100644
index 0000000000..0910861a9d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/____longjmp_chk.S
@@ -0,0 +1 @@
+#error "OS-specific version needed"
diff --git a/REORG.TODO/sysdeps/i386/__longjmp.S b/REORG.TODO/sysdeps/i386/__longjmp.S
new file mode 100644
index 0000000000..3719763cd6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/__longjmp.S
@@ -0,0 +1,72 @@
+/* longjmp for i386.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <jmpbuf-offsets.h>
+#include <asm-syntax.h>
+#include <stap-probe.h>
+
+ .text
+ENTRY (__longjmp)
+#ifdef PTR_DEMANGLE
+ movl 4(%esp), %eax /* User's jmp_buf in %eax. */
+
+ /* Save the return address now. */
+ movl (JB_PC*4)(%eax), %edx
+ /* Get the stack pointer. */
+ movl (JB_SP*4)(%eax), %ecx
+ PTR_DEMANGLE (%edx)
+ PTR_DEMANGLE (%ecx)
+ LIBC_PROBE (longjmp, 3, 4@%eax, -4@8(%esp), 4@%edx)
+ cfi_def_cfa(%eax, 0)
+ cfi_register(%eip, %edx)
+ cfi_register(%esp, %ecx)
+ cfi_offset(%ebx, JB_BX*4)
+ cfi_offset(%esi, JB_SI*4)
+ cfi_offset(%edi, JB_DI*4)
+ cfi_offset(%ebp, JB_BP*4)
+ /* Restore registers. */
+ movl (JB_BX*4)(%eax), %ebx
+ movl (JB_SI*4)(%eax), %esi
+ movl (JB_DI*4)(%eax), %edi
+ movl (JB_BP*4)(%eax), %ebp
+ cfi_restore(%ebx)
+ cfi_restore(%esi)
+ cfi_restore(%edi)
+ cfi_restore(%ebp)
+
+ LIBC_PROBE (longjmp_target, 3, 4@%eax, -4@8(%esp), 4@%edx)
+ movl 8(%esp), %eax /* Second argument is return value. */
+ movl %ecx, %esp
+#else
+ movl 4(%esp), %ecx /* User's jmp_buf in %ecx. */
+ movl 8(%esp), %eax /* Second argument is return value. */
+ /* Save the return address now. */
+ movl (JB_PC*4)(%ecx), %edx
+ LIBC_PROBE (longjmp, 3, 4@%ecx, -4@%eax, 4@%edx)
+ /* Restore registers. */
+ movl (JB_BX*4)(%ecx), %ebx
+ movl (JB_SI*4)(%ecx), %esi
+ movl (JB_DI*4)(%ecx), %edi
+ movl (JB_BP*4)(%ecx), %ebp
+ movl (JB_SP*4)(%ecx), %esp
+ LIBC_PROBE (longjmp_target, 3, 4@%ecx, -4@%ecx, 4@%edx)
+#endif
+ /* Jump to saved PC. */
+ jmp *%edx
+END (__longjmp)
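
The PTR_DEMANGLE path above undoes glibc's pointer mangling of the
saved PC and SP before using them.  Schematically, under the assumption
that i386 mangling is XOR-with-guard plus a 9-bit rotate as in glibc's
sysdep headers (the real guard lives in the TCB and the real code works
on registers):

    #include <stdint.h>

    /* Hypothetical C rendering of i386 PTR_MANGLE/PTR_DEMANGLE.  */
    static uint32_t pointer_guard;   /* stand-in for the TCB field */

    static inline uint32_t
    rol32 (uint32_t x, unsigned int n)
    {
      return (x << n) | (x >> (32 - n));
    }

    static inline uint32_t
    ptr_mangle (uint32_t p)
    {
      return rol32 (p ^ pointer_guard, 9);
    }

    static inline uint32_t
    ptr_demangle (uint32_t p)
    {
      return rol32 (p, 32 - 9) ^ pointer_guard;
    }

For any guard value, ptr_demangle (ptr_mangle (p)) == p; an attacker
who overwrites a jmp_buf without knowing the guard lands on a garbage
address instead of a chosen one.
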
diff --git a/REORG.TODO/sysdeps/i386/abort-instr.h b/REORG.TODO/sysdeps/i386/abort-instr.h
new file mode 100644
index 0000000000..810f10379b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/abort-instr.h
@@ -0,0 +1,2 @@
+/* An instruction which should crash any program is `hlt'. */
+#define ABORT_INSTRUCTION asm ("hlt")
diff --git a/REORG.TODO/sysdeps/i386/add_n.S b/REORG.TODO/sysdeps/i386/add_n.S
new file mode 100644
index 0000000000..c2923094a8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/add_n.S
@@ -0,0 +1,111 @@
+/* Add two limb vectors of the same length > 0 and store sum in a third
+ limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+8 /* space for 2 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define S2 S1+4
+#define SIZE S2+4
+
+ .text
+ENTRY (__mpn_add_n)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 4)
+ movl S1(%esp),%esi
+ cfi_rel_offset (esi, 0)
+ movl S2(%esp),%edx
+ movl SIZE(%esp),%ecx
+ movl %ecx,%eax
+ shrl $3,%ecx /* compute count for unrolled loop */
+ negl %eax
+ andl $7,%eax /* get index where to start loop */
+ jz L(oop) /* necessary special case for 0 */
+ incl %ecx /* adjust loop count */
+ shll $2,%eax /* adjustment for pointers... */
+ subl %eax,%edi /* ... since they are offset ... */
+ subl %eax,%esi /* ... by a constant when we ... */
+ subl %eax,%edx /* ... enter the loop */
+ shrl $2,%eax /* restore previous value */
+#ifdef PIC
+/* Calculate start address in loop for PIC.  Due to limitations in some
+   assemblers, L(oop)-L(0)-3 cannot be put into the leal.  */
+ call L(0)
+ cfi_adjust_cfa_offset (4)
+L(0): leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $(L(oop)-L(0)-3),%eax
+ addl $4,%esp
+ cfi_adjust_cfa_offset (-4)
+#else
+/* Calculate start address in loop for non-PIC. */
+ leal (L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+ jmp *%eax /* jump into loop */
+ ALIGN (3)
+L(oop): movl (%esi),%eax
+ adcl (%edx),%eax
+ movl %eax,(%edi)
+ movl 4(%esi),%eax
+ adcl 4(%edx),%eax
+ movl %eax,4(%edi)
+ movl 8(%esi),%eax
+ adcl 8(%edx),%eax
+ movl %eax,8(%edi)
+ movl 12(%esi),%eax
+ adcl 12(%edx),%eax
+ movl %eax,12(%edi)
+ movl 16(%esi),%eax
+ adcl 16(%edx),%eax
+ movl %eax,16(%edi)
+ movl 20(%esi),%eax
+ adcl 20(%edx),%eax
+ movl %eax,20(%edi)
+ movl 24(%esi),%eax
+ adcl 24(%edx),%eax
+ movl %eax,24(%edi)
+ movl 28(%esi),%eax
+ adcl 28(%edx),%eax
+ movl %eax,28(%edi)
+ leal 32(%edi),%edi
+ leal 32(%esi),%esi
+ leal 32(%edx),%edx
+ decl %ecx
+ jnz L(oop)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_add_n)
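
The routine above is an 8-way unrolled loop that computes an entry
address and jumps into the middle of the loop body to handle the
size mod 8 leftover limbs (a Duff's-device-style dispatch).  Its
reference semantics, as a portable and purely illustrative C sketch:

    #include <stdint.h>

    typedef uint32_t mp_limb_t;

    /* Add two little-endian limb vectors of equal length n > 0,
       store the sum in res, and return the final carry.  */
    static mp_limb_t
    mpn_add_n_ref (mp_limb_t *res, const mp_limb_t *s1,
                   const mp_limb_t *s2, long n)
    {
      mp_limb_t carry = 0;
      for (long i = 0; i < n; i++)
        {
          uint64_t sum = (uint64_t) s1[i] + s2[i] + carry;
          res[i] = (mp_limb_t) sum;
          carry = (mp_limb_t) (sum >> 32);
        }
      return carry;
    }

The adcl chain in the assembly carries out exactly this carry
propagation, one limb per adcl, without ever spilling the carry flag.
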
diff --git a/REORG.TODO/sysdeps/i386/addmul_1.S b/REORG.TODO/sysdeps/i386/addmul_1.S
new file mode 100644
index 0000000000..ad90ea53e5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/addmul_1.S
@@ -0,0 +1,86 @@
+/* i80386 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ the result to a second limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define SIZE S1+4
+#define S2LIMB SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define sizeP ecx
+#define s2_limb ebx
+
+ .text
+ENTRY (__mpn_addmul_1)
+
+ pushl %res_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %s1_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %s2_limb
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp), %res_ptr
+ cfi_rel_offset (res_ptr, 12)
+ movl S1(%esp), %s1_ptr
+ cfi_rel_offset (s1_ptr, 8)
+ movl SIZE(%esp), %sizeP
+ movl S2LIMB(%esp), %s2_limb
+ cfi_rel_offset (s2_limb, 0)
+ leal (%res_ptr,%sizeP,4), %res_ptr
+ leal (%s1_ptr,%sizeP,4), %s1_ptr
+ negl %sizeP
+ xorl %ebp, %ebp
+ cfi_rel_offset (ebp, 4)
+ ALIGN (3)
+L(oop):
+ movl (%s1_ptr,%sizeP,4), %eax
+ mull %s2_limb
+ addl %ebp, %eax
+ adcl $0, %edx
+ addl %eax, (%res_ptr,%sizeP,4)
+ adcl $0, %edx
+ movl %edx, %ebp
+
+ incl %sizeP
+ jnz L(oop)
+ movl %ebp, %eax
+
+ popl %s2_limb
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s2_limb)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %s1_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s1_ptr)
+ popl %res_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (res_ptr)
+
+ ret
+END (__mpn_addmul_1)
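
Reference semantics for __mpn_addmul_1, again as an illustrative C
sketch: multiply each limb of s1 by s2_limb, add the product into res,
and return the final carry.  The mull plus the two adcl $0 steps in the
assembly correspond to the 64-bit accumulation below, which cannot
overflow: (2^32-1)^2 + 2*(2^32-1) = 2^64-1.

    #include <stdint.h>

    typedef uint32_t mp_limb_t;

    static mp_limb_t
    mpn_addmul_1_ref (mp_limb_t *res, const mp_limb_t *s1,
                      long n, mp_limb_t s2_limb)
    {
      mp_limb_t carry = 0;
      for (long i = 0; i < n; i++)
        {
          uint64_t t = (uint64_t) s1[i] * s2_limb + carry + res[i];
          res[i] = (mp_limb_t) t;
          carry = (mp_limb_t) (t >> 32);
        }
      return carry;
    }
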
diff --git a/REORG.TODO/sysdeps/i386/asm-syntax.h b/REORG.TODO/sysdeps/i386/asm-syntax.h
new file mode 100644
index 0000000000..a992da2dd1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/asm-syntax.h
@@ -0,0 +1,24 @@
+/* Definitions for x86 syntax variations.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in the GNU MP Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#undef ALIGN
+#define ALIGN(log) .align 1<<log
+
+#undef L
+#define L(body) .L##body
diff --git a/REORG.TODO/sysdeps/i386/atomic-machine.h b/REORG.TODO/sysdeps/i386/atomic-machine.h
new file mode 100644
index 0000000000..0e24200617
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/atomic-machine.h
@@ -0,0 +1,545 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+#include <tls.h> /* For tcbhead_t. */
+
+
+typedef int8_t atomic8_t;
+typedef uint8_t uatomic8_t;
+typedef int_fast8_t atomic_fast8_t;
+typedef uint_fast8_t uatomic_fast8_t;
+
+typedef int16_t atomic16_t;
+typedef uint16_t uatomic16_t;
+typedef int_fast16_t atomic_fast16_t;
+typedef uint_fast16_t uatomic_fast16_t;
+
+typedef int32_t atomic32_t;
+typedef uint32_t uatomic32_t;
+typedef int_fast32_t atomic_fast32_t;
+typedef uint_fast32_t uatomic_fast32_t;
+
+typedef int64_t atomic64_t;
+typedef uint64_t uatomic64_t;
+typedef int_fast64_t atomic_fast64_t;
+typedef uint_fast64_t uatomic_fast64_t;
+
+typedef intptr_t atomicptr_t;
+typedef uintptr_t uatomicptr_t;
+typedef intmax_t atomic_max_t;
+typedef uintmax_t uatomic_max_t;
+
+
+#ifndef LOCK_PREFIX
+# ifdef UP
+# define LOCK_PREFIX /* nothing */
+# else
+# define LOCK_PREFIX "lock;"
+# endif
+#endif
+
+#define __HAVE_64B_ATOMICS 0
+#define USE_ATOMIC_COMPILER_BUILTINS 0
+#define ATOMIC_EXCHANGE_USES_CAS 0
+
+
+#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
+ __sync_val_compare_and_swap (mem, oldval, newval)
+#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
+ (! __sync_bool_compare_and_swap (mem, oldval, newval))
+
+
+#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret; \
+ __asm __volatile ("cmpl $0, %%gs:%P5\n\t" \
+ "je 0f\n\t" \
+ "lock\n" \
+ "0:\tcmpxchgb %b2, %1" \
+ : "=a" (ret), "=m" (*mem) \
+ : "q" (newval), "m" (*mem), "0" (oldval), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ ret; })
+
+#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret; \
+ __asm __volatile ("cmpl $0, %%gs:%P5\n\t" \
+ "je 0f\n\t" \
+ "lock\n" \
+ "0:\tcmpxchgw %w2, %1" \
+ : "=a" (ret), "=m" (*mem) \
+ : "r" (newval), "m" (*mem), "0" (oldval), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ ret; })
+
+#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret; \
+ __asm __volatile ("cmpl $0, %%gs:%P5\n\t" \
+ "je 0f\n\t" \
+ "lock\n" \
+ "0:\tcmpxchgl %2, %1" \
+ : "=a" (ret), "=m" (*mem) \
+ : "r" (newval), "m" (*mem), "0" (oldval), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ ret; })
+
+/* XXX We do not really need 64-bit compare-and-exchange.  At least
+   not at the moment.  Using it would mean causing portability
+ problems since not many other 32-bit architectures have support for
+ such an operation. So don't define any code for now. If it is
+ really going to be used the code below can be used on Intel Pentium
+ and later, but NOT on i486. */
+#if 1
+# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret = *(mem); \
+ abort (); \
+ ret = (newval); \
+ ret = (oldval); \
+ ret; })
+# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret = *(mem); \
+ abort (); \
+ ret = (newval); \
+ ret = (oldval); \
+ ret; })
+#else
+# ifdef __PIC__
+# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret; \
+ __asm __volatile ("xchgl %2, %%ebx\n\t" \
+ LOCK_PREFIX "cmpxchg8b %1\n\t" \
+ "xchgl %2, %%ebx" \
+ : "=A" (ret), "=m" (*mem) \
+ : "DS" (((unsigned long long int) (newval)) \
+ & 0xffffffff), \
+ "c" (((unsigned long long int) (newval)) >> 32), \
+ "m" (*mem), "a" (((unsigned long long int) (oldval)) \
+ & 0xffffffff), \
+ "d" (((unsigned long long int) (oldval)) >> 32)); \
+ ret; })
+
+# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret; \
+ __asm __volatile ("xchgl %2, %%ebx\n\t" \
+ "cmpl $0, %%gs:%P7\n\t" \
+ "je 0f\n\t" \
+ "lock\n" \
+ "0:\tcmpxchg8b %1\n\t" \
+ "xchgl %2, %%ebx" \
+ : "=A" (ret), "=m" (*mem) \
+ : "DS" (((unsigned long long int) (newval)) \
+ & 0xffffffff), \
+ "c" (((unsigned long long int) (newval)) >> 32), \
+ "m" (*mem), "a" (((unsigned long long int) (oldval)) \
+ & 0xffffffff), \
+ "d" (((unsigned long long int) (oldval)) >> 32), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ ret; })
+# else
+# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret; \
+ __asm __volatile (LOCK_PREFIX "cmpxchg8b %1" \
+ : "=A" (ret), "=m" (*mem) \
+ : "b" (((unsigned long long int) (newval)) \
+ & 0xffffffff), \
+ "c" (((unsigned long long int) (newval)) >> 32), \
+ "m" (*mem), "a" (((unsigned long long int) (oldval)) \
+ & 0xffffffff), \
+ "d" (((unsigned long long int) (oldval)) >> 32)); \
+ ret; })
+
+# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) ret; \
+ __asm __volatile ("cmpl $0, %%gs:%P7\n\t" \
+ "je 0f\n\t" \
+ "lock\n" \
+ "0:\tcmpxchg8b %1" \
+ : "=A" (ret), "=m" (*mem) \
+ : "b" (((unsigned long long int) (newval)) \
+ & 0xffffffff), \
+ "c" (((unsigned long long int) (newval)) >> 32), \
+ "m" (*mem), "a" (((unsigned long long int) (oldval)) \
+ & 0xffffffff), \
+ "d" (((unsigned long long int) (oldval)) >> 32), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ ret; })
+# endif
+#endif
+
+
+/* Note that we need no lock prefix. */
+#define atomic_exchange_acq(mem, newvalue) \
+ ({ __typeof (*mem) result; \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile ("xchgb %b0, %1" \
+ : "=q" (result), "=m" (*mem) \
+ : "0" (newvalue), "m" (*mem)); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile ("xchgw %w0, %1" \
+ : "=r" (result), "=m" (*mem) \
+ : "0" (newvalue), "m" (*mem)); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile ("xchgl %0, %1" \
+ : "=r" (result), "=m" (*mem) \
+ : "0" (newvalue), "m" (*mem)); \
+ else \
+ { \
+ result = 0; \
+ abort (); \
+ } \
+ result; })
+
+
+#define __arch_exchange_and_add_body(lock, pfx, mem, value) \
+ ({ __typeof (*mem) __result; \
+ __typeof (value) __addval = (value); \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (lock "xaddb %b0, %1" \
+ : "=q" (__result), "=m" (*mem) \
+ : "0" (__addval), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (lock "xaddw %w0, %1" \
+ : "=r" (__result), "=m" (*mem) \
+ : "0" (__addval), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (lock "xaddl %0, %1" \
+ : "=r" (__result), "=m" (*mem) \
+ : "0" (__addval), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else \
+ { \
+ __typeof (mem) __memp = (mem); \
+ __typeof (*mem) __tmpval; \
+ __result = *__memp; \
+ do \
+ __tmpval = __result; \
+ while ((__result = pfx##_compare_and_exchange_val_64_acq \
+ (__memp, __result + __addval, __result)) == __tmpval); \
+ } \
+ __result; })
+
+#define atomic_exchange_and_add(mem, value) \
+ __sync_fetch_and_add (mem, value)
+
+#define __arch_exchange_and_add_cprefix \
+ "cmpl $0, %%gs:%P4\n\tje 0f\n\tlock\n0:\t"
+
+#define catomic_exchange_and_add(mem, value) \
+ __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, __arch_c, \
+ mem, value)
+
+
+#define __arch_add_body(lock, pfx, mem, value) \
+ do { \
+ if (__builtin_constant_p (value) && (value) == 1) \
+ atomic_increment (mem); \
+ else if (__builtin_constant_p (value) && (value) == -1) \
+ atomic_decrement (mem); \
+ else if (sizeof (*mem) == 1) \
+ __asm __volatile (lock "addb %b1, %0" \
+ : "=m" (*mem) \
+ : "iq" (value), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (lock "addw %w1, %0" \
+ : "=m" (*mem) \
+ : "ir" (value), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (lock "addl %1, %0" \
+ : "=m" (*mem) \
+ : "ir" (value), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else \
+ { \
+ __typeof (value) __addval = (value); \
+ __typeof (mem) __memp = (mem); \
+ __typeof (*mem) __oldval = *__memp; \
+ __typeof (*mem) __tmpval; \
+ do \
+ __tmpval = __oldval; \
+ while ((__oldval = pfx##_compare_and_exchange_val_64_acq \
+ (__memp, __oldval + __addval, __oldval)) == __tmpval); \
+ } \
+ } while (0)
+
+#define atomic_add(mem, value) \
+ __arch_add_body (LOCK_PREFIX, __arch, mem, value)
+
+#define __arch_add_cprefix \
+ "cmpl $0, %%gs:%P3\n\tje 0f\n\tlock\n0:\t"
+
+#define catomic_add(mem, value) \
+ __arch_add_body (__arch_add_cprefix, __arch_c, mem, value)
+
+
+#define atomic_add_negative(mem, value) \
+ ({ unsigned char __result; \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "iq" (value), "m" (*mem)); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (LOCK_PREFIX "addw %w2, %0; sets %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "ir" (value), "m" (*mem)); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (LOCK_PREFIX "addl %2, %0; sets %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "ir" (value), "m" (*mem)); \
+ else \
+ abort (); \
+ __result; })
+
+
+#define atomic_add_zero(mem, value) \
+ ({ unsigned char __result; \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "iq" (value), "m" (*mem)); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (LOCK_PREFIX "addw %w2, %0; setz %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "ir" (value), "m" (*mem)); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (LOCK_PREFIX "addl %2, %0; setz %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "ir" (value), "m" (*mem)); \
+ else \
+ abort (); \
+ __result; })
+
+
+#define __arch_increment_body(lock, pfx, mem) \
+ do { \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (lock "incb %b0" \
+ : "=m" (*mem) \
+ : "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (lock "incw %w0" \
+ : "=m" (*mem) \
+ : "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (lock "incl %0" \
+ : "=m" (*mem) \
+ : "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else \
+ { \
+ __typeof (mem) __memp = (mem); \
+ __typeof (*mem) __oldval = *__memp; \
+ __typeof (*mem) __tmpval; \
+ do \
+ __tmpval = __oldval; \
+ while ((__oldval = pfx##_compare_and_exchange_val_64_acq \
+ (__memp, __oldval + 1, __oldval)) == __tmpval); \
+ } \
+ } while (0)
+
+#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, __arch, mem)
+
+#define __arch_increment_cprefix \
+ "cmpl $0, %%gs:%P2\n\tje 0f\n\tlock\n0:\t"
+
+#define catomic_increment(mem) \
+ __arch_increment_body (__arch_increment_cprefix, __arch_c, mem)
+
+
+#define atomic_increment_and_test(mem) \
+ ({ unsigned char __result; \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (LOCK_PREFIX "incb %0; sete %b1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "m" (*mem)); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (LOCK_PREFIX "incw %0; sete %w1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "m" (*mem)); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (LOCK_PREFIX "incl %0; sete %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "m" (*mem)); \
+ else \
+ abort (); \
+ __result; })
+
+
+#define __arch_decrement_body(lock, pfx, mem) \
+ do { \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (lock "decb %b0" \
+ : "=m" (*mem) \
+ : "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (lock "decw %w0" \
+ : "=m" (*mem) \
+ : "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (lock "decl %0" \
+ : "=m" (*mem) \
+ : "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else \
+ { \
+ __typeof (mem) __memp = (mem); \
+ __typeof (*mem) __oldval = *__memp; \
+ __typeof (*mem) __tmpval; \
+ do \
+ __tmpval = __oldval; \
+ while ((__oldval = pfx##_compare_and_exchange_val_64_acq \
+ (__memp, __oldval - 1, __oldval)) == __tmpval); \
+ } \
+ } while (0)
+
+#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, __arch, mem)
+
+#define __arch_decrement_cprefix \
+ "cmpl $0, %%gs:%P2\n\tje 0f\n\tlock\n0:\t"
+
+#define catomic_decrement(mem) \
+ __arch_decrement_body (__arch_decrement_cprefix, __arch_c, mem)
+
+
+#define atomic_decrement_and_test(mem) \
+ ({ unsigned char __result; \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (LOCK_PREFIX "decb %b0; sete %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "m" (*mem)); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (LOCK_PREFIX "decw %w0; sete %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "m" (*mem)); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (LOCK_PREFIX "decl %0; sete %1" \
+ : "=m" (*mem), "=qm" (__result) \
+ : "m" (*mem)); \
+ else \
+ abort (); \
+ __result; })
+
+
+#define atomic_bit_set(mem, bit) \
+ do { \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (LOCK_PREFIX "orb %b2, %0" \
+ : "=m" (*mem) \
+ : "m" (*mem), "iq" (1 << (bit))); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (LOCK_PREFIX "orw %w2, %0" \
+ : "=m" (*mem) \
+ : "m" (*mem), "ir" (1 << (bit))); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (LOCK_PREFIX "orl %2, %0" \
+ : "=m" (*mem) \
+ : "m" (*mem), "ir" (1 << (bit))); \
+ else \
+ abort (); \
+ } while (0)
+
+
+#define atomic_bit_test_set(mem, bit) \
+ ({ unsigned char __result; \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0" \
+ : "=q" (__result), "=m" (*mem) \
+ : "m" (*mem), "ir" (bit)); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0" \
+ : "=q" (__result), "=m" (*mem) \
+ : "m" (*mem), "ir" (bit)); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0" \
+ : "=q" (__result), "=m" (*mem) \
+ : "m" (*mem), "ir" (bit)); \
+ else \
+ abort (); \
+ __result; })
+
+
+#define atomic_spin_nop() asm ("rep; nop")
+
+
+#define __arch_and_body(lock, mem, mask) \
+ do { \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (lock "andb %b1, %0" \
+ : "=m" (*mem) \
+ : "iq" (mask), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (lock "andw %w1, %0" \
+ : "=m" (*mem) \
+ : "ir" (mask), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (lock "andl %1, %0" \
+ : "=m" (*mem) \
+ : "ir" (mask), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else \
+ abort (); \
+ } while (0)
+
+#define __arch_cprefix \
+ "cmpl $0, %%gs:%P3\n\tje 0f\n\tlock\n0:\t"
+
+#define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)
+
+#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask)
+
+
+#define __arch_or_body(lock, mem, mask) \
+ do { \
+ if (sizeof (*mem) == 1) \
+ __asm __volatile (lock "orb %b1, %0" \
+ : "=m" (*mem) \
+ : "iq" (mask), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 2) \
+ __asm __volatile (lock "orw %w1, %0" \
+ : "=m" (*mem) \
+ : "ir" (mask), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else if (sizeof (*mem) == 4) \
+ __asm __volatile (lock "orl %1, %0" \
+ : "=m" (*mem) \
+ : "ir" (mask), "m" (*mem), \
+ "i" (offsetof (tcbhead_t, multiple_threads))); \
+ else \
+ abort (); \
+ } while (0)
+
+#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)
+
+#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)
+
+/* We don't use mfence because it is supposedly slower due to having to
+ provide stronger guarantees (e.g., regarding self-modifying code). */
+#define atomic_full_barrier() \
+ __asm __volatile (LOCK_PREFIX "orl $0, (%%esp)" ::: "memory")
+#define atomic_read_barrier() __asm ("" ::: "memory")
+#define atomic_write_barrier() __asm ("" ::: "memory")
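+
+/* Illustrative note (editorial): the full barrier above is what the
+   classic store/load pattern needs even on x86; without it each
+   thread's load may complete before its own store is globally
+   visible, and both r0 and r1 can end up 0.
+
+     int x = 0, y = 0, r0, r1;   // shared
+     // Thread 1:               // Thread 2:
+     x = 1;                     y = 1;
+     atomic_full_barrier ();    atomic_full_barrier ();
+     r0 = y;                    r1 = x;
+*/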
diff --git a/REORG.TODO/sysdeps/i386/backtrace.c b/REORG.TODO/sysdeps/i386/backtrace.c
new file mode 100644
index 0000000000..ee8238d0ce
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/backtrace.c
@@ -0,0 +1,163 @@
+/* Return backtrace of current program state.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libc-lock.h>
+#include <dlfcn.h>
+#include <execinfo.h>
+#include <stdlib.h>
+#include <unwind.h>
+
+struct trace_arg
+{
+ void **array;
+ int cnt, size;
+ void *lastebp, *lastesp;
+};
+
+#ifdef SHARED
+static _Unwind_Reason_Code (*unwind_backtrace) (_Unwind_Trace_Fn, void *);
+static _Unwind_Ptr (*unwind_getip) (struct _Unwind_Context *);
+static _Unwind_Ptr (*unwind_getcfa) (struct _Unwind_Context *);
+static _Unwind_Ptr (*unwind_getgr) (struct _Unwind_Context *, int);
+static void *libgcc_handle;
+
+static void
+init (void)
+{
+ libgcc_handle = __libc_dlopen ("libgcc_s.so.1");
+
+ if (libgcc_handle == NULL)
+ return;
+
+ unwind_backtrace = __libc_dlsym (libgcc_handle, "_Unwind_Backtrace");
+ unwind_getip = __libc_dlsym (libgcc_handle, "_Unwind_GetIP");
+ unwind_getcfa = __libc_dlsym (libgcc_handle, "_Unwind_GetCFA");
+ unwind_getgr = __libc_dlsym (libgcc_handle, "_Unwind_GetGR");
+ if (unwind_getip == NULL || unwind_getgr == NULL || unwind_getcfa == NULL)
+ {
+ unwind_backtrace = NULL;
+ __libc_dlclose (libgcc_handle);
+ libgcc_handle = NULL;
+ }
+}
+#else
+# define unwind_backtrace _Unwind_Backtrace
+# define unwind_getip _Unwind_GetIP
+# define unwind_getcfa _Unwind_GetCFA
+# define unwind_getgr _Unwind_GetGR
+#endif
+
+static _Unwind_Reason_Code
+backtrace_helper (struct _Unwind_Context *ctx, void *a)
+{
+ struct trace_arg *arg = a;
+
+  /* We are first called with the address inside the __backtrace
+     function itself.  Skip it.  */
+ if (arg->cnt != -1)
+ arg->array[arg->cnt] = (void *) unwind_getip (ctx);
+ if (++arg->cnt == arg->size)
+ return _URC_END_OF_STACK;
+
+ /* %ebp is DWARF2 register 5 on IA-32. */
+ arg->lastebp = (void *) unwind_getgr (ctx, 5);
+ arg->lastesp = (void *) unwind_getcfa (ctx);
+ return _URC_NO_REASON;
+}
+
+
+/* This is a global variable set at program start time. It marks the
+ highest used stack address. */
+extern void *__libc_stack_end;
+
+
+/* This is the stack layout we see with every stack frame
+   unless the code was compiled without a frame pointer.
+
+ +-----------------+ +-----------------+
+ %ebp -> | %ebp last frame--------> | %ebp last frame--->...
+ | | | |
+ | return address | | return address |
+ +-----------------+ +-----------------+
+
+   First try to get as far as possible using _Unwind_Backtrace,
+   which handles -fomit-frame-pointer as well but requires
+   .eh_frame info.  Then fall back to walking the stack
+   manually.  */
+
+struct layout
+{
+ struct layout *ebp;
+ void *ret;
+};
+
+
+int
+__backtrace (void **array, int size)
+{
+ struct trace_arg arg = { .array = array, .size = size, .cnt = -1 };
+
+ if (size <= 0)
+ return 0;
+
+#ifdef SHARED
+ __libc_once_define (static, once);
+
+ __libc_once (once, init);
+ if (unwind_backtrace == NULL)
+ return 0;
+#endif
+
+ unwind_backtrace (backtrace_helper, &arg);
+
+ if (arg.cnt > 1 && arg.array[arg.cnt - 1] == NULL)
+ --arg.cnt;
+ else if (arg.cnt < size)
+ {
+ struct layout *ebp = (struct layout *) arg.lastebp;
+
+ while (arg.cnt < size)
+ {
+ /* Check for out of range. */
+ if ((void *) ebp < arg.lastesp || (void *) ebp > __libc_stack_end
+ || ((long) ebp & 3))
+ break;
+
+ array[arg.cnt++] = ebp->ret;
+ ebp = ebp->ebp;
+ }
+ }
+ return arg.cnt != -1 ? arg.cnt : 0;
+}
+weak_alias (__backtrace, backtrace)
+libc_hidden_def (__backtrace)
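+
+/* Usage sketch (editorial, not part of the original file): the public
+   entry points declared in <execinfo.h>.
+
+     void *frames[64];
+     int n = backtrace (frames, 64);
+     backtrace_symbols_fd (frames, n, STDERR_FILENO);
+*/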
+
+
+#ifdef SHARED
+/* Free all resources if necessary. */
+libc_freeres_fn (free_mem)
+{
+ unwind_backtrace = NULL;
+ if (libgcc_handle != NULL)
+ {
+ __libc_dlclose (libgcc_handle);
+ libgcc_handle = NULL;
+ }
+}
+#endif
diff --git a/REORG.TODO/sysdeps/i386/bcopy.S b/REORG.TODO/sysdeps/i386/bcopy.S
new file mode 100644
index 0000000000..12b8ddb886
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/bcopy.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define USE_AS_BCOPY
+#define MEMCPY bcopy
+#include "memcpy.S"
diff --git a/REORG.TODO/sysdeps/i386/bsd-_setjmp.S b/REORG.TODO/sysdeps/i386/bsd-_setjmp.S
new file mode 100644
index 0000000000..6496304946
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/bsd-_setjmp.S
@@ -0,0 +1,56 @@
+/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. i386 version.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This just does a tail-call to `__sigsetjmp (ARG, 0)'.
+ We cannot do it in C because it must be a tail-call, so frame-unwinding
+ in setjmp doesn't clobber the state restored by longjmp. */
+
+#include <sysdep.h>
+#include <jmpbuf-offsets.h>
+#include <stap-probe.h>
+
+#define PARMS 4 /* no space for saved regs */
+#define JMPBUF PARMS
+#define SIGMSK JMPBUF+4
+
+ENTRY (_setjmp)
+
+ xorl %eax, %eax
+ movl JMPBUF(%esp), %edx
+
+ /* Save registers. */
+ movl %ebx, (JB_BX*4)(%edx)
+ movl %esi, (JB_SI*4)(%edx)
+ movl %edi, (JB_DI*4)(%edx)
+ leal JMPBUF(%esp), %ecx /* Save SP as it will be after we return. */
+#ifdef PTR_MANGLE
+ PTR_MANGLE (%ecx)
+#endif
+ movl %ecx, (JB_SP*4)(%edx)
+ movl 0(%esp), %ecx /* Save PC we are returning to now. */
+ LIBC_PROBE (setjmp, 3, 4@%edx, -4@$0, 4@%ecx)
+#ifdef PTR_MANGLE
+ PTR_MANGLE (%ecx)
+#endif
+ movl %ecx, (JB_PC*4)(%edx)
+ movl %ebp, (JB_BP*4)(%edx) /* Save caller's frame pointer. */
+
+ movl %eax, JB_SIZE(%edx) /* No signal mask set. */
+ ret
+END (_setjmp)
+libc_hidden_def (_setjmp)
diff --git a/REORG.TODO/sysdeps/i386/bsd-setjmp.S b/REORG.TODO/sysdeps/i386/bsd-setjmp.S
new file mode 100644
index 0000000000..5710e1f42b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/bsd-setjmp.S
@@ -0,0 +1,66 @@
+/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. i386 version.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This just does a tail-call to `__sigsetjmp (ARG, 1)'.
+ We cannot do it in C because it must be a tail-call, so frame-unwinding
+ in setjmp doesn't clobber the state restored by longjmp. */
+
+#include <sysdep.h>
+#include <jmpbuf-offsets.h>
+#include <stap-probe.h>
+
+#define PARMS 4 /* no space for saved regs */
+#define JMPBUF PARMS
+#define SIGMSK JMPBUF+4
+
+ENTRY (setjmp)
+ /* Note that we have to use a non-exported symbol in the next
+ jump since otherwise gas will emit it as a jump through the
+	   PLT, which we cannot use here.  */
+
+ movl JMPBUF(%esp), %eax
+
+ /* Save registers. */
+ movl %ebx, (JB_BX*4)(%eax)
+ movl %esi, (JB_SI*4)(%eax)
+ movl %edi, (JB_DI*4)(%eax)
+ leal JMPBUF(%esp), %ecx /* Save SP as it will be after we return. */
+#ifdef PTR_MANGLE
+ PTR_MANGLE (%ecx)
+#endif
+ movl %ecx, (JB_SP*4)(%eax)
+ movl 0(%esp), %ecx /* Save PC we are returning to now. */
+ LIBC_PROBE (setjmp, 3, 4@%eax, -4@$1, 4@%ecx)
+#ifdef PTR_MANGLE
+ PTR_MANGLE (%ecx)
+#endif
+ movl %ecx, (JB_PC*4)(%eax)
+ movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer. */
+
+ /* Call __sigjmp_save. */
+ pushl $1
+ cfi_adjust_cfa_offset (4)
+ pushl 8(%esp)
+ cfi_adjust_cfa_offset (4)
+ call __sigjmp_save
+ popl %ecx
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ ret
+END (setjmp)
diff --git a/REORG.TODO/sysdeps/i386/bzero.S b/REORG.TODO/sysdeps/i386/bzero.S
new file mode 100644
index 0000000000..c8dd47b4da
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/bzero.S
@@ -0,0 +1,5 @@
+#define USE_AS_BZERO
+#define memset __bzero
+#include "memset.S"
+
+weak_alias (__bzero, bzero)
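+
+/* Editorial note: with USE_AS_BZERO the assembled code implements the
+   two-argument interface, so __bzero (s, n) is equivalent to
+   memset (s, 0, n) except that no value is returned.  */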
diff --git a/REORG.TODO/sysdeps/i386/cacheinfo.c b/REORG.TODO/sysdeps/i386/cacheinfo.c
new file mode 100644
index 0000000000..f15fe0779a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/cacheinfo.c
@@ -0,0 +1,3 @@
+#define DISABLE_PREFETCHW
+
+#include <sysdeps/x86/cacheinfo.c>
diff --git a/REORG.TODO/sysdeps/i386/configure b/REORG.TODO/sysdeps/i386/configure
new file mode 100644
index 0000000000..5b55c5affe
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/configure
@@ -0,0 +1,84 @@
+# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/i386.
+
+# We no longer support i386 since it lacks the atomic instructions
+# required to implement NPTL threading.
+if test "$config_machine" = i386; then
+ as_fn_error $? "
+*** ERROR: Support for i386 is deprecated.
+*** Please use host i786, i686, i586 or i486.
+*** For example: /src/glibc/configure --host=i686-pc-linux-gnu ...\"" "$LINENO" 5
+fi
+
+# The GNU C Library can't be built for i386. There are several reasons for
+# this restriction. The primary reason is that i386 lacks the atomic
+# operations required to support the current NPTL implementation. While it is
+# possible that such atomic operations could be emulated in the kernel, to
+# date no such work has been done to enable this. Even with NPTL disabled you
+# still have no atomic.h implementation. Given the declining use of i386 we
+# disable support for building with `-march=i386' or `-mcpu=i386'. We don't
+# explicitly check for i386; instead we make sure the compiler can inline
+# the builtin __sync_val_compare_and_swap. If it does then we should have no
+# problem building for i386.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for compiler support of inlined builtin function __sync_val_compare_and_swap" >&5
+$as_echo_n "checking for compiler support of inlined builtin function __sync_val_compare_and_swap... " >&6; }
+libc_compiler_builtin_inlined=no
+cat > conftest.c <<EOF
+int _start (void) { int a, b, c; __sync_val_compare_and_swap (&a, b, c); return 0; }
+EOF
+if ! { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS
+ -O0 -nostdlib -nostartfiles
+ -S conftest.c -o - | fgrep "__sync_val_compare_and_swap"
+ 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+then
+ libc_compiler_builtin_inlined=yes
+fi
+rm -f conftest*
+if test $libc_compiler_builtin_inlined = yes; then
+ libc_cv_unsupported_i386=no
+else
+ as_fn_error $? "
+*** Building with -march=i386/-mcpu=i386 is not supported.
+*** Please use host i786, i686, i586, or i486.
+*** For example: /source/glibc/configure CFLAGS='-O2 -march=i686' ..." "$LINENO" 5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_compiler_builtin_inlined" >&5
+$as_echo "$libc_compiler_builtin_inlined" >&6; }
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
+$as_echo_n "checking for Intel MPX support... " >&6; }
+if ${libc_cv_asm_mpx+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ bndmov %bnd0,(%esp)
+EOF
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_asm_mpx=yes
+else
+ libc_cv_asm_mpx=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
+$as_echo "$libc_cv_asm_mpx" >&6; }
+if test $libc_cv_asm_mpx = yes; then
+ $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
+
+fi
+
+$as_echo "#define USE_REGPARMS 1" >>confdefs.h
+
+
+$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
+
diff --git a/REORG.TODO/sysdeps/i386/configure.ac b/REORG.TODO/sysdeps/i386/configure.ac
new file mode 100644
index 0000000000..19ef33f34a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/configure.ac
@@ -0,0 +1,52 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/i386.
+
+# We no longer support i386 since it lacks the atomic instructions
+# required to implement NPTL threading.
+if test "$config_machine" = i386; then
+ AC_MSG_ERROR([
+*** ERROR: Support for i386 is deprecated.
+*** Please use host i786, i686, i586 or i486.
+*** For example: /src/glibc/configure --host=i686-pc-linux-gnu ..."])
+fi
+
+# The GNU C Library can't be built for i386. There are several reasons for
+# this restriction. The primary reason is that i386 lacks the atomic
+# operations required to support the current NPTL implementation. While it is
+# possible that such atomic operations could be emulated in the kernel, to
+# date no such work has been done to enable this. Even with NPTL disabled you
+# still have no atomic.h implementation. Given the declining use of i386 we
+# disable support for building with `-march=i386' or `-mcpu=i386'. We don't
+# explicitly check for i386; instead we make sure the compiler can inline
+# the builtin __sync_val_compare_and_swap. If it does then we should have no
+# problem building for i386.
+LIBC_COMPILER_BUILTIN_INLINED(
+ [__sync_val_compare_and_swap],
+ [int a, b, c; __sync_val_compare_and_swap (&a, b, c);],
+ [-O0],
+ [libc_cv_unsupported_i386=no],
+ [AC_MSG_ERROR([
+*** Building with -march=i386/-mcpu=i386 is not supported.
+*** Please use host i786, i686, i586, or i486.
+*** For example: /source/glibc/configure CFLAGS='-O2 -march=i686' ...])])
+
+dnl Check whether asm supports Intel MPX
+AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
+cat > conftest.s <<\EOF
+ bndmov %bnd0,(%esp)
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_asm_mpx=yes
+else
+ libc_cv_asm_mpx=no
+fi
+rm -f conftest*])
+if test $libc_cv_asm_mpx = yes; then
+ AC_DEFINE(HAVE_MPX_SUPPORT)
+fi
+
+AC_DEFINE(USE_REGPARMS)
+
+dnl It is always possible to access static and hidden symbols in a
+dnl position independent way.
+AC_DEFINE(PI_STATIC_AND_HIDDEN)
diff --git a/REORG.TODO/sysdeps/i386/crti.S b/REORG.TODO/sysdeps/i386/crti.S
new file mode 100644
index 0000000000..f800209990
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/crti.S
@@ -0,0 +1,84 @@
+/* Special .init and .fini section support for x86.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file with other
+ programs, and to distribute those programs without any restriction
+ coming from the use of this file. (The GNU Lesser General Public
+ License restrictions do apply in other respects; for example, they
+ cover modification of the file, and distribution when not linked
+ into another program.)
+
+ Note that people who make modified versions of this file are not
+ obligated to grant this special exception for their modified
+ versions; it is their choice whether to do so. The GNU Lesser
+ General Public License gives permission to release a modified
+ version without this exception; this exception also makes it
+ possible to release a modified version which carries forward this
+ exception.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* crti.S puts a function prologue at the beginning of the .init and
+ .fini sections and defines global symbols for those addresses, so
+ they can be called as functions. The symbols _init and _fini are
+ magic and cause the linker to emit DT_INIT and DT_FINI. */
+
+#include <libc-symbols.h>
+#include <sysdep.h>
+
+#ifndef PREINIT_FUNCTION
+# define PREINIT_FUNCTION __gmon_start__
+#endif
+
+#ifndef PREINIT_FUNCTION_WEAK
+# define PREINIT_FUNCTION_WEAK 1
+#endif
+
+#if PREINIT_FUNCTION_WEAK
+ weak_extern (PREINIT_FUNCTION)
+#else
+ .hidden PREINIT_FUNCTION
+#endif
+
+ .section .init,"ax",@progbits
+ .p2align 2
+ .globl _init
+ .type _init, @function
+_init:
+ pushl %ebx
+ /* Maintain 16-byte stack alignment for called functions. */
+ subl $8, %esp
+ LOAD_PIC_REG (bx)
+#if PREINIT_FUNCTION_WEAK
+ movl PREINIT_FUNCTION@GOT(%ebx), %eax
+ testl %eax, %eax
+ je .Lno_weak_fn
+ call PREINIT_FUNCTION@PLT
+.Lno_weak_fn:
+#else
+ call PREINIT_FUNCTION
+#endif
+
+ .section .fini,"ax",@progbits
+ .p2align 2
+ .globl _fini
+ .type _fini, @function
+_fini:
+ pushl %ebx
+ subl $8, %esp
+ LOAD_PIC_REG (bx)
diff --git a/REORG.TODO/sysdeps/i386/crtn.S b/REORG.TODO/sysdeps/i386/crtn.S
new file mode 100644
index 0000000000..b18b9c171a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/crtn.S
@@ -0,0 +1,47 @@
+/* Special .init and .fini section support for x86.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file with other
+ programs, and to distribute those programs without any restriction
+ coming from the use of this file. (The GNU Lesser General Public
+ License restrictions do apply in other respects; for example, they
+ cover modification of the file, and distribution when not linked
+ into another program.)
+
+ Note that people who make modified versions of this file are not
+ obligated to grant this special exception for their modified
+ versions; it is their choice whether to do so. The GNU Lesser
+ General Public License gives permission to release a modified
+ version without this exception; this exception also makes it
+ possible to release a modified version which carries forward this
+ exception.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* crtn.S puts function epilogues in the .init and .fini sections
+ corresponding to the prologues in crti.S. */
+
+ .section .init,"ax",@progbits
+ addl $8, %esp
+ popl %ebx
+ ret
+
+ .section .fini,"ax",@progbits
+ addl $8, %esp
+ popl %ebx
+ ret
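+
+/* Editorial note: each epilogue above undoes the matching prologue
+   from crti.S: addl $8, %esp releases the alignment padding and
+   popl %ebx restores the saved GOT register before returning.  */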
diff --git a/REORG.TODO/sysdeps/i386/dl-irel.h b/REORG.TODO/sysdeps/i386/dl-irel.h
new file mode 100644
index 0000000000..824e81aed1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-irel.h
@@ -0,0 +1,51 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+ i386 version.
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+
+#define ELF_MACHINE_IREL 1
+
+static inline Elf32_Addr
+__attribute ((always_inline))
+elf_ifunc_invoke (Elf32_Addr addr)
+{
+ return ((Elf32_Addr (*) (void)) (addr)) ();
+}
+
+static inline void
+__attribute ((always_inline))
+elf_irel (const Elf32_Rel *reloc)
+{
+ Elf32_Addr *const reloc_addr = (void *) reloc->r_offset;
+ const unsigned long int r_type = ELF32_R_TYPE (reloc->r_info);
+
+ if (__glibc_likely (r_type == R_386_IRELATIVE))
+ {
+ Elf32_Addr value = elf_ifunc_invoke(*reloc_addr);
+ *reloc_addr = value;
+ }
+ else
+ __libc_fatal ("unexpected reloc type in static binary");
+}
+
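+/* Illustrative sketch (editorial, not part of the original header):
+   the kind of symbol an R_386_IRELATIVE relocation comes from.  An
+   STT_GNU_IFUNC symbol names a resolver that returns the address of
+   the implementation to use; all names below are hypothetical.
+
+     static int foo_impl (void) { return 0; }
+
+     static int (*resolve_foo (void)) (void)
+     {
+       return foo_impl;   // chosen once, at relocation time
+     }
+
+     int foo (void) __attribute__ ((ifunc ("resolve_foo")));
+*/
+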
+#endif /* dl-irel.h */
diff --git a/REORG.TODO/sysdeps/i386/dl-lookupcfg.h b/REORG.TODO/sysdeps/i386/dl-lookupcfg.h
new file mode 100644
index 0000000000..47b534a059
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-lookupcfg.h
@@ -0,0 +1,32 @@
+/* Configuration of lookup functions.
+ Copyright (C) 2005-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define DL_UNMAP_IS_SPECIAL
+
+#include_next <dl-lookupcfg.h>
+
+/* The address of protected data defined in a shared library may be
+   external due to copy relocation.  */
+#define DL_EXTERN_PROTECTED_DATA
+
+struct link_map;
+
+extern void _dl_unmap (struct link_map *map)
+ internal_function attribute_hidden;
+
+#define DL_UNMAP(map) _dl_unmap (map)
diff --git a/REORG.TODO/sysdeps/i386/dl-machine.h b/REORG.TODO/sysdeps/i386/dl-machine.h
new file mode 100644
index 0000000000..57d4a0bdbd
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-machine.h
@@ -0,0 +1,757 @@
+/* Machine-dependent ELF dynamic relocation inline functions. i386 version.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef dl_machine_h
+#define dl_machine_h
+
+#define ELF_MACHINE_NAME "i386"
+
+#include <sys/param.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <cpu-features.c>
+
+/* Return nonzero iff ELF header is compatible with the running host. */
+static inline int __attribute__ ((unused))
+elf_machine_matches_host (const Elf32_Ehdr *ehdr)
+{
+ return ehdr->e_machine == EM_386;
+}
+
+
+/* Return the link-time address of _DYNAMIC. Conveniently, this is the
+ first element of the GOT, a special entry that is never relocated. */
+static inline Elf32_Addr __attribute__ ((unused, const))
+elf_machine_dynamic (void)
+{
+ /* This produces a GOTOFF reloc that resolves to zero at link time, so in
+ fact just loads from the GOT register directly. By doing it without
+ an asm we can let the compiler choose any register. */
+ extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
+ return _GLOBAL_OFFSET_TABLE_[0];
+}
+
+/* Return the run-time load address of the shared object. */
+static inline Elf32_Addr __attribute__ ((unused))
+elf_machine_load_address (void)
+{
+ /* Compute the difference between the runtime address of _DYNAMIC as seen
+ by a GOTOFF reference, and the link-time address found in the special
+ unrelocated first GOT entry. */
+ extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden;
+ return (Elf32_Addr) &bygotoff - elf_machine_dynamic ();
+}
+
+/* Set up the loaded object described by L so its unrelocated PLT
+ entries will jump to the on-demand fixup code in dl-runtime.c. */
+
+static inline int __attribute__ ((unused, always_inline))
+elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+{
+ Elf32_Addr *got;
+ extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden;
+ extern void _dl_runtime_profile (Elf32_Word) attribute_hidden;
+
+ if (l->l_info[DT_JMPREL] && lazy)
+ {
+ /* The GOT entries for functions in the PLT have not yet been filled
+	 in.  When called, their initial contents arrange to push an
+	 offset into the .rel.plt section, push _GLOBAL_OFFSET_TABLE_[1],
+	 and then jump to _GLOBAL_OFFSET_TABLE_[2].  */
+ got = (Elf32_Addr *) D_PTR (l, l_info[DT_PLTGOT]);
+ /* If a library is prelinked but we have to relocate anyway,
+ we have to be able to undo the prelinking of .got.plt.
+	 The prelinker saved the address of .plt + 0x16 for us here.  */
+ if (got[1])
+ {
+ l->l_mach.plt = got[1] + l->l_addr;
+ l->l_mach.gotplt = (Elf32_Addr) &got[3];
+ }
+ got[1] = (Elf32_Addr) l; /* Identify this shared object. */
+
+ /* The got[2] entry contains the address of a function which gets
+ called to get the address of a so far unresolved function and
+ jump to it. The profiling extension of the dynamic linker allows
+	 us to intercept the calls to collect information.  In this case we
+ don't store the address in the GOT so that all future calls also
+ end in this function. */
+ if (__glibc_unlikely (profile))
+ {
+ got[2] = (Elf32_Addr) &_dl_runtime_profile;
+
+ if (GLRO(dl_profile) != NULL
+ && _dl_name_match_p (GLRO(dl_profile), l))
+ /* This is the object we are looking for. Say that we really
+ want profiling and the timers are started. */
+ GL(dl_profile_map) = l;
+ }
+ else
+ /* This function will get called to fix up the GOT entry indicated by
+ the offset on the stack, and then jump to the resolved address. */
+ got[2] = (Elf32_Addr) &_dl_runtime_resolve;
+ }
+
+ return lazy;
+}
+
+#ifdef IN_DL_RUNTIME
+
+# ifndef PROF
+/* We add a declaration of this function here so that in dl-runtime.c
+ the ELF_MACHINE_RUNTIME_TRAMPOLINE macro really can pass the parameters
+ in registers.
+
+ We cannot use this scheme for profiling because the _mcount call
+ destroys the passed register information. */
+#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused))
+
+extern ElfW(Addr) _dl_fixup (struct link_map *l,
+ ElfW(Word) reloc_offset)
+ ARCH_FIXUP_ATTRIBUTE;
+extern ElfW(Addr) _dl_profile_fixup (struct link_map *l,
+ ElfW(Word) reloc_offset,
+ ElfW(Addr) retaddr, void *regs,
+ long int *framesizep)
+ ARCH_FIXUP_ATTRIBUTE;
+# endif
+
+#endif
+
+/* Mask identifying addresses reserved for the user program,
+ where the dynamic linker should not map anything. */
+#define ELF_MACHINE_USER_ADDRESS_MASK 0xf8000000UL
+
+/* Initial entry point code for the dynamic linker.
+ The C function `_dl_start' is the real entry point;
+ its return value is the user program's entry point. */
+
+#define RTLD_START asm ("\n\
+ .text\n\
+ .align 16\n\
+0: movl (%esp), %ebx\n\
+ ret\n\
+ .align 16\n\
+.globl _start\n\
+.globl _dl_start_user\n\
+_start:\n\
+ # Note that _dl_start gets the parameter in %eax.\n\
+ movl %esp, %eax\n\
+ call _dl_start\n\
+_dl_start_user:\n\
+ # Save the user entry point address in %edi.\n\
+ movl %eax, %edi\n\
+ # Point %ebx at the GOT.\n\
+ call 0b\n\
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx\n\
+ # See if we were run as a command with the executable file\n\
+ # name as an extra leading argument.\n\
+ movl _dl_skip_args@GOTOFF(%ebx), %eax\n\
+ # Pop the original argument count.\n\
+ popl %edx\n\
+ # Adjust the stack pointer to skip _dl_skip_args words.\n\
+ leal (%esp,%eax,4), %esp\n\
+ # Subtract _dl_skip_args from argc.\n\
+ subl %eax, %edx\n\
+ # Push argc back on the stack.\n\
+ push %edx\n\
+ # The special initializer gets called with the stack just\n\
+ # as the application's entry point will see it; it can\n\
+ # switch stacks if it moves these contents over.\n\
+" RTLD_START_SPECIAL_INIT "\n\
+ # Load the parameters again.\n\
+ # (eax, edx, ecx, *--esp) = (_dl_loaded, argc, argv, envp)\n\
+ movl _rtld_local@GOTOFF(%ebx), %eax\n\
+ leal 8(%esp,%edx,4), %esi\n\
+ leal 4(%esp), %ecx\n\
+ movl %esp, %ebp\n\
+ # Make sure _dl_init is run with 16 byte aligned stack.\n\
+ andl $-16, %esp\n\
+ pushl %eax\n\
+ pushl %eax\n\
+ pushl %ebp\n\
+ pushl %esi\n\
+ # Clear %ebp, so that even constructors have terminated backchain.\n\
+ xorl %ebp, %ebp\n\
+ # Call the function to run the initializers.\n\
+ call _dl_init\n\
+ # Pass our finalizer function to the user in %edx, as per ELF ABI.\n\
+ leal _dl_fini@GOTOFF(%ebx), %edx\n\
+ # Restore %esp _start expects.\n\
+ movl (%esp), %esp\n\
+ # Jump to the user's entry point.\n\
+ jmp *%edi\n\
+ .previous\n\
+");
+
+#ifndef RTLD_START_SPECIAL_INIT
+# define RTLD_START_SPECIAL_INIT /* nothing */
+#endif
+
+/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or
+ TLS variable, so undefined references should not be allowed to
+ define the value.
+ ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one
+ of the main executable's symbols, as for a COPY reloc.
+   ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA iff TYPE describes a relocation
+   against protected data whose address may be external due to copy
+   relocation.  */
+# define elf_machine_type_class(type) \
+ ((((type) == R_386_JMP_SLOT || (type) == R_386_TLS_DTPMOD32 \
+ || (type) == R_386_TLS_DTPOFF32 || (type) == R_386_TLS_TPOFF32 \
+ || (type) == R_386_TLS_TPOFF || (type) == R_386_TLS_DESC) \
+ * ELF_RTYPE_CLASS_PLT) \
+ | (((type) == R_386_COPY) * ELF_RTYPE_CLASS_COPY) \
+ | (((type) == R_386_GLOB_DAT) * ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA))
+
+/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
+#define ELF_MACHINE_JMP_SLOT R_386_JMP_SLOT
+
+/* The i386 never uses Elf32_Rela relocations for the dynamic linker.
+ Prelinked libraries may use Elf32_Rela though. */
+#define ELF_MACHINE_PLT_REL 1
+
+/* We define an initialization function.  It is called very early in
+ _dl_sysdep_start. */
+#define DL_PLATFORM_INIT dl_platform_init ()
+
+static inline void __attribute__ ((unused))
+dl_platform_init (void)
+{
+#if IS_IN (rtld)
+ /* init_cpu_features has been called early from __libc_start_main in
+ static executable. */
+ init_cpu_features (&GLRO(dl_x86_cpu_features));
+#else
+ if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
+ /* Avoid an empty string which would disturb us. */
+ GLRO(dl_platform) = NULL;
+#endif
+}
+
+static inline Elf32_Addr
+elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+ const Elf32_Rel *reloc,
+ Elf32_Addr *reloc_addr, Elf32_Addr value)
+{
+ return *reloc_addr = value;
+}
+
+/* Return the final value of a plt relocation. */
+static inline Elf32_Addr
+elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc,
+ Elf32_Addr value)
+{
+ return value;
+}
+
+
+/* Names of the architecture-specific auditing callback functions. */
+#define ARCH_LA_PLTENTER i86_gnu_pltenter
+#define ARCH_LA_PLTEXIT i86_gnu_pltexit
+
+#endif /* !dl_machine_h */
+
+/* The i386 never uses Elf32_Rela relocations for the dynamic linker.
+ Prelinked libraries may use Elf32_Rela though. */
+#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP
+#define ELF_MACHINE_NO_REL 0
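+
+/* Reference note (editorial): with Elf32_Rel the addend A is stored in
+   the word being relocated, so in ABI terms the cases below compute
+   R_386_32 as *reloc_addr = S + A and R_386_PC32 as
+   *reloc_addr = S + A - P, where S is the symbol value and P the
+   address of the relocated field.  */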
+
+#ifdef RESOLVE_MAP
+
+/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
+ MAP is the object containing the reloc. */
+
+auto inline void
+__attribute ((always_inline))
+elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
+ const Elf32_Sym *sym, const struct r_found_version *version,
+ void *const reloc_addr_arg, int skip_ifunc)
+{
+ Elf32_Addr *const reloc_addr = reloc_addr_arg;
+ const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+
+# if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC
+ if (__glibc_unlikely (r_type == R_386_RELATIVE))
+ {
+# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC
+ /* This is defined in rtld.c, but nowhere in the static libc.a;
+ make the reference weak so static programs can still link.
+ This declaration cannot be done when compiling rtld.c
+ (i.e. #ifdef RTLD_BOOTSTRAP) because rtld.c contains the
+ common defn for _dl_rtld_map, which is incompatible with a
+ weak decl in the same file. */
+# ifndef SHARED
+ weak_extern (_dl_rtld_map);
+# endif
+ if (map != &GL(dl_rtld_map)) /* Already done in rtld itself. */
+# endif
+ *reloc_addr += map->l_addr;
+ }
+# ifndef RTLD_BOOTSTRAP
+ else if (__glibc_unlikely (r_type == R_386_NONE))
+ return;
+# endif
+ else
+# endif /* !RTLD_BOOTSTRAP and have no -z combreloc */
+ {
+# ifndef RTLD_BOOTSTRAP
+ const Elf32_Sym *const refsym = sym;
+# endif
+ struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ Elf32_Addr value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value;
+
+ if (sym != NULL
+ && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC,
+ 0)
+ && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1)
+ && __builtin_expect (!skip_ifunc, 1))
+ {
+# ifndef RTLD_BOOTSTRAP
+ if (sym_map != map
+ && sym_map->l_type != lt_executable
+ && !sym_map->l_relocated)
+ {
+ const char *strtab
+ = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+ _dl_error_printf ("\
+%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
+ RTLD_PROGNAME, map->l_name,
+ sym_map->l_name,
+ strtab + refsym->st_name);
+ }
+# endif
+ value = ((Elf32_Addr (*) (void)) value) ();
+ }
+
+ switch (r_type)
+ {
+# ifndef RTLD_BOOTSTRAP
+ case R_386_SIZE32:
+ /* Set to symbol size plus addend. */
+ *reloc_addr += sym->st_size;
+ break;
+# endif
+ case R_386_GLOB_DAT:
+ case R_386_JMP_SLOT:
+ *reloc_addr = value;
+ break;
+
+ case R_386_TLS_DTPMOD32:
+# ifdef RTLD_BOOTSTRAP
+ /* During startup the dynamic linker is always the module
+ with index 1.
+	     XXX If this relocation is necessary, move it before the
+	     RESOLVE call.  */
+ *reloc_addr = 1;
+# else
+ /* Get the information from the link map returned by the
+	     resolve function.  */
+ if (sym_map != NULL)
+ *reloc_addr = sym_map->l_tls_modid;
+# endif
+ break;
+ case R_386_TLS_DTPOFF32:
+# ifndef RTLD_BOOTSTRAP
+ /* During relocation all TLS symbols are defined and used.
+ Therefore the offset is already correct. */
+ if (sym != NULL)
+ *reloc_addr = sym->st_value;
+# endif
+ break;
+ case R_386_TLS_DESC:
+ {
+ struct tlsdesc volatile *td =
+ (struct tlsdesc volatile *)reloc_addr;
+
+# ifndef RTLD_BOOTSTRAP
+ if (! sym)
+ td->entry = _dl_tlsdesc_undefweak;
+ else
+# endif
+ {
+# ifndef RTLD_BOOTSTRAP
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (!TRY_STATIC_TLS (map, sym_map))
+ {
+ td->arg = _dl_make_tlsdesc_dynamic
+ (sym_map, sym->st_value + (ElfW(Word))td->arg);
+ td->entry = _dl_tlsdesc_dynamic;
+ }
+ else
+# endif
+# endif
+ {
+ td->arg = (void*)(sym->st_value - sym_map->l_tls_offset
+ + (ElfW(Word))td->arg);
+ td->entry = _dl_tlsdesc_return;
+ }
+ }
+ break;
+ }
+ case R_386_TLS_TPOFF32:
+ /* The offset is positive, backward from the thread pointer. */
+# ifdef RTLD_BOOTSTRAP
+ *reloc_addr += map->l_tls_offset - sym->st_value;
+# else
+      /* We know the offset of the object the symbol is contained in.
+ It is a positive value which will be subtracted from the
+ thread pointer. To get the variable position in the TLS
+ block we subtract the offset from that of the TLS block. */
+ if (sym != NULL)
+ {
+ CHECK_STATIC_TLS (map, sym_map);
+ *reloc_addr += sym_map->l_tls_offset - sym->st_value;
+ }
+# endif
+ break;
+ case R_386_TLS_TPOFF:
+ /* The offset is negative, forward from the thread pointer. */
+# ifdef RTLD_BOOTSTRAP
+ *reloc_addr += sym->st_value - map->l_tls_offset;
+# else
+      /* We know the offset of the object the symbol is contained in.
+ It is a negative value which will be added to the
+ thread pointer. */
+ if (sym != NULL)
+ {
+ CHECK_STATIC_TLS (map, sym_map);
+ *reloc_addr += sym->st_value - sym_map->l_tls_offset;
+ }
+# endif
+ break;
+
+# ifndef RTLD_BOOTSTRAP
+ case R_386_32:
+ *reloc_addr += value;
+ break;
+ case R_386_PC32:
+ *reloc_addr += (value - (Elf32_Addr) reloc_addr);
+ break;
+ case R_386_COPY:
+ if (sym == NULL)
+ /* This can happen in trace mode if an object could not be
+ found. */
+ break;
+ if (__builtin_expect (sym->st_size > refsym->st_size, 0)
+ || (__builtin_expect (sym->st_size < refsym->st_size, 0)
+ && GLRO(dl_verbose)))
+ {
+ const char *strtab;
+
+ strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+ _dl_error_printf ("\
+%s: Symbol `%s' has different size in shared object, consider re-linking\n",
+ RTLD_PROGNAME, strtab + refsym->st_name);
+ }
+ memcpy (reloc_addr_arg, (void *) value,
+ MIN (sym->st_size, refsym->st_size));
+ break;
+ case R_386_IRELATIVE:
+ value = map->l_addr + *reloc_addr;
+ value = ((Elf32_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ break;
+ default:
+ _dl_reloc_bad_type (map, r_type, 0);
+ break;
+# endif /* !RTLD_BOOTSTRAP */
+ }
+ }
+}
+
+# ifndef RTLD_BOOTSTRAP
+auto inline void
+__attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
+ const Elf32_Sym *sym, const struct r_found_version *version,
+ void *const reloc_addr_arg, int skip_ifunc)
+{
+ Elf32_Addr *const reloc_addr = reloc_addr_arg;
+ const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+
+ if (ELF32_R_TYPE (reloc->r_info) == R_386_RELATIVE)
+ *reloc_addr = map->l_addr + reloc->r_addend;
+ else if (r_type != R_386_NONE)
+ {
+# ifndef RESOLVE_CONFLICT_FIND_MAP
+ const Elf32_Sym *const refsym = sym;
+# endif
+ struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+
+ if (sym != NULL
+ && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1)
+ && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)
+ && __builtin_expect (!skip_ifunc, 1))
+ value = ((Elf32_Addr (*) (void)) value) ();
+
+ switch (ELF32_R_TYPE (reloc->r_info))
+ {
+ case R_386_SIZE32:
+ /* Set to symbol size plus addend. */
+ value = sym->st_size;
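+	  /* Fall through.  */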
+ case R_386_GLOB_DAT:
+ case R_386_JMP_SLOT:
+ case R_386_32:
+ *reloc_addr = value + reloc->r_addend;
+ break;
+# ifndef RESOLVE_CONFLICT_FIND_MAP
+ /* Not needed for dl-conflict.c. */
+ case R_386_PC32:
+ *reloc_addr = (value + reloc->r_addend - (Elf32_Addr) reloc_addr);
+ break;
+
+ case R_386_TLS_DTPMOD32:
+ /* Get the information from the link map returned by the
+	     resolve function.  */
+ if (sym_map != NULL)
+ *reloc_addr = sym_map->l_tls_modid;
+ break;
+ case R_386_TLS_DTPOFF32:
+ /* During relocation all TLS symbols are defined and used.
+ Therefore the offset is already correct. */
+ *reloc_addr = (sym == NULL ? 0 : sym->st_value) + reloc->r_addend;
+ break;
+ case R_386_TLS_DESC:
+ {
+ struct tlsdesc volatile *td =
+ (struct tlsdesc volatile *)reloc_addr;
+
+# ifndef RTLD_BOOTSTRAP
+ if (!sym)
+ {
+ td->arg = (void*)reloc->r_addend;
+ td->entry = _dl_tlsdesc_undefweak;
+ }
+ else
+# endif
+ {
+# ifndef RTLD_BOOTSTRAP
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (!TRY_STATIC_TLS (map, sym_map))
+ {
+ td->arg = _dl_make_tlsdesc_dynamic
+ (sym_map, sym->st_value + reloc->r_addend);
+ td->entry = _dl_tlsdesc_dynamic;
+ }
+ else
+# endif
+# endif
+ {
+ td->arg = (void*)(sym->st_value - sym_map->l_tls_offset
+ + reloc->r_addend);
+ td->entry = _dl_tlsdesc_return;
+ }
+ }
+ }
+ break;
+ case R_386_TLS_TPOFF32:
+ /* The offset is positive, backward from the thread pointer. */
+	    /* We know the offset of the object the symbol is contained in.
+ It is a positive value which will be subtracted from the
+ thread pointer. To get the variable position in the TLS
+ block we subtract the offset from that of the TLS block. */
+ if (sym != NULL)
+ {
+ CHECK_STATIC_TLS (map, sym_map);
+ *reloc_addr = sym_map->l_tls_offset - sym->st_value
+ + reloc->r_addend;
+ }
+ break;
+ case R_386_TLS_TPOFF:
+ /* The offset is negative, forward from the thread pointer. */
+	    /* We know the offset of the object the symbol is contained in.
+ It is a negative value which will be added to the
+ thread pointer. */
+ if (sym != NULL)
+ {
+ CHECK_STATIC_TLS (map, sym_map);
+ *reloc_addr = sym->st_value - sym_map->l_tls_offset
+ + reloc->r_addend;
+ }
+ break;
+ case R_386_COPY:
+ if (sym == NULL)
+ /* This can happen in trace mode if an object could not be
+ found. */
+ break;
+ if (__builtin_expect (sym->st_size > refsym->st_size, 0)
+ || (__builtin_expect (sym->st_size < refsym->st_size, 0)
+ && GLRO(dl_verbose)))
+ {
+ const char *strtab;
+
+ strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+ _dl_error_printf ("\
+%s: Symbol `%s' has different size in shared object, consider re-linking\n",
+ RTLD_PROGNAME, strtab + refsym->st_name);
+ }
+ memcpy (reloc_addr_arg, (void *) value,
+ MIN (sym->st_size, refsym->st_size));
+ break;
+# endif /* !RESOLVE_CONFLICT_FIND_MAP */
+ case R_386_IRELATIVE:
+ value = map->l_addr + reloc->r_addend;
+ value = ((Elf32_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ break;
+ default:
+	  /* We add these checks in the version used to relocate ld.so
+	     only while we are still debugging.  */
+ _dl_reloc_bad_type (map, r_type, 0);
+ break;
+ }
+ }
+}
+# endif /* !RTLD_BOOTSTRAP */
+
+auto inline void
+__attribute ((always_inline))
+elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc,
+ void *const reloc_addr_arg)
+{
+ Elf32_Addr *const reloc_addr = reloc_addr_arg;
+ assert (ELF32_R_TYPE (reloc->r_info) == R_386_RELATIVE);
+ *reloc_addr += l_addr;
+}
+
+# ifndef RTLD_BOOTSTRAP
+auto inline void
+__attribute__ ((always_inline))
+elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
+ void *const reloc_addr_arg)
+{
+ Elf32_Addr *const reloc_addr = reloc_addr_arg;
+ *reloc_addr = l_addr + reloc->r_addend;
+}
+# endif /* !RTLD_BOOTSTRAP */
+
+auto inline void
+__attribute__ ((always_inline))
+elf_machine_lazy_rel (struct link_map *map,
+ Elf32_Addr l_addr, const Elf32_Rel *reloc,
+ int skip_ifunc)
+{
+ Elf32_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+ const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+ /* Check for unexpected PLT reloc type. */
+ if (__glibc_likely (r_type == R_386_JMP_SLOT))
+ {
+ if (__builtin_expect (map->l_mach.plt, 0) == 0)
+ *reloc_addr += l_addr;
+ else
+ *reloc_addr = (map->l_mach.plt
+ + (((Elf32_Addr) reloc_addr) - map->l_mach.gotplt) * 4);
+ }
+ else if (__glibc_likely (r_type == R_386_TLS_DESC))
+ {
+ struct tlsdesc volatile * __attribute__((__unused__)) td =
+ (struct tlsdesc volatile *)reloc_addr;
+
+ /* Handle relocations that reference the local *ABS* in a simple
+ way, so as to preserve a potential addend. */
+ if (ELF32_R_SYM (reloc->r_info) == 0)
+ td->entry = _dl_tlsdesc_resolve_abs_plus_addend;
+ /* Given a known-zero addend, we can store a pointer to the
+ reloc in the arg position. */
+ else if (td->arg == 0)
+ {
+ td->arg = (void*)reloc;
+ td->entry = _dl_tlsdesc_resolve_rel;
+ }
+ else
+ {
+ /* We could handle non-*ABS* relocations with non-zero addends
+ by allocating dynamically an arg to hold a pointer to the
+ reloc, but that sounds pointless. */
+ const Elf32_Rel *const r = reloc;
+ /* The code below was borrowed from elf_dynamic_do_rel(). */
+ const ElfW(Sym) *const symtab =
+ (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+
+# ifdef RTLD_BOOTSTRAP
+ /* The dynamic linker always uses versioning. */
+ assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL);
+# else
+ if (map->l_info[VERSYMIDX (DT_VERSYM)])
+# endif
+ {
+ const ElfW(Half) *const version =
+ (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+ ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
+ elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
+ &map->l_versions[ndx],
+ (void *) (l_addr + r->r_offset), skip_ifunc);
+ }
+# ifndef RTLD_BOOTSTRAP
+ else
+ elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
+ (void *) (l_addr + r->r_offset), skip_ifunc);
+# endif
+ }
+ }
+ else if (__glibc_unlikely (r_type == R_386_IRELATIVE))
+ {
+ Elf32_Addr value = map->l_addr + *reloc_addr;
+ if (__glibc_likely (!skip_ifunc))
+ value = ((Elf32_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ }
+ else
+ _dl_reloc_bad_type (map, r_type, 1);
+}
+
+# ifndef RTLD_BOOTSTRAP
+
+auto inline void
+__attribute__ ((always_inline))
+elf_machine_lazy_rela (struct link_map *map,
+ Elf32_Addr l_addr, const Elf32_Rela *reloc,
+ int skip_ifunc)
+{
+ Elf32_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+ const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+ if (__glibc_likely (r_type == R_386_JMP_SLOT))
+ ;
+ else if (__glibc_likely (r_type == R_386_TLS_DESC))
+ {
+ struct tlsdesc volatile * __attribute__((__unused__)) td =
+ (struct tlsdesc volatile *)reloc_addr;
+
+ td->arg = (void*)reloc;
+ td->entry = _dl_tlsdesc_resolve_rela;
+ }
+ else if (__glibc_unlikely (r_type == R_386_IRELATIVE))
+ {
+ Elf32_Addr value = map->l_addr + reloc->r_addend;
+ if (__glibc_likely (!skip_ifunc))
+ value = ((Elf32_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ }
+ else
+ _dl_reloc_bad_type (map, r_type, 1);
+}
+
+# endif /* !RTLD_BOOTSTRAP */
+
+#endif /* RESOLVE_MAP */
diff --git a/REORG.TODO/sysdeps/i386/dl-procinfo.c b/REORG.TODO/sysdeps/i386/dl-procinfo.c
new file mode 100644
index 0000000000..7237f778b2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-procinfo.c
@@ -0,0 +1,65 @@
+/* Data for i386 version of processor capability information.
+ Copyright (C) 2001-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* If anything should be added here, check whether the size of each string
+ is still ok with the given array size.
+
+ All the #ifdefs in the definitions are quite irritating but
+ necessary if we want to avoid duplicating the information. There
+ are three different modes:
+
+ - PROCINFO_DECL is defined. This means we are only interested in
+ declarations.
+
+ - PROCINFO_DECL is not defined:
+
+ + if SHARED is defined the file is included in an array
+ initializer. The .element = { ... } syntax is needed.
+
+ + if SHARED is not defined a normal array initialization is
+ needed.
+ */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#include <sysdeps/x86/dl-procinfo.c>
+
+#if !defined PROCINFO_DECL && defined SHARED
+ ._dl_x86_cap_flags
+#else
+PROCINFO_CLASS const char _dl_x86_cap_flags[32][8]
+#endif
+#ifndef PROCINFO_DECL
+= {
+ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+ "cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush", "20", "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe"
+ }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL
+;
+#else
+,
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
diff --git a/REORG.TODO/sysdeps/i386/dl-tls.h b/REORG.TODO/sysdeps/i386/dl-tls.h
new file mode 100644
index 0000000000..525ebab992
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-tls.h
@@ -0,0 +1,61 @@
+/* Thread-local storage handling in the ELF dynamic linker. i386 version.
+ Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* Type used for the representation of TLS information in the GOT. */
+typedef struct dl_tls_index
+{
+ unsigned long int ti_module;
+ unsigned long int ti_offset;
+} tls_index;
+
+
+#ifdef SHARED
+/* This is the prototype for the GNU version. */
+extern void *___tls_get_addr (tls_index *ti)
+ __attribute__ ((__regparm__ (1)));
+extern void *___tls_get_addr_internal (tls_index *ti)
+ __attribute__ ((__regparm__ (1))) attribute_hidden;
+
+# if IS_IN (rtld)
+/* The special thing about the x86 TLS ABI is that we have two
+ variants of the __tls_get_addr function with different calling
+   conventions.  The GNU version, with which we are mostly concerned here,
+ takes the parameter in a register. The name is changed by adding
+ an additional underscore at the beginning. The Sun version uses
+ the normal calling convention. */
+void *
+__tls_get_addr (tls_index *ti)
+{
+ return ___tls_get_addr_internal (ti);
+}
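+
+/* Illustrative note (an assumption about typical code generation, not
+   part of the original file): compilers reach the GNU variant through
+   the IA-32 general-dynamic sequence, roughly
+
+     leal x@tlsgd(,%ebx,1), %eax
+     call ___tls_get_addr@PLT
+
+   so the tls_index argument arrives in %eax, matching regparm (1).  */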
+
+
+/* Prepare using the definition of __tls_get_addr in the generic
+ version of this file. */
+# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr
+strong_alias (___tls_get_addr, ___tls_get_addr_internal)
+rtld_hidden_proto (___tls_get_addr)
+rtld_hidden_def (___tls_get_addr)
+# else
+
+/* Users should get the better interface.  */
+#  define __tls_get_addr ___tls_get_addr
+
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/i386/dl-tlsdesc.S b/REORG.TODO/sysdeps/i386/dl-tlsdesc.S
new file mode 100644
index 0000000000..8befdc2b39
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-tlsdesc.S
@@ -0,0 +1,285 @@
+/* Thread-local storage handling in the ELF dynamic linker. i386 version.
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+ .text
+
+ /* This function is used to compute the TP offset for symbols in
+ Static TLS, i.e., whose TP offset is the same for all
+ threads.
+
+ The incoming %eax points to the TLS descriptor, such that
+ 0(%eax) points to _dl_tlsdesc_return itself, and 4(%eax) holds
+ the TP offset of the symbol corresponding to the object
+ denoted by the argument. */
+
+ .hidden _dl_tlsdesc_return
+ .global _dl_tlsdesc_return
+ .type _dl_tlsdesc_return,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_return:
+ movl 4(%eax), %eax
+ ret
+ cfi_endproc
+ .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+ /* This function is used for undefined weak TLS symbols, for
+ which the base address (i.e., disregarding any addend) should
+ resolve to NULL.
+
+ %eax points to the TLS descriptor, such that 0(%eax) points to
+ _dl_tlsdesc_undefweak itself, and 4(%eax) holds the addend.
+ We return the addend minus the TP, such that, when the caller
+ adds TP, it gets the addend back. If that's zero, as usual,
+ that's most likely a NULL pointer. */
+
+ .hidden _dl_tlsdesc_undefweak
+ .global _dl_tlsdesc_undefweak
+ .type _dl_tlsdesc_undefweak,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_undefweak:
+ movl 4(%eax), %eax
+ subl %gs:0, %eax
+ ret
+ cfi_endproc
+ .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+ .hidden _dl_tlsdesc_dynamic
+ .global _dl_tlsdesc_dynamic
+ .type _dl_tlsdesc_dynamic,@function
+
+ /* This function is used for symbols that need dynamic TLS.
+
+ %eax points to the TLS descriptor, such that 0(%eax) points to
+ _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
+ tlsdesc_dynamic_arg object. It must return in %eax the offset
+ between the thread pointer and the object denoted by the
+ argument, without clobbering any registers.
+
+ The assembly code that follows is a rendition of the following
+ C code, hand-optimized a little bit.
+
+ptrdiff_t
+__attribute__ ((__regparm__ (1)))
+_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+{
+ struct tlsdesc_dynamic_arg *td = tdp->arg;
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+ if (__builtin_expect (td->gen_count <= dtv[0].counter
+ && (dtv[td->tlsinfo.ti_module].pointer.val
+ != TLS_DTV_UNALLOCATED),
+ 1))
+ return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+ - __thread_pointer;
+
+ return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+}
+*/
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_dynamic:
+ /* Like all TLS resolvers, preserve call-clobbered registers.
+ We need two scratch regs anyway. */
+ subl $28, %esp
+ cfi_adjust_cfa_offset (28)
+ movl %ecx, 20(%esp)
+ movl %edx, 24(%esp)
+ movl TLSDESC_ARG(%eax), %eax
+ movl %gs:DTV_OFFSET, %edx
+ movl TLSDESC_GEN_COUNT(%eax), %ecx
+ cmpl (%edx), %ecx
+ ja .Lslow
+ movl TLSDESC_MODID(%eax), %ecx
+ movl (%edx,%ecx,8), %edx
+ cmpl $-1, %edx
+ je .Lslow
+ movl TLSDESC_MODOFF(%eax), %eax
+ addl %edx, %eax
+.Lret:
+ movl 20(%esp), %ecx
+ subl %gs:0, %eax
+ movl 24(%esp), %edx
+ addl $28, %esp
+ cfi_adjust_cfa_offset (-28)
+ ret
+ .p2align 4,,7
+.Lslow:
+ cfi_adjust_cfa_offset (28)
+ call HIDDEN_JUMPTARGET (___tls_get_addr)
+ jmp .Lret
+ cfi_endproc
+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+#endif /* SHARED */
+
+ /* This function is a wrapper for a lazy resolver for TLS_DESC
+ REL relocations that reference the *ABS* segment in their own
+ link maps. %ebx points to the caller's GOT. %eax points to a
+ TLS descriptor, such that 0(%eax) holds the address of the
+ resolver wrapper itself (unless some other thread beat us to
+ it) and 4(%eax) holds the addend in the relocation.
+
+ When the actual resolver returns, it will have adjusted the
+ TLS descriptor such that we can tail-call it for it to return
+ the TP offset of the symbol. */
+
+ .hidden _dl_tlsdesc_resolve_abs_plus_addend
+ .global _dl_tlsdesc_resolve_abs_plus_addend
+ .type _dl_tlsdesc_resolve_abs_plus_addend,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_resolve_abs_plus_addend:
+0:
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl $1f - 0b, %ecx
+ movl 4(%ebx), %edx
+ call _dl_tlsdesc_resolve_abs_plus_addend_fixup
+1:
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ popl %ecx
+ cfi_adjust_cfa_offset (-4)
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ jmp *(%eax)
+ cfi_endproc
+ .size _dl_tlsdesc_resolve_abs_plus_addend, .-_dl_tlsdesc_resolve_abs_plus_addend
+
+ /* This function is a wrapper for a lazy resolver for TLS_DESC
+ REL relocations that had zero addends. %ebx points to the
+ caller's GOT. %eax points to a TLS descriptor, such that
+ 0(%eax) holds the address of the resolver wrapper itself
+ (unless some other thread beat us to it) and 4(%eax) holds a
+ pointer to the relocation.
+
+ When the actual resolver returns, it will have adjusted the
+ TLS descriptor such that we can tail-call it for it to return
+ the TP offset of the symbol. */
+
+ .hidden _dl_tlsdesc_resolve_rel
+ .global _dl_tlsdesc_resolve_rel
+ .type _dl_tlsdesc_resolve_rel,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_resolve_rel:
+0:
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl $1f - 0b, %ecx
+ movl 4(%ebx), %edx
+ call _dl_tlsdesc_resolve_rel_fixup
+1:
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ popl %ecx
+ cfi_adjust_cfa_offset (-4)
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ jmp *(%eax)
+ cfi_endproc
+ .size _dl_tlsdesc_resolve_rel, .-_dl_tlsdesc_resolve_rel
+
+ /* This function is a wrapper for a lazy resolver for TLS_DESC
+ RELA relocations. %ebx points to the caller's GOT. %eax
+ points to a TLS descriptor, such that 0(%eax) holds the
+ address of the resolver wrapper itself (unless some other
+ thread beat us to it) and 4(%eax) holds a pointer to the
+ relocation.
+
+ When the actual resolver returns, it will have adjusted the
+ TLS descriptor such that we can tail-call it for it to return
+ the TP offset of the symbol. */
+
+ .hidden _dl_tlsdesc_resolve_rela
+ .global _dl_tlsdesc_resolve_rela
+ .type _dl_tlsdesc_resolve_rela,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_resolve_rela:
+0:
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl $1f - 0b, %ecx
+ movl 4(%ebx), %edx
+ call _dl_tlsdesc_resolve_rela_fixup
+1:
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ popl %ecx
+ cfi_adjust_cfa_offset (-4)
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ jmp *(%eax)
+ cfi_endproc
+ .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
+
+ /* This function is a placeholder for lazy resolving of TLS
+ relocations. Once some thread starts resolving a TLS
+ relocation, it sets up the TLS descriptor to use this
+ resolver, such that other threads that would attempt to
+ resolve it concurrently may skip the call to the original lazy
+ resolver and go straight to a condition wait.
+
+ When the actual resolver returns, it will have adjusted the
+ TLS descriptor such that we can tail-call it for it to return
+ the TP offset of the symbol. */
+
+ .hidden _dl_tlsdesc_resolve_hold
+ .global _dl_tlsdesc_resolve_hold
+ .type _dl_tlsdesc_resolve_hold,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_resolve_hold:
+0:
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl $1f - 0b, %ecx
+ movl 4(%ebx), %edx
+ call _dl_tlsdesc_resolve_hold_fixup
+1:
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ popl %ecx
+ cfi_adjust_cfa_offset (-4)
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ jmp *(%eax)
+ cfi_endproc
+ .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
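For orientation, here is a hedged C model of the descriptor scheme the file above implements. The struct mirrors dl-tlsdesc.h further down; the thread-pointer stand-in and all helper names are hypothetical, illustration only, not glibc code.

    #include <stddef.h>

    struct tlsdesc_model
    {
      ptrdiff_t (*entry) (struct tlsdesc_model *);  /* 0(%eax) in the asm */
      void *arg;                                    /* 4(%eax) in the asm */
    };

    static char *tp_model;     /* stands in for the thread pointer, %gs:0 */

    /* Static TLS (_dl_tlsdesc_return): ARG already holds the TP offset,
       computed once at relocation time.  */
    static ptrdiff_t
    tlsdesc_return_model (struct tlsdesc_model *td)
    {
      return (ptrdiff_t) td->arg;
    }

    /* Every access site computes TP + entry (desc).  That is why
       _dl_tlsdesc_undefweak returns addend - TP: adding TP back leaves
       the bare addend, i.e. NULL for the usual zero addend.  */
    static void *
    tls_access_model (struct tlsdesc_model *td)
    {
      return tp_model + td->entry (td);
    }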
diff --git a/REORG.TODO/sysdeps/i386/dl-tlsdesc.h b/REORG.TODO/sysdeps/i386/dl-tlsdesc.h
new file mode 100644
index 0000000000..242bebfc8e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-tlsdesc.h
@@ -0,0 +1,61 @@
+/* Thread-local storage descriptor handling in the ELF dynamic linker.
+ i386 version.
+ Copyright (C) 2005-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _I386_DL_TLSDESC_H
+# define _I386_DL_TLSDESC_H 1
+
+/* Type used to represent a TLS descriptor in the GOT. */
+struct tlsdesc
+{
+ ptrdiff_t __attribute__ ((regparm (1))) (*entry) (struct tlsdesc *);
+ void *arg;
+};
+
+typedef struct dl_tls_index
+{
+ unsigned long int ti_module;
+ unsigned long int ti_offset;
+} tls_index;
+
+/* Type used as the argument in a TLS descriptor for a symbol that
+ needs dynamic TLS offsets. */
+struct tlsdesc_dynamic_arg
+{
+ tls_index tlsinfo;
+ size_t gen_count;
+};
+
+extern ptrdiff_t attribute_hidden __attribute__ ((regparm (1)))
+ _dl_tlsdesc_return (struct tlsdesc *),
+ _dl_tlsdesc_undefweak (struct tlsdesc *),
+ _dl_tlsdesc_resolve_abs_plus_addend (struct tlsdesc *),
+ _dl_tlsdesc_resolve_rel (struct tlsdesc *),
+ _dl_tlsdesc_resolve_rela (struct tlsdesc *),
+ _dl_tlsdesc_resolve_hold (struct tlsdesc *);
+
+# ifdef SHARED
+extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
+ size_t ti_offset)
+ internal_function attribute_hidden;
+
+extern ptrdiff_t attribute_hidden __attribute__ ((regparm (1)))
+ _dl_tlsdesc_dynamic (struct tlsdesc *);
+# endif
+
+#endif
diff --git a/REORG.TODO/sysdeps/i386/dl-trampoline.S b/REORG.TODO/sysdeps/i386/dl-trampoline.S
new file mode 100644
index 0000000000..6e7f3aef92
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/dl-trampoline.S
@@ -0,0 +1,215 @@
+/* PLT trampolines. i386 version.
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <link-defines.h>
+
+#ifdef HAVE_MPX_SUPPORT
+# define PRESERVE_BND_REGS_PREFIX bnd
+#else
+# define PRESERVE_BND_REGS_PREFIX .byte 0xf2
+#endif
+
+ .text
+ .globl _dl_runtime_resolve
+ .type _dl_runtime_resolve, @function
+ cfi_startproc
+ .align 16
+_dl_runtime_resolve:
+ cfi_adjust_cfa_offset (8)
+ pushl %eax # Preserve registers otherwise clobbered.
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl 16(%esp), %edx # Copy args pushed by PLT in register. Note
+ movl 12(%esp), %eax # that `fixup' takes its parameters in regs.
+ call _dl_fixup # Call resolver.
+ popl %edx # Get register content back.
+ cfi_adjust_cfa_offset (-4)
+ movl (%esp), %ecx
+ movl %eax, (%esp) # Store the function address.
+ movl 4(%esp), %eax
+ ret $12 # Jump to function address; $12 pops the spare slot and both PLT args.
+ cfi_endproc
+ .size _dl_runtime_resolve, .-_dl_runtime_resolve
+
+
+#ifndef PROF
+ .globl _dl_runtime_profile
+ .type _dl_runtime_profile, @function
+ cfi_startproc
+ .align 16
+_dl_runtime_profile:
+ cfi_adjust_cfa_offset (8)
+ pushl %esp
+ cfi_adjust_cfa_offset (4)
+ addl $8, (%esp) # Account for the pushed PLT data
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %eax # Preserve registers otherwise clobbered.
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl %esp, %ecx
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ movl $-1, 4(%esp)
+ leal 4(%esp), %edx
+ movl %edx, (%esp)
+ pushl %ecx # Address of the register structure
+ cfi_adjust_cfa_offset (4)
+ movl 40(%esp), %ecx # Load return address
+ movl 36(%esp), %edx # Copy args pushed by PLT in register. Note
+ movl 32(%esp), %eax # that `fixup' takes its parameters in regs.
+ call _dl_profile_fixup # Call resolver.
+ cfi_adjust_cfa_offset (-8)
+ movl (%esp), %edx
+ testl %edx, %edx
+ jns 1f
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ popl %edx # Get register content back.
+ cfi_adjust_cfa_offset (-4)
+ movl (%esp), %ecx
+ movl %eax, (%esp) # Store the function address.
+ movl 4(%esp), %eax
+ ret $20 # Jump to function address.
+
+ /*
+ +32 return address
+ +28 PLT1
+ +24 PLT2
+ +20 %esp
+ +16 %ebp
+ +12 %eax
+ +8 %ecx
+ +4 %edx
+ %esp free
+ */
+ cfi_adjust_cfa_offset (8)
+1: movl %ebx, (%esp)
+ cfi_rel_offset (ebx, 0)
+ movl %edx, %ebx # This is the frame buffer size
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (esi, 0)
+ leal 44(%esp), %esi
+ movl %ebx, %ecx
+ orl $4, %ebx # Increase frame size if necessary to align
+ # stack for the function call
+ andl $~3, %ebx
+ movl %esp, %edi
+ subl %ebx, %edi
+ movl %esp, %ebx
+ cfi_def_cfa_register (ebx)
+ movl %edi, %esp
+ shrl $2, %ecx
+ rep
+ movsl
+ movl (%ebx), %esi
+ cfi_restore (esi)
+ movl 4(%ebx), %edi
+ cfi_restore (edi)
+ /*
+ %ebx+40 return address
+ %ebx+36 PLT1
+ %ebx+32 PLT2
+ %ebx+28 %esp
+ %ebx+24 %ebp
+ %ebx+20 %eax
+ %ebx+16 %ecx
+ %ebx+12 %edx
+ %ebx+8 %ebx
+ %ebx+4 free
+ %ebx free
+ %esp copied stack frame
+ */
+ movl %eax, (%ebx)
+ movl 12(%ebx), %edx
+ movl 16(%ebx), %ecx
+ movl 20(%ebx), %eax
+ call *(%ebx)
+ movl %ebx, %esp
+ cfi_def_cfa_register (esp)
+ movl 8(%esp), %ebx
+ cfi_restore (ebx)
+ /*
+ +40 return address
+ +36 PLT1
+ +32 PLT2
+ +28 %esp
+ +24 %ebp
+ +20 %eax
+ +16 %ecx
+ +12 %edx
+ +8 free
+ +4 free
+ %esp free
+ */
+#if LONG_DOUBLE_SIZE != 12
+# error "long double size must be 12 bytes"
+#endif
+ # Allocate space for La_i86_retval and subtract 12 free bytes.
+ subl $(LRV_SIZE - 12), %esp
+ cfi_adjust_cfa_offset (LRV_SIZE - 12)
+ movl %eax, LRV_EAX_OFFSET(%esp)
+ movl %edx, LRV_EDX_OFFSET(%esp)
+ fstpt LRV_ST0_OFFSET(%esp)
+ fstpt LRV_ST1_OFFSET(%esp)
+#ifdef HAVE_MPX_SUPPORT
+ bndmov %bnd0, LRV_BND0_OFFSET(%esp)
+ bndmov %bnd1, LRV_BND1_OFFSET(%esp)
+#else
+ .byte 0x66,0x0f,0x1b,0x44,0x24,LRV_BND0_OFFSET
+ .byte 0x66,0x0f,0x1b,0x4c,0x24,LRV_BND1_OFFSET
+#endif
+ pushl %esp
+ cfi_adjust_cfa_offset (4)
+ # Address of La_i86_regs area.
+ leal (LRV_SIZE + 4)(%esp), %ecx
+ # PLT2
+ movl (LRV_SIZE + 4 + LR_SIZE)(%esp), %eax
+ # PLT1
+ movl (LRV_SIZE + 4 + LR_SIZE + 4)(%esp), %edx
+ call _dl_call_pltexit
+ movl LRV_EAX_OFFSET(%esp), %eax
+ movl LRV_EDX_OFFSET(%esp), %edx
+ fldt LRV_ST1_OFFSET(%esp)
+ fldt LRV_ST0_OFFSET(%esp)
+#ifdef HAVE_MPX_SUPPORT
+ bndmov LRV_BND0_OFFSET(%esp), %bnd0
+ bndmov LRV_BND1_OFFSET(%esp), %bnd1
+#else
+ .byte 0x66,0x0f,0x1a,0x44,0x24,LRV_BND0_OFFSET
+ .byte 0x66,0x0f,0x1a,0x4c,0x24,LRV_BND1_OFFSET
+#endif
+ # Restore stack before return.
+ addl $(LRV_SIZE + 4 + LR_SIZE + 4), %esp
+ cfi_adjust_cfa_offset (-(LRV_SIZE + 4 + LR_SIZE + 4))
+ PRESERVE_BND_REGS_PREFIX
+ ret
+ cfi_endproc
+ .size _dl_runtime_profile, .-_dl_runtime_profile
+#endif
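As a rough C model of the lazy-resolution flow in _dl_runtime_resolve above (hypothetical names throughout; dl_fixup_model only stands in for glibc's fixup routine and does not reproduce its real signature):

    typedef void (*plt_target) (void);

    /* Resolves the symbol and patches the GOT slot; assumed helper.  */
    extern plt_target dl_fixup_model (void *link_map, unsigned int reloc_arg);

    static plt_target
    runtime_resolve_model (void *link_map, unsigned int reloc_arg)
    {
      /* Only the first call through a PLT entry takes this path; once
         the GOT slot is patched, later calls jump directly.  */
      plt_target t = dl_fixup_model (link_map, reloc_arg);
      /* The assembly then restores %eax/%ecx/%edx, and "ret $12" jumps
         to T while discarding the spare slot and both PLT words, so T
         sees the caller's original stack and return address.  */
      return t;
    }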
diff --git a/REORG.TODO/sysdeps/i386/ffs.c b/REORG.TODO/sysdeps/i386/ffs.c
new file mode 100644
index 0000000000..c229c8166e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/ffs.c
@@ -0,0 +1,50 @@
+/* ffs -- find first set bit in a word, counted from least significant end.
+ For Intel 80x86, x>=3.
+ This file is part of the GNU C Library.
+ Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define ffsl __something_else
+#include <string.h>
+
+#undef ffs
+
+#ifdef __GNUC__
+
+int
+__ffs (int x)
+{
+ int cnt;
+ int tmp;
+
+ asm ("xorl %0,%0\n" /* Set CNT to zero. */
+ "bsfl %2,%1\n" /* Count low bits in X and store in %1. */
+ "jz 1f\n" /* Jump if OK, i.e. X was non-zero. */
+ "leal 1(%1),%0\n" /* Return bsfl-result plus one on %0. */
+ "1:" : "=&a" (cnt), "=r" (tmp) : "rm" (x));
+
+ return cnt;
+}
+weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
+libc_hidden_builtin_def (ffs)
+#undef ffsl
+weak_alias (__ffs, ffsl)
+
+#else
+#include <string/ffs.c>
+#endif
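A portable C sketch of the same computation, assuming a GCC-compatible compiler: bsfl leaves its destination undefined for a zero input, and __builtin_ctz has the same restriction, hence the explicit test in both versions.

    static int
    ffs_sketch (int x)
    {
      /* Count trailing zeros, then convert to a 1-based bit index.  */
      return x == 0 ? 0 : __builtin_ctz ((unsigned int) x) + 1;
    }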
diff --git a/REORG.TODO/sysdeps/i386/fpu/Implies b/REORG.TODO/sysdeps/i386/fpu/Implies
new file mode 100644
index 0000000000..2b745a34fb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/Implies
@@ -0,0 +1 @@
+x86/fpu
diff --git a/REORG.TODO/sysdeps/i386/fpu/Versions b/REORG.TODO/sysdeps/i386/fpu/Versions
new file mode 100644
index 0000000000..a2eec371f1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/Versions
@@ -0,0 +1,6 @@
+libm {
+ GLIBC_2.2 {
+ # functions used in inline functions or macros
+ __expl; __expm1l;
+ }
+}
diff --git a/REORG.TODO/sysdeps/i386/fpu/doasin.c b/REORG.TODO/sysdeps/i386/fpu/doasin.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/doasin.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_acos.S b/REORG.TODO/sysdeps/i386/fpu/e_acos.S
new file mode 100644
index 0000000000..586c7fc406
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_acos.S
@@ -0,0 +1,25 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: e_acos.S,v 1.4 1995/05/08 23:44:37 jtc Exp $")
+
+/* acos = atan (sqrt((1-x) (1+x)) / x) */
+ENTRY(__ieee754_acos)
+ fldl 4(%esp) /* x */
+ fld %st /* x : x */
+ fld1 /* 1 : x : x */
+ fsubp /* 1 - x : x */
+ fld1 /* 1 : 1 - x : x */
+ fadd %st(2) /* 1 + x : 1 - x : x */
+ fmulp /* 1 - x^2 : x */
+ fsqrt /* sqrt (1 - x^2) : x */
+ fabs
+ fxch %st(1) /* x : sqrt (1 - x^2) */
+ fpatan /* atan (sqrt(1 - x^2) / x) */
+ ret
+END (__ieee754_acos)
+strong_alias (__ieee754_acos, __acos_finite)
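Since fpatan is a two-argument arctangent, the identity lands in the right quadrant even for negative x; in C the same computation is roughly (a sketch, not the installed implementation):

    #include <math.h>

    static double
    acos_sketch (double x)
    {
      /* (1-x)*(1+x) loses less precision than 1 - x*x for |x| near 1;
         acos(-1) = atan2(0, -1) = pi comes out right automatically.  */
      return atan2 (sqrt ((1.0 - x) * (1.0 + x)), x);
    }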
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_acosf.S b/REORG.TODO/sysdeps/i386/fpu/e_acosf.S
new file mode 100644
index 0000000000..54930af8b2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_acosf.S
@@ -0,0 +1,24 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: $")
+
+/* acos = atan (sqrt(1 - x^2) / x) */
+ENTRY(__ieee754_acosf)
+ flds 4(%esp) /* x */
+ fld %st
+ fmul %st(0) /* x^2 */
+ fld1
+ fsubp /* 1 - x^2 */
+ fsqrt /* sqrt (1 - x^2) */
+ fabs
+ fxch %st(1)
+ fpatan
+ ret
+END (__ieee754_acosf)
+strong_alias (__ieee754_acosf, __acosf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_acosh.S b/REORG.TODO/sysdeps/i386/fpu/e_acosh.S
new file mode 100644
index 0000000000..9555ef8078
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_acosh.S
@@ -0,0 +1,101 @@
+/* ix87 specific implementation of arccosh.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_acosh)
+ movl 8(%esp), %ecx
+ cmpl $0x3ff00000, %ecx
+ jl 5f // < 1 => invalid
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+ cmpl $0x41b00000, %ecx
+ ja 3f // x > 2^28
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpl $0x40000000, %ecx
+ ja 4f // x > 2
+
+ // 1 <= x <= 2 => y = log1p(x-1+sqrt(2*(x-1)+(x-1)^2))
+ fsubl MO(one) // x-1 : log(2)
+ fabs // acosh(1) is +0 in all rounding modes
+ fld %st // x-1 : x-1 : log(2)
+ fmul %st(1) // (x-1)^2 : x-1 : log(2)
+ fadd %st(1) // x-1+(x-1)^2 : x-1 : log(2)
+ fadd %st(1) // 2*(x-1)+(x-1)^2 : x-1 : log(2)
+ fsqrt // sqrt(2*(x-1)+(x-1)^2) : x-1 : log(2)
+ faddp // x-1+sqrt(2*(x-1)+(x-1)^2) : log(2)
+ fcoml MO(limit)
+ fnstsw
+ sahf
+ ja 2f
+ fyl2xp1 // log1p(x-1+sqrt(2*(x-1)+(x-1)^2))
+ ret
+
+2: faddl MO(one) // x+sqrt(2*(x-1)+(x-1)^2) : log(2)
+ fyl2x // log(x+sqrt(2*(x-1)+(x-1)^2))
+ ret
+
+ // x > 2^28 => y = log(x) + log(2)
+ .align ALIGNARG(4)
+3: fyl2x // log(x)
+ fldln2 // log(2) : log(x)
+ faddp // log(x)+log(2)
+ ret
+
+ // 2^28 > x > 2 => y = log(2*x - 1/(x+sqrt(x*x-1)))
+ .align ALIGNARG(4)
+4: fld %st // x : x : log(2)
+ fadd %st, %st(1) // x : 2*x : log(2)
+ fld %st // x : x : 2*x : log(2)
+ fmul %st(1) // x^2 : x : 2*x : log(2)
+ fsubl MO(one) // x^2-1 : x : 2*x : log(2)
+ fsqrt // sqrt(x^2-1) : x : 2*x : log(2)
+ faddp // x+sqrt(x^2-1) : 2*x : log(2)
+ fdivrl MO(one) // 1/(x+sqrt(x^2-1)) : 2*x : log(2)
+ fsubrp // 2*x-1/(x+sqrt(x^2-1)) : log(2)
+ fyl2x // log(2*x-1/(x+sqrt(x^2-1)))
+ ret
+
+ // x < 1 (or -NaN) => NaN
+ .align ALIGNARG(4)
+5: fldl 4(%esp)
+ fsub %st
+ fdiv %st, %st(0)
+ ret
+END(__ieee754_acosh)
+strong_alias (__ieee754_acosh, __acosh_finite)
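The three ranges of the routine translate to C roughly as follows (a sketch with the same thresholds; the x < 1 case mimics label 5 above by generating a NaN and raising the invalid exception):

    #include <math.h>

    static double
    acosh_sketch (double x)
    {
      if (x < 1.0)
        return (x - x) / (x - x);   /* 0/0: NaN, raises invalid */
      if (x > 0x1p28)               /* sqrt(x*x-1) indistinguishable from x */
        return log (x) + M_LN2;     /* log(2*x) = log(x) + log(2) */
      if (x > 2.0)
        return log (2.0 * x - 1.0 / (x + sqrt (x * x - 1.0)));
      double t = x - 1.0;           /* 1 <= x <= 2: stay accurate near 1 */
      return log1p (t + sqrt (2.0 * t + t * t));
    }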
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_acoshf.S b/REORG.TODO/sysdeps/i386/fpu/e_acoshf.S
new file mode 100644
index 0000000000..662fda3c06
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_acoshf.S
@@ -0,0 +1,101 @@
+/* ix87 specific implementation of arccosh.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_acoshf)
+ movl 4(%esp), %ecx
+ cmpl $0x3f800000, %ecx
+ jl 5f // < 1 => invalid
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+ cmpl $0x47000000, %ecx
+ ja 3f // x > 2^15
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpl $0x40000000, %ecx
+ ja 4f // x > 2
+
+ // 1 <= x <= 2 => y = log1p(x-1+sqrt(2*(x-1)+(x-1)^2))
+ fsubl MO(one) // x-1 : log(2)
+ fabs // acosh(1) is +0 in all rounding modes
+ fld %st // x-1 : x-1 : log(2)
+ fmul %st(1) // (x-1)^2 : x-1 : log(2)
+ fadd %st(1) // x-1+(x-1)^2 : x-1 : log(2)
+ fadd %st(1) // 2*(x-1)+(x-1)^2 : x-1 : log(2)
+ fsqrt // sqrt(2*(x-1)+(x-1)^2) : x-1 : log(2)
+ faddp // x-1+sqrt(2*(x-1)+(x-1)^2) : log(2)
+ fcoml MO(limit)
+ fnstsw
+ sahf
+ ja 2f
+ fyl2xp1 // log1p(x-1+sqrt(2*(x-1)+(x-1)^2))
+ ret
+
+2: faddl MO(one) // x+sqrt(2*(x-1)+(x-1)^2) : log(2)
+ fyl2x // log(x+sqrt(2*(x-1)+(x-1)^2))
+ ret
+
+ // x > 2^15 => y = log(x) + log(2)
+ .align ALIGNARG(4)
+3: fyl2x // log(x)
+ fldln2 // log(2) : log(x)
+ faddp // log(x)+log(2)
+ ret
+
+ // 2^15 > x > 2 => y = log(2*x - 1/(x+sqrt(x*x-1)))
+ .align ALIGNARG(4)
+4: fld %st // x : x : log(2)
+ fadd %st, %st(1) // x : 2*x : log(2)
+ fld %st // x : x : 2*x : log(2)
+ fmul %st(1) // x^2 : x : 2*x : log(2)
+ fsubl MO(one) // x^2-1 : x : 2*x : log(2)
+ fsqrt // sqrt(x^2-1) : x : 2*x : log(2)
+ faddp // x+sqrt(x^2-1) : 2*x : log(2)
+ fdivrl MO(one) // 1/(x+sqrt(x^2-1)) : 2*x : log(2)
+ fsubrp // 2*x-1/(x+sqrt(x^2-1)) : log(2)
+ fyl2x // log(2*x-1/(x+sqrt(x^2-1)))
+ ret
+
+ // x < 1 (or -NaN) => NaN
+ .align ALIGNARG(4)
+5: flds 4(%esp)
+ fsub %st
+ fdiv %st, %st(0)
+ ret
+END(__ieee754_acoshf)
+strong_alias (__ieee754_acoshf, __acoshf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_acoshl.S b/REORG.TODO/sysdeps/i386/fpu/e_acoshl.S
new file mode 100644
index 0000000000..e0d6466aac
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_acoshl.S
@@ -0,0 +1,107 @@
+/* ix87 specific implementation of arccosh.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ /* Please note that we use double value for 1.0. This number
+ has an exact representation and so we don't get accuracy
+ problems. The advantage is that the code is simpler. */
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_acoshl)
+ movl 12(%esp), %ecx
+ andl $0xffff, %ecx
+ cmpl $0x3fff, %ecx
+ jl 5f // < 1 => invalid
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+ cmpl $0x4020, %ecx
+ ja 3f // x > 2^34
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpl $0x4000, %ecx
+ ja 4f // x > 2
+
+ // 1 <= x <= 2 => y = log1p(x-1+sqrt(2*(x-1)+(x-1)^2))
+ fsubl MO(one) // x-1 : log(2)
+ fabs // acosh(1) is +0 in all rounding modes
+ fld %st // x-1 : x-1 : log(2)
+ fmul %st(1) // (x-1)^2 : x-1 : log(2)
+ fadd %st(1) // x-1+(x-1)^2 : x-1 : log(2)
+ fadd %st(1) // 2*(x-1)+(x-1)^2 : x-1 : log(2)
+ fsqrt // sqrt(2*(x-1)+(x-1)^2) : x-1 : log(2)
+ faddp // x-1+sqrt(2*(x-1)+(x-1)^2) : log(2)
+ fcoml MO(limit)
+ fnstsw
+ sahf
+ ja 2f
+ fyl2xp1 // log1p(x-1+sqrt(2*(x-1)+(x-1)^2))
+ ret
+
+2: faddl MO(one) // x+sqrt(2*(x-1)+(x-1)^2) : log(2)
+ fyl2x // log(x+sqrt(2*(x-1)+(x-1)^2))
+ ret
+
+ // x > 2^34 => y = log(x) + log(2)
+ .align ALIGNARG(4)
+3: fyl2x // log(x)
+ fldln2 // log(2) : log(x)
+ faddp // log(x)+log(2)
+ ret
+
+ // 2^34 > x > 2 => y = log(2*x - 1/(x+sqrt(x*x-1)))
+ .align ALIGNARG(4)
+4: fld %st // x : x : log(2)
+ fadd %st, %st(1) // x : 2*x : log(2)
+ fld %st // x : x : 2*x : log(2)
+ fmul %st(1) // x^2 : x : 2*x : log(2)
+ fsubl MO(one) // x^2-1 : x : 2*x : log(2)
+ fsqrt // sqrt(x^2-1) : x : 2*x : log(2)
+ faddp // x+sqrt(x^2-1) : 2*x : log(2)
+ fdivrl MO(one) // 1/(x+sqrt(x^2-1)) : 2*x : log(2)
+ fsubrp // 2*x-1/(x+sqrt(x^2-1)) : log(2)
+ fyl2x // log(2*x-1/(x+sqrt(x^2-1)))
+ ret
+
+ // x < 1 => NaN
+ .align ALIGNARG(4)
+5: fldz
+ fdiv %st, %st(0)
+ ret
+END(__ieee754_acoshl)
+strong_alias (__ieee754_acoshl, __acoshl_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_acosl.c b/REORG.TODO/sysdeps/i386/fpu/e_acosl.c
new file mode 100644
index 0000000000..ab08931924
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_acosl.c
@@ -0,0 +1,29 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <math_private.h>
+
+long double
+__ieee754_acosl (long double x)
+{
+ long double res;
+
+ /* acosl = atanl (sqrtl((1-x) (1+x)) / x) */
+ asm ( "fld %%st\n"
+ "fld1\n"
+ "fsubp\n"
+ "fld1\n"
+ "fadd %%st(2)\n"
+ "fmulp\n" /* 1 - x^2 */
+ "fsqrt\n" /* sqrtl (1 - x^2) */
+ "fabs\n"
+ "fxch %%st(1)\n"
+ "fpatan"
+ : "=t" (res) : "0" (x) : "st(1)");
+ return res;
+}
+strong_alias (__ieee754_acosl, __acosl_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_asin.S b/REORG.TODO/sysdeps/i386/fpu/e_asin.S
new file mode 100644
index 0000000000..39c8b47da4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_asin.S
@@ -0,0 +1,38 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: e_asin.S,v 1.4 1995/05/08 23:45:40 jtc Exp $")
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+
+/* asin = atan (x / sqrt((1-x) (1+x))) */
+ENTRY(__ieee754_asin)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl 4(%esp) /* x */
+ fld %st
+ fld1 /* 1 : x : x */
+ fsubp /* 1 - x : x */
+ fld1 /* 1 : 1 - x : x */
+ fadd %st(2) /* 1 + x : 1 - x : x */
+ fmulp /* 1 - x^2 */
+ fsqrt /* sqrt (1 - x^2) */
+ fpatan
+ DBL_CHECK_FORCE_UFLOW
+ ret
+END (__ieee754_asin)
+strong_alias (__ieee754_asin, __asin_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_asinf.S b/REORG.TODO/sysdeps/i386/fpu/e_asinf.S
new file mode 100644
index 0000000000..1102bdedfd
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_asinf.S
@@ -0,0 +1,39 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: $")
+
+ .section .rodata.cst4,"aM",@progbits,4
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+
+/* asin = atan (x / sqrt(1 - x^2)) */
+ENTRY(__ieee754_asinf)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ flds 4(%esp) /* x */
+ fld %st
+ fmul %st(0) /* x^2 */
+ fld1
+ fsubp /* 1 - x^2 */
+ fsqrt /* sqrt (1 - x^2) */
+ fpatan
+ FLT_CHECK_FORCE_UFLOW
+ ret
+END (__ieee754_asinf)
+strong_alias (__ieee754_asinf, __asinf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_atan2.S b/REORG.TODO/sysdeps/i386/fpu/e_atan2.S
new file mode 100644
index 0000000000..25f43bb5a1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_atan2.S
@@ -0,0 +1,30 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: e_atan2.S,v 1.4 1995/05/08 23:46:28 jtc Exp $")
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_atan2)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl 4(%esp)
+ fldl 12(%esp)
+ fpatan
+ DBL_CHECK_FORCE_UFLOW_NARROW
+ ret
+END (__ieee754_atan2)
+strong_alias (__ieee754_atan2, __atan2_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_atan2f.S b/REORG.TODO/sysdeps/i386/fpu/e_atan2f.S
new file mode 100644
index 0000000000..2bc909a762
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_atan2f.S
@@ -0,0 +1,30 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: e_atan2f.S,v 1.1 1995/05/08 23:35:10 jtc Exp $")
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_atan2f)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ flds 4(%esp)
+ flds 8(%esp)
+ fpatan
+ FLT_CHECK_FORCE_UFLOW_NARROW
+ ret
+END (__ieee754_atan2f)
+strong_alias (__ieee754_atan2f, __atan2f_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_atan2l.c b/REORG.TODO/sysdeps/i386/fpu/e_atan2l.c
new file mode 100644
index 0000000000..9f88bfcc08
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_atan2l.c
@@ -0,0 +1,19 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <math_private.h>
+
+long double
+__ieee754_atan2l (long double y, long double x)
+{
+ long double res;
+
+ asm ("fpatan" : "=t" (res) : "u" (y), "0" (x) : "st(1)");
+
+ return res;
+}
+strong_alias (__ieee754_atan2l, __atan2l_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_atanh.S b/REORG.TODO/sysdeps/i386/fpu/e_atanh.S
new file mode 100644
index 0000000000..cbc93d5da2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_atanh.S
@@ -0,0 +1,112 @@
+/* ix87 specific implementation of arctanh function.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type half,@object
+half: .double 0.5
+ ASM_SIZE_DIRECTIVE(half)
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ .type ln2_2,@object
+ln2_2: .tfloat 0.3465735902799726547086160
+ ASM_SIZE_DIRECTIVE(ln2_2)
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_atanh)
+ movl 8(%esp), %ecx
+
+ movl %ecx, %eax
+ andl $0x7fffffff, %eax
+ cmpl $0x7ff00000, %eax
+ jae 5f
+7:
+
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+ andl $0x80000000, %ecx // ECX == 0 iff X >= 0
+
+ fldt MO(ln2_2) // 0.5*ln2
+ xorl %ecx, 8(%esp)
+ fldl 4(%esp) // |x| : 0.5*ln2
+ fcoml MO(half) // |x| : 0.5*ln2
+ fld %st // |x| : |x| : 0.5*ln2
+ fnstsw // |x| : |x| : 0.5*ln2
+ sahf
+ jae 2f
+ fadd %st, %st(1) // |x| : 2*|x| : 0.5*ln2
+ fld %st // |x| : |x| : 2*|x| : 0.5*ln2
+ fsubrl MO(one) // 1-|x| : |x| : 2*|x| : 0.5*ln2
+ fxch // |x| : 1-|x| : 2*|x| : 0.5*ln2
+ fmul %st(2) // 2*|x|^2 : 1-|x| : 2*|x| : 0.5*ln2
+ fdivp // (2*|x|^2)/(1-|x|) : 2*|x| : 0.5*ln2
+ faddp // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fcoml MO(limit) // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fnstsw // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ sahf
+ jae 4f
+ fyl2xp1 // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|))
+ DBL_CHECK_FORCE_UFLOW_NONNEG
+ jecxz 3f
+ fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x))
+3: ret
+
+ .align ALIGNARG(4)
+4: faddl MO(one) // 1+2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fyl2x // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|))
+ jecxz 3f
+ fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x))
+3: ret
+
+ .align ALIGNARG(4)
+2: faddl MO(one) // 1+|x| : |x| : 0.5*ln2
+ fxch // |x| : 1+|x| : 0.5*ln2
+ fsubrl MO(one) // 1-|x| : 1+|x| : 0.5*ln2
+ fdivrp // (1+|x|)/(1-|x|) : 0.5*ln2
+ fyl2x // 0.5*ln2*ld((1+|x|)/(1-|x|))
+ jecxz 3f
+ fchs // 0.5*ln2*ld((1+x)/(1-x))
+3: ret
+
+ // x == NaN or ±Inf
+5: ja 6f
+ cmpl $0, 4(%esp)
+ je 7b
+6: fldl 4(%esp)
+ ret
+END(__ieee754_atanh)
+strong_alias (__ieee754_atanh, __atanh_finite)
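In C, the two branches correspond to the following sketch; the sign is reapplied at the end the way the jecxz/fchs pairs do above.

    #include <math.h>

    static double
    atanh_sketch (double x)
    {
      double ax = fabs (x), t;
      if (ax < 0.5)
        /* (1+x)/(1-x) = 1 + (2x + 2x^2/(1-x)); handing the small
           quantity to log1p keeps precision near zero.  */
        t = 0.5 * log1p (2.0 * ax + 2.0 * ax * ax / (1.0 - ax));
      else
        t = 0.5 * log ((1.0 + ax) / (1.0 - ax));
      return copysign (t, x);
    }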
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_atanhf.S b/REORG.TODO/sysdeps/i386/fpu/e_atanhf.S
new file mode 100644
index 0000000000..92fda3fd82
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_atanhf.S
@@ -0,0 +1,109 @@
+/* ix87 specific implementation of arctanh function.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type half,@object
+half: .double 0.5
+ ASM_SIZE_DIRECTIVE(half)
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ .align ALIGNARG(4)
+ .type ln2_2,@object
+ln2_2: .tfloat 0.3465735902799726547086160
+ ASM_SIZE_DIRECTIVE(ln2_2)
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_atanhf)
+ movl 4(%esp), %ecx
+
+ movl %ecx, %eax
+ andl $0x7fffffff, %eax
+ cmpl $0x7f800000, %eax
+ ja 5f
+
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+ andl $0x80000000, %ecx // ECX == 0 iff X >= 0
+
+ fldt MO(ln2_2) // 0.5*ln2
+ xorl %ecx, 4(%esp)
+ flds 4(%esp) // |x| : 0.5*ln2
+ fcoml MO(half) // |x| : 0.5*ln2
+ fld %st(0) // |x| : |x| : 0.5*ln2
+ fnstsw // |x| : |x| : 0.5*ln2
+ sahf
+ jae 2f
+ fadd %st, %st(1) // |x| : 2*|x| : 0.5*ln2
+ fld %st // |x| : |x| : 2*|x| : 0.5*ln2
+ fsubrl MO(one) // 1-|x| : |x| : 2*|x| : 0.5*ln2
+ fxch // |x| : 1-|x| : 2*|x| : 0.5*ln2
+ fmul %st(2) // 2*|x|^2 : 1-|x| : 2*|x| : 0.5*ln2
+ fdivp // (2*|x|^2)/(1-|x|) : 2*|x| : 0.5*ln2
+ faddp // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fcoml MO(limit) // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fnstsw // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ sahf
+ jae 4f
+ fyl2xp1 // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|))
+ FLT_CHECK_FORCE_UFLOW_NONNEG
+ jecxz 3f
+ fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x))
+3: ret
+
+ .align ALIGNARG(4)
+4: faddl MO(one) // 1+2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fyl2x // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|))
+ jecxz 3f
+ fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x))
+3: ret
+
+ .align ALIGNARG(4)
+2: faddl MO(one) // 1+|x| : |x| : 0.5*ln2
+ fxch // |x| : 1+|x| : 0.5*ln2
+ fsubrl MO(one) // 1-|x| : 1+|x| : 0.5*ln2
+ fdivrp // (1+|x|)/(1-|x|) : 0.5*ln2
+ fyl2x // 0.5*ln2*ld((1+|x|)/(1-|x|))
+ jecxz 3f
+ fchs // 0.5*ln2*ld((1+x)/(1-x))
+3: ret
+
+ // x == NaN
+5: flds 4(%esp)
+ ret
+END(__ieee754_atanhf)
+strong_alias (__ieee754_atanhf, __atanhf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_atanhl.S b/REORG.TODO/sysdeps/i386/fpu/e_atanhl.S
new file mode 100644
index 0000000000..31ff7e5182
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_atanhl.S
@@ -0,0 +1,127 @@
+/* ix87 specific implementation of arctanh function.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ /* Please note that we use double values for 0.5 and 1.0. These
+ numbers have exact representations and so we don't get accuracy
+ problems. The advantage is that the code is simpler. */
+ .type half,@object
+half: .double 0.5
+ ASM_SIZE_DIRECTIVE(half)
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ .align ALIGNARG(4)
+ .type ln2_2,@object
+ln2_2: .tfloat 0.3465735902799726547086160
+ ASM_SIZE_DIRECTIVE(ln2_2)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_atanhl)
+ movl 12(%esp), %ecx
+
+ movl %ecx, %eax
+ andl $0x7fff, %eax
+ cmpl $0x7fff, %eax
+ je 5f
+ cmpl $0x3fdf, %eax
+ jge 7f
+ // Exponent below -32; return x, with underflow if subnormal.
+ fldt 4(%esp)
+ cmpl $0, %eax
+ jne 8f
+ fld %st(0)
+ fmul %st(0)
+ fstp %st(0)
+8: ret
+7:
+
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+ andl $0x8000, %ecx // ECX == 0 iff X >= 0
+
+ fldt MO(ln2_2) // 0.5*ln2
+ xorl %ecx, 12(%esp)
+ fldt 4(%esp) // |x| : 0.5*ln2
+ fcoml MO(half) // |x| : 0.5*ln2
+ fld %st(0) // |x| : |x| : 0.5*ln2
+ fnstsw // |x| : |x| : 0.5*ln2
+ sahf
+ jae 2f
+ fadd %st, %st(1) // |x| : 2*|x| : 0.5*ln2
+ fld %st // |x| : |x| : 2*|x| : 0.5*ln2
+ fsubrl MO(one) // 1-|x| : |x| : 2*|x| : 0.5*ln2
+ fxch // |x| : 1-|x| : 2*|x| : 0.5*ln2
+ fmul %st(2) // 2*|x|^2 : 1-|x| : 2*|x| : 0.5*ln2
+ fdivp // (2*|x|^2)/(1-|x|) : 2*|x| : 0.5*ln2
+ faddp // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fcoml MO(limit) // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fnstsw // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ sahf
+ jae 4f
+ fyl2xp1 // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|))
+ jecxz 3f
+ fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x))
+3: ret
+
+ .align ALIGNARG(4)
+4: faddl MO(one) // 1+2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2
+ fyl2x // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|))
+ jecxz 3f
+ fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x))
+3: ret
+
+ .align ALIGNARG(4)
+2: faddl MO(one) // 1+|x| : |x| : 0.5*ln2
+ fxch // |x| : 1+|x| : 0.5*ln2
+ fsubrl MO(one) // 1-|x| : 1+|x| : 0.5*ln2
+ fdivrp // (1+|x|)/(1-|x|) : 0.5*ln2
+ fyl2x // 0.5*ln2*ld((1+|x|)/(1-|x|))
+ jecxz 3f
+ fchs // 0.5*ln2*ld((1+x)/(1-x))
+3: ret
+
+ // x == NaN or ±Inf
+5: cmpl $0x80000000, 8(%esp)
+ ja 6f
+ cmpl $0, 4(%esp)
+ je 7b
+6: fldt 4(%esp)
+ fadd %st(0)
+ ret
+END(__ieee754_atanhl)
+strong_alias (__ieee754_atanhl, __atanhl_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_exp.S b/REORG.TODO/sysdeps/i386/fpu/e_exp.S
new file mode 100644
index 0000000000..a7e7f13f6f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_exp.S
@@ -0,0 +1,73 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+/* e^x = 2^(x * log2(e)) */
+ENTRY(__ieee754_exp)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl 4(%esp)
+/* I added the following ugly construct because exp(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ fldl2e
+ fmulp /* x * log2(e) */
+ fld %st
+ frndint /* int(x * log2(e)) */
+ fsubr %st,%st(1) /* fract(x * log2(e)) */
+ fxch
+ f2xm1 /* 2^(fract(x * log2(e))) - 1 */
+ fld1
+ faddp /* 2^(fract(x * log2(e))) */
+ fscale /* e^x */
+ fstp %st(1)
+ DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
+ ret
+
+1: testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+2: ret
+END (__ieee754_exp)
+
+
+ENTRY(__exp_finite)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl2e
+ fmull 4(%esp) /* x * log2(e) */
+ fld %st
+ frndint /* int(x * log2(e)) */
+ fsubr %st,%st(1) /* fract(x * log2(e)) */
+ fxch
+ f2xm1 /* 2^(fract(x * log2(e))) - 1 */
+ fld1
+ faddp /* 2^(fract(x * log2(e))) */
+ fscale /* e^x */
+ fstp %st(1)
+ DBL_NARROW_EVAL_UFLOW_NONNEG
+ ret
+END(__exp_finite)
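The fldl2e/frndint/f2xm1/fscale sequence corresponds to this C outline (a sketch; note that the single rounding of x * log2(e) is exactly the precision loss the long double version further down avoids):

    #include <math.h>

    static double
    exp_sketch (double x)
    {
      double y = x * M_LOG2E;           /* fldl2e + fmulp */
      double i = nearbyint (y);         /* frndint */
      double f = y - i;                 /* |f| <= 0.5, safe for f2xm1 */
      return ldexp (exp2 (f), (int) i); /* f2xm1, fld1/faddp, fscale */
    }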
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_exp10.S b/REORG.TODO/sysdeps/i386/fpu/e_exp10.S
new file mode 100644
index 0000000000..acb5160a3f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_exp10.S
@@ -0,0 +1,53 @@
+/*
+ * Written by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+/* 10^x = 2^(x * log2(10)) */
+ENTRY(__ieee754_exp10)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl 4(%esp)
+/* I added the following ugly construct because exp(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ fldl2t
+ fmulp /* x * log2(10) */
+ fld %st
+ frndint /* int(x * log2(10)) */
+ fsubr %st,%st(1) /* fract(x * log2(10)) */
+ fxch
+ f2xm1 /* 2^(fract(x * log2(10))) - 1 */
+ fld1
+ faddp /* 2^(fract(x * log2(10))) */
+ fscale /* 10^x */
+ fstp %st(1)
+ DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
+ ret
+
+1: testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+2: ret
+END (__ieee754_exp10)
+strong_alias (__ieee754_exp10, __exp10_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_exp10f.S b/REORG.TODO/sysdeps/i386/fpu/e_exp10f.S
new file mode 100644
index 0000000000..1812b34398
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_exp10f.S
@@ -0,0 +1,53 @@
+/*
+ * Written by Ulrich Drepper.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+/* 10^x = 2^(x * log2(10)) */
+ENTRY(__ieee754_exp10f)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ flds 4(%esp)
+/* I added the following ugly construct because exp(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ fldl2t
+ fmulp /* x * log2(10) */
+ fld %st
+ frndint /* int(x * log2(10)) */
+ fsubr %st,%st(1) /* fract(x * log2(10)) */
+ fxch
+ f2xm1 /* 2^(fract(x * log2(10))) - 1 */
+ fld1
+ faddp /* 2^(fract(x * log2(10))) */
+ fscale /* 10^x */
+ fstp %st(1)
+ FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
+ ret
+
+1: testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+2: ret
+END (__ieee754_exp10f)
+strong_alias (__ieee754_exp10f, __exp10f_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_exp10l.S b/REORG.TODO/sysdeps/i386/fpu/e_exp10l.S
new file mode 100644
index 0000000000..d843e2b5e8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_exp10l.S
@@ -0,0 +1,2 @@
+#define USE_AS_EXP10L
+#include <e_expl.S>
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_exp2.S b/REORG.TODO/sysdeps/i386/fpu/e_exp2.S
new file mode 100644
index 0000000000..fc16a96053
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_exp2.S
@@ -0,0 +1,52 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Adapted for exp2 by Ulrich Drepper <drepper@cygnus.com>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_exp2)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl 4(%esp)
+/* I added the following ugly construct because exp(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ fld %st
+ frndint /* int(x) */
+ fsubr %st,%st(1) /* fract(x) */
+ fxch
+ f2xm1 /* 2^(fract(x)) - 1 */
+ fld1
+ faddp /* 2^(fract(x)) */
+ fscale /* 2^x */
+ fstp %st(1)
+ DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
+ ret
+
+1: testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+2: ret
+END (__ieee754_exp2)
+strong_alias (__ieee754_exp2, __exp2_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_exp2f.S b/REORG.TODO/sysdeps/i386/fpu/e_exp2f.S
new file mode 100644
index 0000000000..30623cd850
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_exp2f.S
@@ -0,0 +1,52 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Adapted for exp2 by Ulrich Drepper <drepper@cygnus.com>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_exp2f)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ flds 4(%esp)
+/* I added the following ugly construct because exp(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ fld %st
+ frndint /* int(x) */
+ fsubr %st,%st(1) /* fract(x) */
+ fxch
+ f2xm1 /* 2^(fract(x)) - 1 */
+ fld1
+ faddp /* 2^(fract(x)) */
+ fscale /* 2^x */
+ fstp %st(1)
+ FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
+ ret
+
+1: testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+2: ret
+END (__ieee754_exp2f)
+strong_alias (__ieee754_exp2f, __exp2f_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_exp2l.S b/REORG.TODO/sysdeps/i386/fpu/e_exp2l.S
new file mode 100644
index 0000000000..c4cb73d589
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_exp2l.S
@@ -0,0 +1,60 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Adapted for exp2 by Ulrich Drepper <drepper@cygnus.com>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+DEFINE_LDBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_exp2l)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldt 4(%esp)
+/* I added the following ugly construct because exp(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ movzwl 4+8(%esp), %eax
+ andl $0x7fff, %eax
+ cmpl $0x3fbe, %eax
+ jge 3f
+ /* Argument's exponent below -65, result rounds to 1. */
+ fld1
+ faddp
+ ret
+3: fld %st
+ frndint /* int(x) */
+ fsubr %st,%st(1) /* fract(x) */
+ fxch
+ f2xm1 /* 2^(fract(x)) - 1 */
+ fld1
+ faddp /* 2^(fract(x)) */
+ fscale /* 2^x */
+ fstp %st(1)
+ LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN
+ ret
+
+1: testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+2: ret
+END (__ieee754_exp2l)
+strong_alias (__ieee754_exp2l, __exp2l_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_expf.S b/REORG.TODO/sysdeps/i386/fpu/e_expf.S
new file mode 100644
index 0000000000..65cb4ec204
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_expf.S
@@ -0,0 +1,74 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+/* e^x = 2^(x * log2(e)) */
+ENTRY(__ieee754_expf)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ flds 4(%esp)
+/* I added the following ugly construct because exp(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ fldl2e
+ fmulp /* x * log2(e) */
+ fld %st
+ frndint /* int(x * log2(e)) */
+ fsubr %st,%st(1) /* fract(x * log2(e)) */
+ fxch
+ f2xm1 /* 2^(fract(x * log2(e))) - 1 */
+ fld1
+ faddp /* 2^(fract(x * log2(e))) */
+ fscale /* e^x */
+ fstp %st(1)
+ FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
+ ret
+
+1: testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+2: ret
+END (__ieee754_expf)
+
+
+ENTRY(__expf_finite)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl2e
+ fmuls 4(%esp) /* x * log2(e) */
+ fld %st
+ frndint /* int(x * log2(e)) */
+ fsubr %st,%st(1) /* fract(x * log2(e)) */
+ fxch
+ f2xm1 /* 2^(fract(x * log2(e))) - 1 */
+ fld1
+ faddp /* 2^(fract(x * log2(e))) */
+ fscale /* e^x */
+ fstp %st(1)
+ FLT_NARROW_EVAL_UFLOW_NONNEG
+ ret
+END(__expf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_expl.S b/REORG.TODO/sysdeps/i386/fpu/e_expl.S
new file mode 100644
index 0000000000..7d75fe22a1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_expl.S
@@ -0,0 +1,226 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+/*
+ * The 8087 method for the exponential function is to calculate
+ * exp(x) = 2^(x log2(e))
+ * after separating integer and fractional parts
+ * x log2(e) = i + f, |f| <= .5
+ * 2^i is immediate but f needs to be precise for long double accuracy.
+ * Suppress range reduction error in computing f by the following.
+ * Separate x into integer and fractional parts
+ * x = xi + xf, |xf| <= .5
+ * Separate log2(e) into the sum of an exact number c0 and small part c1.
+ * c0 + c1 = log2(e) to extra precision
+ * Then
+ * f = (c0 xi - i) + c0 xf + c1 x
+ * where c0 xi is exact and so also is (c0 xi - i).
+ * -- moshier@na-net.ornl.gov
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+#ifdef USE_AS_EXP10L
+# define IEEE754_EXPL __ieee754_exp10l
+# define EXPL_FINITE __exp10l_finite
+# define FLDLOG fldl2t
+#elif defined USE_AS_EXPM1L
+# define IEEE754_EXPL __expm1l
+# undef EXPL_FINITE
+# define FLDLOG fldl2e
+#else
+# define IEEE754_EXPL __ieee754_expl
+# define EXPL_FINITE __expl_finite
+# define FLDLOG fldl2e
+#endif
+
+ .section .rodata.cst16,"aM",@progbits,16
+
+ .p2align 4
+#ifdef USE_AS_EXP10L
+ .type c0,@object
+c0: .byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40
+ .byte 0, 0, 0, 0, 0, 0
+ ASM_SIZE_DIRECTIVE(c0)
+ .type c1,@object
+c1: .byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f
+ .byte 0, 0, 0, 0, 0, 0
+ ASM_SIZE_DIRECTIVE(c1)
+#else
+ .type c0,@object
+c0: .byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
+ .byte 0, 0, 0, 0, 0, 0
+ ASM_SIZE_DIRECTIVE(c0)
+ .type c1,@object
+c1: .byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
+ .byte 0, 0, 0, 0, 0, 0
+ ASM_SIZE_DIRECTIVE(c1)
+#endif
+#ifndef USE_AS_EXPM1L
+ .type csat,@object
+csat: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
+ .byte 0, 0, 0, 0, 0, 0
+ ASM_SIZE_DIRECTIVE(csat)
+DEFINE_LDBL_MIN
+#endif
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(IEEE754_EXPL)
+#ifdef USE_AS_EXPM1L
+ movzwl 4+8(%esp), %eax
+ xorb $0x80, %ah // invert sign bit (now 1 is "positive")
+ cmpl $0xc006, %eax // is num positive and exp >= 6 (number is >= 128.0)?
+ jae HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0)
+#endif
+ fldt 4(%esp)
+/* I added the following ugly construct because expl(+-Inf) resulted
+ in NaN. The ugliness results from the bright minds at Intel.
+ For the i686 the code can be written better.
+ -- drepper@cygnus.com. */
+ fxam /* Is NaN or +-Inf? */
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+#ifdef USE_AS_EXPM1L
+ xorb $0x80, %ah
+ cmpl $0xc006, %eax
+ fstsw %ax
+ movb $0x45, %dh
+ jb 4f
+
+ /* Below -64.0 (may be -NaN or -Inf). */
+ andb %ah, %dh
+ cmpb $0x01, %dh
+ je 6f /* Is +-NaN, jump. */
+ jmp 1f /* -large, possibly -Inf. */
+
+4: /* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). */
+ /* Test for +-0 as argument. */
+ andb %ah, %dh
+ cmpb $0x40, %dh
+ je 2f
+
+ /* Test for arguments that are small but not subnormal. */
+ movzwl 4+8(%esp), %eax
+ andl $0x7fff, %eax
+ cmpl $0x3fbf, %eax
+ jge 3f
+ /* Argument's exponent below -64; avoid spurious underflow if
+ normal. */
+ cmpl $0x0001, %eax
+ jge 2f
+ /* Force underflow and return the argument, to avoid wrong signs
+ of zero results from the code below in some rounding modes. */
+ fld %st
+ fmul %st
+ fstp %st
+ jmp 2f
+#else
+ movzwl 4+8(%esp), %eax
+ andl $0x7fff, %eax
+ cmpl $0x400d, %eax
+ jg 5f
+ cmpl $0x3fbc, %eax
+ jge 3f
+ /* Argument's exponent below -67, result rounds to 1. */
+ fld1
+ faddp
+ jmp 2f
+5: /* Overflow, underflow or infinity or NaN as argument. */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ cmpb $0x01, %dh
+ je 6f /* Is +-NaN, jump. */
+ /* Overflow or underflow; saturate. */
+ fstp %st
+ fldt MO(csat)
+ andb $2, %ah
+ jz 3f
+ fchs
+#endif
+3: FLDLOG /* 1 log2(base) */
+ fmul %st(1), %st /* 1 x log2(base) */
+ /* Set round-to-nearest temporarily. */
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fstcw 4(%esp)
+ movl $0xf3ff, %edx
+ andl 4(%esp), %edx
+ movl %edx, (%esp)
+ fldcw (%esp)
+ frndint /* 1 i */
+ fld %st(1) /* 2 x */
+ frndint /* 2 xi */
+ fldcw 4(%esp)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fld %st(1) /* 3 i */
+ fldt MO(c0) /* 4 c0 */
+ fld %st(2) /* 5 xi */
+ fmul %st(1), %st /* 5 c0 xi */
+ fsubp %st, %st(2) /* 4 f = c0 xi - i */
+ fld %st(4) /* 5 x */
+ fsub %st(3), %st /* 5 xf = x - xi */
+ fmulp %st, %st(1) /* 4 c0 xf */
+ faddp %st, %st(1) /* 3 f = f + c0 xf */
+ fldt MO(c1) /* 4 */
+ fmul %st(4), %st /* 4 c1 * x */
+ faddp %st, %st(1) /* 3 f = f + c1 * x */
+ f2xm1 /* 3 2^(fract(x * log2(base))) - 1 */
+#ifdef USE_AS_EXPM1L
+ fstp %st(1) /* 2 */
+ fscale /* 2 scale factor is st(1); base^x - 2^i */
+ fxch /* 2 i */
+ fld1 /* 3 1.0 */
+ fscale /* 3 2^i */
+ fld1 /* 4 1.0 */
+ fsubrp %st, %st(1) /* 3 2^i - 1.0 */
+ fstp %st(1) /* 2 */
+ faddp %st, %st(1) /* 1 base^x - 1.0 */
+#else
+ fld1 /* 4 1.0 */
+ faddp /* 3 2^(fract(x * log2(base))) */
+ fstp %st(1) /* 2 */
+ fscale /* 2 scale factor is st(1); base^x */
+ fstp %st(1) /* 1 */
+ LDBL_CHECK_FORCE_UFLOW_NONNEG
+#endif
+ fstp %st(1) /* 0 */
+ jmp 2f
+1:
+#ifdef USE_AS_EXPM1L
+ /* For expm1l, only negative sign gets here. */
+ fstp %st
+ fld1
+ fchs
+#else
+ testl $0x200, %eax /* Test sign. */
+ jz 2f /* If positive, jump. */
+ fstp %st
+ fldz /* Set result to 0. */
+#endif
+2: ret
+6: /* NaN argument. */
+ fadd %st
+ ret
+END(IEEE754_EXPL)
+#ifdef USE_AS_EXPM1L
+libm_hidden_def (__expm1l)
+weak_alias (__expm1l, expm1l)
+#else
+strong_alias (IEEE754_EXPL, EXPL_FINITE)
+#endif
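
A note on the reduction in the comment block above: computing f = x*log2(e) - i
directly loses low-order bits of f as soon as x*log2(e) needs many integer
bits, so log2(e) is split into c0 (a short-mantissa head whose products with
small integers stay exact) and the residual c1.  A minimal C sketch of the
same split-constant trick at double precision; hi_part and reduce are names
made up for illustration, not glibc interfaces:

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Clear the low 32 mantissa bits of c so that c times a small
   integer is exactly representable in a double.  */
static double
hi_part (double c)
{
  uint64_t u;
  memcpy (&u, &c, sizeof u);
  u &= ~(uint64_t) 0xffffffff;
  memcpy (&c, &u, sizeof c);
  return c;
}

/* Reduce x so that exp(x) = 2^i * 2^f, computing f to extra
   precision exactly as f = (c0 xi - i) + c0 xf + c1 x above.  */
static double
reduce (double x, double *i)
{
  double log2e = 1.4426950408889634074;
  double c0 = hi_part (log2e);
  double c1 = log2e - c0;
  double xi = nearbyint (x);       /* x = xi + xf, both parts exact */
  double xf = x - xi;
  *i = nearbyint (x * log2e);
  return (c0 * xi - *i) + c0 * xf + c1 * x;  /* c0*xi - i is exact */
}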
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_fmod.S b/REORG.TODO/sysdeps/i386/fpu/e_fmod.S
new file mode 100644
index 0000000000..26b3acc392
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_fmod.S
@@ -0,0 +1,18 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__ieee754_fmod)
+ fldl 12(%esp)
+ fldl 4(%esp)
+1: fprem
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ ret
+END (__ieee754_fmod)
+strong_alias (__ieee754_fmod, __fmod_finite)
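
The `jp 1b' loop deserves a note: fprem reduces st(0) modulo st(1) by at most
63 exponent bits per execution and sets C2 in the status word while the
reduction is incomplete; fstsw/sahf lands C2 in the parity flag, so the loop
retries until fprem reports completion.  The long double version below
(e_fmodl.c) writes the identical loop as GCC inline asm; a double-precision
rendering of the same sketch (i386 only; fmod_sketch is an illustrative name):

double
fmod_sketch (double x, double y)
{
  double res;
  /* Loop while fprem's C2 (visible as PF after sahf) says the
     partial remainder is not final yet.  */
  asm ("1:\tfprem\n\t"
       "fstsw %%ax\n\t"
       "sahf\n\t"
       "jp 1b\n\t"
       "fstp %%st(1)"
       : "=t" (res) : "0" (x), "u" (y) : "ax", "st(1)", "cc");
  return res;
}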
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_fmodf.S b/REORG.TODO/sysdeps/i386/fpu/e_fmodf.S
new file mode 100644
index 0000000000..ece4d98427
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_fmodf.S
@@ -0,0 +1,19 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__ieee754_fmodf)
+ flds 8(%esp)
+ flds 4(%esp)
+1: fprem
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ ret
+END(__ieee754_fmodf)
+strong_alias (__ieee754_fmodf, __fmodf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_fmodl.c b/REORG.TODO/sysdeps/i386/fpu/e_fmodl.c
new file mode 100644
index 0000000000..49700ae8f6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_fmodl.c
@@ -0,0 +1,23 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <math_private.h>
+
+long double
+__ieee754_fmodl (long double x, long double y)
+{
+ long double res;
+
+ asm ("1:\tfprem\n"
+ "fstsw %%ax\n"
+ "sahf\n"
+ "jp 1b\n"
+ "fstp %%st(1)"
+ : "=t" (res) : "0" (x), "u" (y) : "ax", "st(1)");
+ return res;
+}
+strong_alias (__ieee754_fmodl, __fmodl_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_hypot.S b/REORG.TODO/sysdeps/i386/fpu/e_hypot.S
new file mode 100644
index 0000000000..7403566fd7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_hypot.S
@@ -0,0 +1,75 @@
+/* Compute the hypotenuse of X and Y.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <i386-math-asm.h>
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_hypot)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fldl 4(%esp) // x
+ fxam
+ fnstsw
+ fldl 12(%esp) // y : x
+ movb %ah, %ch
+ fxam
+ fnstsw
+ movb %ah, %al
+ orb %ch, %ah
+ sahf
+ jc 1f
+ fmul %st(0) // y * y : x
+ fxch // x : y * y
+ fmul %st(0) // x * x : y * y
+ faddp // x * x + y * y
+ fsqrt
+ DBL_NARROW_EVAL_UFLOW_NONNEG
+2: ret
+
+ // We have to test whether any of the parameters is Inf.
+ // In this case the result is infinity.
+1: andb $0x45, %al
+ cmpb $5, %al
+ je 3f // jump if y is Inf
+ andb $0x45, %ch
+ cmpb $5, %ch
+ jne 4f // jump if x is not Inf
+ fxch
+3: fstp %st(1)
+ fabs
+ jmp 2b
+
+4: testb $1, %al
+ jnz 5f // y is NaN
+ fxch
+5: fstp %st(1)
+ jmp 2b
+
+END(__ieee754_hypot)
+strong_alias (__ieee754_hypot, __hypot_finite)
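
The case analysis above restated in C, for reference: hypot must return +Inf
when either argument is infinite, even if the other one is NaN; only a NaN
paired with a finite value yields NaN.  The main path can square and add
without any prescaling because the x87 evaluates in 80-bit extended
precision, where the sum of two squared doubles cannot overflow.  A sketch
(hypot_sketch is an illustrative name):

#include <math.h>

double
hypot_sketch (double x, double y)
{
  if (isinf (x) || isinf (y))
    return INFINITY;               /* labels 1/3 above */
  if (isnan (x) || isnan (y))
    return x + y;                  /* labels 4/5: propagate the NaN */
  long double sum = (long double) x * x + (long double) y * y;
  return (double) sqrtl (sum);     /* the fmul/fadd/fsqrt main path */
}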
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_hypotf.S b/REORG.TODO/sysdeps/i386/fpu/e_hypotf.S
new file mode 100644
index 0000000000..6a2c7052b2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_hypotf.S
@@ -0,0 +1,64 @@
+/* Compute the hypotenuse of X and Y.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <i386-math-asm.h>
+
+ .text
+ENTRY(__ieee754_hypotf)
+ flds 4(%esp) // x
+ fxam
+ fnstsw
+ flds 8(%esp) // y : x
+ movb %ah, %ch
+ fxam
+ fnstsw
+ movb %ah, %al
+ orb %ch, %ah
+ sahf
+ jc 1f
+ fmul %st(0) // y * y : x
+ fxch // x : y * y
+ fmul %st(0) // x * x : y * y
+ faddp // x * x + y * y
+ fsqrt
+ FLT_NARROW_EVAL
+2: ret
+
+ // We have to test whether any of the parameters is Inf.
+ // In this case the result is infinity.
+1: andb $0x45, %al
+ cmpb $5, %al
+ je 3f // jump if y is Inf
+ andb $0x45, %ch
+ cmpb $5, %ch
+ jne 4f // jump if x is not Inf
+ fxch
+3: fstp %st(1)
+ fabs
+ jmp 2b
+
+4: testb $1, %al
+ jnz 5f // y is NaN
+ fxch
+5: fstp %st(1)
+ jmp 2b
+
+END(__ieee754_hypotf)
+strong_alias (__ieee754_hypotf, __hypotf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_ilogb.S b/REORG.TODO/sysdeps/i386/fpu/e_ilogb.S
new file mode 100644
index 0000000000..29ef2214e6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_ilogb.S
@@ -0,0 +1,42 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_ilogb.S,v 1.5 1995/10/12 15:53:09 jtc Exp $")
+
+ENTRY(__ieee754_ilogb)
+ fldl 4(%esp)
+/* I added the following ugly construct because ilogb(+-Inf) is
+ required to return INT_MAX in ISO C99.
+ -- jakub@redhat.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ cmpb $0x40, %dh
+ je 2f /* Is +-0, jump. */
+
+ fxtract
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ fstp %st
+
+ fistpl (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+
+ ret
+
+1: fstp %st
+ movl $0x7fffffff, %eax
+ ret
+2: fstp %st
+ movl $0x80000000, %eax /* FP_ILOGB0 */
+ ret
+END (__ieee754_ilogb)
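
What the fxtract path computes, restated in C: the unbiased exponent of x as
an int, with the two cases that would make fxtract misbehave peeled off
first.  FP_ILOGB0 is INT_MIN (0x80000000) on this target, matching the movl
above; a NaN argument simply falls through to fxtract/fistpl in the asm,
raising the invalid exception.  A sketch with an illustrative name:

#include <limits.h>
#include <math.h>

int
ilogb_sketch (double x)
{
  if (isinf (x))
    return INT_MAX;                /* label 1 above (ISO C99) */
  if (x == 0.0)
    return FP_ILOGB0;              /* label 2 above */
  /* fxtract splits x into significand and true exponent (it also
     normalizes subnormals); logb is the C equivalent.  */
  return (int) logb (x);
}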
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_ilogbf.S b/REORG.TODO/sysdeps/i386/fpu/e_ilogbf.S
new file mode 100644
index 0000000000..d72de6c84a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_ilogbf.S
@@ -0,0 +1,42 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_ilogbf.S,v 1.4 1995/10/22 20:32:43 pk Exp $")
+
+ENTRY(__ieee754_ilogbf)
+ flds 4(%esp)
+/* I added the following ugly construct because ilogb(+-Inf) is
+ required to return INT_MAX in ISO C99.
+ -- jakub@redhat.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ cmpb $0x40, %dh
+ je 2f /* Is +-0, jump. */
+
+ fxtract
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ fstp %st
+
+ fistpl (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+
+ ret
+
+1: fstp %st
+ movl $0x7fffffff, %eax
+ ret
+2: fstp %st
+ movl $0x80000000, %eax /* FP_ILOGB0 */
+ ret
+END (__ieee754_ilogbf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_ilogbl.S b/REORG.TODO/sysdeps/i386/fpu/e_ilogbl.S
new file mode 100644
index 0000000000..60761dfa38
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_ilogbl.S
@@ -0,0 +1,43 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: $")
+
+ENTRY(__ieee754_ilogbl)
+ fldt 4(%esp)
+/* I added the following ugly construct because ilogb(+-Inf) is
+ required to return INT_MAX in ISO C99.
+ -- jakub@redhat.com. */
+ fxam /* Is NaN or +-Inf? */
+ fstsw %ax
+ movb $0x45, %dh
+ andb %ah, %dh
+ cmpb $0x05, %dh
+ je 1f /* Is +-Inf, jump. */
+ cmpb $0x40, %dh
+ je 2f /* Is +-0, jump. */
+
+ fxtract
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ fstp %st
+
+ fistpl (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+
+ ret
+
+1: fstp %st
+ movl $0x7fffffff, %eax
+ ret
+2: fstp %st
+ movl $0x80000000, %eax /* FP_ILOGB0 */
+ ret
+END (__ieee754_ilogbl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_log.S b/REORG.TODO/sysdeps/i386/fpu/e_log.S
new file mode 100644
index 0000000000..335df22577
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_log.S
@@ -0,0 +1,92 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log)
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+ fxam
+ fnstsw
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ sahf
+ jc 3f // in case x is NaN or +-Inf
+4: fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : log(2)
+ fyl2x // log(x)
+ ret
+
+3: jp 4b // in case x is +-Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+END (__ieee754_log)
+
+ENTRY(__log_finite)
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2b
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 6f
+ fabs // log(1) is +0 in all rounding modes.
+6: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__log_finite)
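
Both entry points above share one dispatch: fyl2x computes y*log2(x) but
cancels badly when x is near 1, while fyl2xp1 computes y*log2(1+s) accurately
for small s; 0.29 is just a bound known to be safely inside fyl2xp1's valid
range.  A C sketch of the dispatch (log_sketch is an illustrative name;
log1p stands in for fyl2xp1):

#include <math.h>

double
log_sketch (double x)
{
  double s = x - 1.0;
  if (s == 0.0)
    return 0.0;        /* x == 1: the fxam/fabs dance above exists
                          because 1-1 is -0 under FE_DOWNWARD and
                          log(1) must be +0 in every rounding mode */
  if (fabs (s) < 0.29)
    return log1p (s);  /* fyl2xp1 path (label 5): consumes s itself,
                          so the rounding of 1+s never happens */
  return log (x);      /* fyl2x path (label 2) */
}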
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_log10.S b/REORG.TODO/sysdeps/i386/fpu/e_log10.S
new file mode 100644
index 0000000000..17277084ca
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_log10.S
@@ -0,0 +1,68 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log10)
+ fldlg2 // log10(2)
+ fldl 4(%esp) // x : log10(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fxam
+ fnstsw
+ fld %st // x : x : log10(2)
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fsubl MO(one) // x-1 : x : log10(2)
+ fld %st // x-1 : x-1 : x : log10(2)
+ fabs // |x-1| : x-1 : x : log10(2)
+ fcompl MO(limit) // x-1 : x : log10(2)
+ fnstsw // x-1 : x : log10(2)
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log10(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : log10(2)
+ fyl2xp1 // log10(x)
+ ret
+
+2: fstp %st(0) // x : log10(2)
+ fyl2x // log10(x)
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+END (__ieee754_log10)
+strong_alias (__ieee754_log10, __log10_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_log10f.S b/REORG.TODO/sysdeps/i386/fpu/e_log10f.S
new file mode 100644
index 0000000000..72a3b88251
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_log10f.S
@@ -0,0 +1,69 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log10f)
+ fldlg2 // log10(2)
+ flds 4(%esp) // x : log10(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fxam
+ fnstsw
+ fld %st // x : x : log10(2)
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fsubl MO(one) // x-1 : x : log10(2)
+ fld %st // x-1 : x-1 : x : log10(2)
+ fabs // |x-1| : x-1 : x : log10(2)
+ fcompl MO(limit) // x-1 : x : log10(2)
+ fnstsw // x-1 : x : log10(2)
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log10(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : log10(2)
+ fyl2xp1 // log10(x)
+ ret
+
+2: fstp %st(0) // x : log10(2)
+ fyl2x // log10(x)
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+END (__ieee754_log10f)
+strong_alias (__ieee754_log10f, __log10f_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_log10l.S b/REORG.TODO/sysdeps/i386/fpu/e_log10l.S
new file mode 100644
index 0000000000..9326b19796
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_log10l.S
@@ -0,0 +1,71 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log10l)
+ fldlg2 // log10(2)
+ fldt 4(%esp) // x : log10(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fxam
+ fnstsw
+ fld %st // x : x : log10(2)
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fsubl MO(one) // x-1 : x : log10(2)
+ fld %st // x-1 : x-1 : x : log10(2)
+ fabs // |x-1| : x-1 : x : log10(2)
+ fcompl MO(limit) // x-1 : x : log10(2)
+ fnstsw // x-1 : x : log10(2)
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log10(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : log10(2)
+ fyl2xp1 // log10(x)
+ ret
+
+2: fstp %st(0) // x : log10(2)
+ fyl2x // log10(x)
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ fadd %st(0)
+ ret
+END(__ieee754_log10l)
+strong_alias (__ieee754_log10l, __log10l_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_log2.S b/REORG.TODO/sysdeps/i386/fpu/e_log2.S
new file mode 100644
index 0000000000..73ff0fffd3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_log2.S
@@ -0,0 +1,69 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fldl MO(one)
+ fldl 4(%esp) // x : 1
+ fxam
+ fnstsw
+ fld %st // x : x : 1
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fsub %st(2), %st // x-1 : x : 1
+ fld %st // x-1 : x-1 : x : 1
+ fabs // |x-1| : x-1 : x : 1
+ fcompl MO(limit) // x-1 : x : 1
+ fnstsw // x-1 : x : 1
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log2(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : 1
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : 1
+ fyl2x // log(x)
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+END (__ieee754_log2)
+strong_alias (__ieee754_log2, __log2_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_log2f.S b/REORG.TODO/sysdeps/i386/fpu/e_log2f.S
new file mode 100644
index 0000000000..344eeb495e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_log2f.S
@@ -0,0 +1,69 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log2f)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fldl MO(one)
+ flds 4(%esp) // x : 1
+ fxam
+ fnstsw
+ fld %st // x : x : 1
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fsub %st(2), %st // x-1 : x : 1
+ fld %st // x-1 : x-1 : x : 1
+ fabs // |x-1| : x-1 : x : 1
+ fcompl MO(limit) // x-1 : x : 1
+ fnstsw // x-1 : x : 1
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log2(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : 1
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : 1
+ fyl2x // log(x)
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+END (__ieee754_log2f)
+strong_alias (__ieee754_log2f, __log2f_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_log2l.S b/REORG.TODO/sysdeps/i386/fpu/e_log2l.S
new file mode 100644
index 0000000000..73e62ea908
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_log2l.S
@@ -0,0 +1,70 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log2l)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fldl MO(one)
+ fldt 4(%esp) // x : 1
+ fxam
+ fnstsw
+ fld %st // x : x : 1
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fsub %st(2), %st // x-1 : x : 1
+ fld %st // x-1 : x-1 : x : 1
+ fabs // |x-1| : x-1 : x : 1
+ fcompl MO(limit) // x-1 : x : 1
+ fnstsw // x-1 : x : 1
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log2(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : 1
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : 1
+ fyl2x // log(x)
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ fadd %st(0)
+ ret
+END (__ieee754_log2l)
+strong_alias (__ieee754_log2l, __log2l_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_logf.S b/REORG.TODO/sysdeps/i386/fpu/e_logf.S
new file mode 100644
index 0000000000..de967a31f5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_logf.S
@@ -0,0 +1,93 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_logf)
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+ fxam
+ fnstsw
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ sahf
+ jc 3f // in case x is NaN or +-Inf
+4: fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : log(2)
+ fyl2x // log(x)
+ ret
+
+3: jp 4b // in case x is +-Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+END (__ieee754_logf)
+
+ENTRY(__logf_finite)
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2b
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 6f
+ fabs // log(1) is +0 in all rounding modes.
+6: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__logf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_logl.S b/REORG.TODO/sysdeps/i386/fpu/e_logl.S
new file mode 100644
index 0000000000..53127d704e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_logl.S
@@ -0,0 +1,97 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_logl)
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+ fxam
+ fnstsw
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ sahf
+ jc 3f // in case x is NaN or +-Inf
+ movzwl 4+8(%esp), %eax
+ cmpl $0xc000, %eax
+ jae 6f // x <= -2, avoid overflow from -LDBL_MAX - 1.
+4: fsubl MO(one) // x-1 : x : log(2)
+6: fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 5f
+ fabs // log(1) is +0 in all rounding modes.
+5: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : log(2)
+ fyl2x // log(x)
+ ret
+
+3: jp 4b // in case x is +-Inf
+ fstp %st(1)
+ fstp %st(1)
+ fadd %st(0)
+ ret
+END (__ieee754_logl)
+
+ENTRY(__logl_finite)
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2b
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 7f
+ fabs // log(1) is +0 in all rounding modes.
+7: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__logl_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_pow.S b/REORG.TODO/sysdeps/i386/fpu/e_pow.S
new file mode 100644
index 0000000000..2edb9a9fbc
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_pow.S
@@ -0,0 +1,456 @@
+/* ix87 specific implementation of pow function.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ .type p63,@object
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
+ .type p10,@object
+p10: .byte 0, 0, 0, 0, 0, 0, 0x90, 0x40
+ ASM_SIZE_DIRECTIVE(p10)
+
+ .section .rodata.cst16,"aM",@progbits,16
+
+ .p2align 3
+ .type infinity,@object
+inf_zero:
+infinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
+ ASM_SIZE_DIRECTIVE(infinity)
+ .type zero,@object
+zero: .double 0.0
+ ASM_SIZE_DIRECTIVE(zero)
+ .type minf_mzero,@object
+minf_mzero:
+minfinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
+mzero:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ ASM_SIZE_DIRECTIVE(minf_mzero)
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+# define MO(op) op
+# define MOX(op,x,f) op(,x,f)
+#endif
+
+ .text
+ENTRY(__ieee754_pow)
+ fldl 12(%esp) // y
+ fxam
+
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+
+ fnstsw
+ movb %ah, %dl
+ andb $0x45, %ah
+ cmpb $0x40, %ah // is y == 0 ?
+ je 11f
+
+ cmpb $0x05, %ah // is y == ±inf ?
+ je 12f
+
+ cmpb $0x01, %ah // is y == NaN ?
+ je 30f
+
+ fldl 4(%esp) // x : y
+
+ subl $8,%esp
+ cfi_adjust_cfa_offset (8)
+
+ fxam
+ fnstsw
+ movb %ah, %dh
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ je 20f // x is ±0
+
+ cmpb $0x05, %ah
+ je 15f // x is ±inf
+
+ cmpb $0x01, %ah
+ je 32f // x is NaN
+
+ fxch // y : x
+
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
+	/* First see whether `y' is an integer (it may well be negative).
+	   In this case we can use a more precise algorithm.  */
+ fld %st // y : y : x
+ fistpll (%esp) // y : x
+ fildll (%esp) // int(y) : y : x
+ fucomp %st(1) // y : x
+ fnstsw
+ sahf
+ jne 3f
+
+ /* OK, we have an integer value for y. If large enough that
+ errors may propagate out of the 11 bits excess precision, use
+ the algorithm for real exponent instead. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p10) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ orl $0, %edx
+ fstp %st(0) // x
+ jns 4f // y >= 0, jump
+ fdivrl MO(one) // 1/x (now referred to as x)
+ negl %eax
+ adcl $0, %edx
+ negl %edx
+4: fldl MO(one) // 1 : x
+ fxch
+
+ /* If y is even, take the absolute value of x. Otherwise,
+ ensure all intermediate values that might overflow have the
+ sign of x. */
+ testb $1, %al
+ jnz 6f
+ fabs
+
+6: shrdl $1, %edx, %eax
+ jnc 5f
+ fxch
+ fabs
+ fmul %st(1) // x : ST*x
+ fxch
+5: fld %st // x : x : ST*x
+ fabs // |x| : x : ST*x
+ fmulp // |x|*x : ST*x
+ shrl $1, %edx
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jnz 6b
+ fstp %st(0) // ST*x
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ DBL_NARROW_EVAL_UFLOW_NONNAN
+ ret
+
+ /* y is ±NAN */
+30: fldl 4(%esp) // x : y
+ fldl MO(one) // 1.0 : x : y
+ fucomp %st(1) // x : y
+ fnstsw
+ sahf
+ je 31f
+ fxch // y : x
+31: fstp %st(1)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+32: addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fstp %st(1)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+2: // y is a large integer (absolute value at least 1L<<10), but
+ // may be odd unless at least 1L<<64. So it may be necessary
+ // to adjust the sign of a negative result afterwards.
+ fxch // x : y
+ fabs // |x| : y
+ fxch // y : x
+ .align ALIGNARG(4)
+3: /* y is a real number. */
+ fxch // x : y
+ fldl MO(one) // 1.0 : x : y
+ fldl MO(limit) // 0.29 : 1.0 : x : y
+ fld %st(2) // x : 0.29 : 1.0 : x : y
+ fsub %st(2) // x-1 : 0.29 : 1.0 : x : y
+ fabs // |x-1| : 0.29 : 1.0 : x : y
+ fucompp // 1.0 : x : y
+ fnstsw
+ fxch // x : 1.0 : y
+ sahf
+ ja 7f
+ fsub %st(1) // x-1 : 1.0 : y
+ fyl2xp1 // log2(x) : y
+ jmp 8f
+
+7: fyl2x // log2(x) : y
+8: fmul %st(1) // y*log2(x) : y
+ fst %st(1) // y*log2(x) : y*log2(x)
+ frndint // int(y*log2(x)) : y*log2(x)
+ fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
+ fxch // fract(y*log2(x)) : int(y*log2(x))
+ f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
+ faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
+
+ // Before scaling, we must negate if x is negative and y is an
+ // odd integer.
+ testb $2, %dh
+ jz 291f
+ // x is negative. If y is an odd integer, negate the result.
+ fldl 20(%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x))
+ fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x))
+ fabs // |y| : y : 2^fract(y*log2(x)) : int(y*log2(x))
+ fcompl MO(p63) // y : 2^fract(y*log2(x)) : int(y*log2(x))
+ fnstsw
+ sahf
+ jnc 290f
+
+ // We must find out whether y is an odd integer.
+ fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x))
+ fistpll (%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x))
+ fildll (%esp) // int(y) : y : 2^fract(y*log2(x)) : int(y*log2(x))
+ fucompp // 2^fract(y*log2(x)) : int(y*log2(x))
+ fnstsw
+ sahf
+ jne 291f
+
+ // OK, the value is an integer, but is it odd?
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz 292f // jump if not odd
+ // It's an odd integer.
+ fchs
+ jmp 292f
+
+ cfi_adjust_cfa_offset (8)
+290: fstp %st(0) // 2^fract(y*log2(x)) : int(y*log2(x))
+291: addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+292: fscale // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+ fstp %st(1) // +/- 2^fract(y*log2(x))*2^int(y*log2(x))
+ DBL_NARROW_EVAL_UFLOW_NONNAN
+ ret
+
+
+ // pow(x,±0) = 1
+ .align ALIGNARG(4)
+11: fstp %st(0) // pop y
+ fldl MO(one)
+ ret
+
+ // y == ±inf
+ .align ALIGNARG(4)
+12: fstp %st(0) // pop y
+ fldl MO(one) // 1
+ fldl 4(%esp) // x : 1
+ fabs // abs(x) : 1
+ fucompp // < 1, == 1, or > 1
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x45, %ah
+ je 13f // jump if x is NaN
+
+ cmpb $0x40, %ah
+ je 14f // jump if |x| == 1
+
+ shlb $1, %ah
+ xorb %ah, %dl
+ andl $2, %edx
+ fldl MOX(inf_zero, %edx, 4)
+ ret
+
+ .align ALIGNARG(4)
+14: fldl MO(one)
+ ret
+
+ .align ALIGNARG(4)
+13: fldl 4(%esp) // load x == NaN
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±inf
+15: fstp %st(0) // y
+ testb $2, %dh
+ jz 16f // jump if x == +inf
+
+ // fistpll raises invalid exception for |y| >= 1L<<63, so test
+ // that (in which case y is certainly even) before testing
+ // whether y is odd.
+ fld %st // y : y
+ fabs // |y| : y
+ fcompl MO(p63) // y
+ fnstsw
+ sahf
+ jnc 16f
+
+ // We must find out whether y is an odd integer.
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 17f
+
+ // OK, the value is an integer.
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz 18f // jump if not odd
+ // It's an odd integer.
+ shrl $31, %edx
+ fldl MOX(minf_mzero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+16: fcompl MO(zero)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fnstsw
+ shrl $5, %eax
+ andl $8, %eax
+ fldl MOX(inf_zero, %eax, 1)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+17: shll $30, %edx // sign bit for y in right position
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+18: shrl $31, %edx
+ fldl MOX(inf_zero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±0
+20: fstp %st(0) // y
+ testb $2, %dl
+ jz 21f // y > 0
+
+ // x is ±0 and y is < 0. We must find out whether y is an odd integer.
+ testb $2, %dh
+ jz 25f
+
+ // fistpll raises invalid exception for |y| >= 1L<<63, so test
+ // that (in which case y is certainly even) before testing
+ // whether y is odd.
+ fld %st // y : y
+ fabs // |y| : y
+ fcompl MO(p63) // y
+ fnstsw
+ sahf
+ jnc 25f
+
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 26f
+
+ // OK, the value is an integer.
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz 27f // jump if not odd
+ // It's an odd integer.
+ // Raise divide-by-zero exception and get minus infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ fchs
+ ret
+
+ cfi_adjust_cfa_offset (8)
+25: fstp %st(0)
+26: addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+27: // Raise divide-by-zero exception and get infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±0 and y is > 0. We must find out whether y is an odd integer.
+21: testb $2, %dh
+ jz 22f
+
+ // fistpll raises invalid exception for |y| >= 1L<<63, so test
+ // that (in which case y is certainly even) before testing
+ // whether y is odd.
+ fcoml MO(p63) // y
+ fnstsw
+ sahf
+ jnc 22f
+
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 23f
+
+ // OK, the value is an integer.
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz 24f // jump if not odd
+ // It's an odd integer.
+ fldl MO(mzero)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+22: fstp %st(0)
+23: addl $8, %esp // Don't use 2 x pop
+ cfi_adjust_cfa_offset (-8)
+24: fldl MO(zero)
+ ret
+
+END(__ieee754_pow)
+strong_alias (__ieee754_pow, __pow_finite)
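
The integer-y fast path above (labels 4/5/6) is binary exponentiation over
the bits of the 64-bit integer in %edx:%eax, after replacing x by 1/x for
negative y; the fabs games keep the sign in exactly one factor so that
intermediate overflows carry the right sign.  A plain C rendering of the
loop; pow_int_sketch is an illustrative name, and since the asm only takes
this path for |y| < 1<<10 the negation below cannot overflow:

double
pow_int_sketch (double x, long long n)
{
  double acc = 1.0;
  if (n < 0)
    {
      x = 1.0 / x;              /* the fdivrl MO(one) above */
      n = -n;
    }
  while (n != 0)
    {
      if (n & 1)
        acc *= x;               /* low bit set: multiply in (6b loop) */
      x *= x;                   /* square for the next bit (label 5) */
      n >>= 1;
    }
  return acc;
}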
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_powf.S b/REORG.TODO/sysdeps/i386/fpu/e_powf.S
new file mode 100644
index 0000000000..467ef2380b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_powf.S
@@ -0,0 +1,392 @@
+/* ix87 specific implementation of pow function.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ .type p31,@object
+p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41
+ ASM_SIZE_DIRECTIVE(p31)
+
+ .section .rodata.cst16,"aM",@progbits,16
+
+ .p2align 3
+ .type infinity,@object
+inf_zero:
+infinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
+ ASM_SIZE_DIRECTIVE(infinity)
+ .type zero,@object
+zero: .double 0.0
+ ASM_SIZE_DIRECTIVE(zero)
+ .type minf_mzero,@object
+minf_mzero:
+minfinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
+mzero:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ ASM_SIZE_DIRECTIVE(minf_mzero)
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+# define MO(op) op
+# define MOX(op,x,f) op(,x,f)
+#endif
+
+ .text
+ENTRY(__ieee754_powf)
+ flds 8(%esp) // y
+ fxam
+
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+
+ fnstsw
+ movb %ah, %dl
+ andb $0x45, %ah
+ cmpb $0x40, %ah // is y == 0 ?
+ je 11f
+
+ cmpb $0x05, %ah // is y == ±inf ?
+ je 12f
+
+ cmpb $0x01, %ah // is y == NaN ?
+ je 30f
+
+ flds 4(%esp) // x : y
+
+ subl $4, %esp
+ cfi_adjust_cfa_offset (4)
+
+ fxam
+ fnstsw
+ movb %ah, %dh
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ je 20f // x is ±0
+
+ cmpb $0x05, %ah
+ je 15f // x is ±inf
+
+ cmpb $0x01, %ah
+ je 33f // x is NaN
+
+ fxch // y : x
+
+ /* fistpl raises invalid exception for |y| >= 1L<<31. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p31) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
+	/* First see whether `y' is an integer (it may well be negative).
+	   In this case we can use a more precise algorithm.  */
+ fld %st // y : y : x
+ fistpl (%esp) // y : x
+ fildl (%esp) // int(y) : y : x
+ fucomp %st(1) // y : x
+ fnstsw
+ sahf
+ jne 3f
+
+ /* OK, we have an integer value for y. */
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ orl $0, %edx
+ fstp %st(0) // x
+ jns 4f // y >= 0, jump
+ fdivrl MO(one) // 1/x (now referred to as x)
+ negl %edx
+4: fldl MO(one) // 1 : x
+ fxch
+
+ /* If y is even, take the absolute value of x. Otherwise,
+ ensure all intermediate values that might overflow have the
+ sign of x. */
+ testb $1, %dl
+ jnz 6f
+ fabs
+
+6: shrl $1, %edx
+ jnc 5f
+ fxch
+ fabs
+ fmul %st(1) // x : ST*x
+ fxch
+5: fld %st // x : x : ST*x
+ fabs // |x| : x : ST*x
+ fmulp // |x|*x : ST*x
+ testl %edx, %edx
+ jnz 6b
+ fstp %st(0) // ST*x
+ FLT_NARROW_EVAL_UFLOW_NONNAN
+ ret
+
+ /* y is ±NAN */
+30: flds 4(%esp) // x : y
+ fldl MO(one) // 1.0 : x : y
+ fucomp %st(1) // x : y
+ fnstsw
+ sahf
+ je 31f
+ fxch // y : x
+31: fstp %st(1)
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ .align ALIGNARG(4)
+2: /* y is a large integer (so even). */
+ fxch // x : y
+ fabs // |x| : y
+ fxch // y : x
+ .align ALIGNARG(4)
+3: /* y is a real number. */
+ fxch // x : y
+ fldl MO(one) // 1.0 : x : y
+ fldl MO(limit) // 0.29 : 1.0 : x : y
+ fld %st(2) // x : 0.29 : 1.0 : x : y
+ fsub %st(2) // x-1 : 0.29 : 1.0 : x : y
+ fabs // |x-1| : 0.29 : 1.0 : x : y
+ fucompp // 1.0 : x : y
+ fnstsw
+ fxch // x : 1.0 : y
+ sahf
+ ja 7f
+ fsub %st(1) // x-1 : 1.0 : y
+ fyl2xp1 // log2(x) : y
+ jmp 8f
+
+7: fyl2x // log2(x) : y
+8: fmul %st(1) // y*log2(x) : y
+ fst %st(1) // y*log2(x) : y*log2(x)
+ frndint // int(y*log2(x)) : y*log2(x)
+ fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
+ fxch // fract(y*log2(x)) : int(y*log2(x))
+ f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
+ faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
+ fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+32: addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+ fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
+ FLT_NARROW_EVAL_UFLOW_NONNAN
+ ret
+
+ /* x is NaN. */
+ cfi_adjust_cfa_offset (4)
+33: addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+ fstp %st(1)
+ ret
+
+ // pow(x,±0) = 1
+ .align ALIGNARG(4)
+11: fstp %st(0) // pop y
+ fldl MO(one)
+ ret
+
+ // y == ±inf
+ .align ALIGNARG(4)
+12: fstp %st(0) // pop y
+ fldl MO(one) // 1
+ flds 4(%esp) // x : 1
+ fabs // abs(x) : 1
+ fucompp // < 1, == 1, or > 1
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x45, %ah
+ je 13f // jump if x is NaN
+
+ cmpb $0x40, %ah
+ je 14f // jump if |x| == 1
+
+ shlb $1, %ah
+ xorb %ah, %dl
+ andl $2, %edx
+ fldl MOX(inf_zero, %edx, 4)
+ ret
+
+ .align ALIGNARG(4)
+14: fldl MO(one)
+ ret
+
+ .align ALIGNARG(4)
+13: flds 4(%esp) // load x == NaN
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ .align ALIGNARG(4)
+ // x is ±inf
+15: fstp %st(0) // y
+ testb $2, %dh
+ jz 16f // jump if x == +inf
+
+ // fistpl raises invalid exception for |y| >= 1L<<31, so test
+ // that (in which case y is certainly even) before testing
+ // whether y is odd.
+ fld %st // y : y
+ fabs // |y| : y
+ fcompl MO(p31) // y
+ fnstsw
+ sahf
+ jnc 16f
+
+ // We must find out whether y is an odd integer.
+ fld %st // y : y
+ fistpl (%esp) // y
+ fildl (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 17f
+
+ // OK, the value is an integer.
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ testb $1, %dl
+ jz 18f // jump if not odd
+ // It's an odd integer.
+ shrl $31, %edx
+ fldl MOX(minf_mzero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ .align ALIGNARG(4)
+16: fcompl MO(zero)
+ addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+ fnstsw
+ shrl $5, %eax
+ andl $8, %eax
+ fldl MOX(inf_zero, %eax, 1)
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ .align ALIGNARG(4)
+17: shll $30, %edx // sign bit for y in right position
+ addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+18: shrl $31, %edx
+ fldl MOX(inf_zero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ .align ALIGNARG(4)
+ // x is ±0
+20: fstp %st(0) // y
+ testb $2, %dl
+ jz 21f // y > 0
+
+ // x is ±0 and y is < 0. We must find out whether y is an odd integer.
+ testb $2, %dh
+ jz 25f
+
+ // fistpl raises invalid exception for |y| >= 1L<<31, so test
+ // that (in which case y is certainly even) before testing
+ // whether y is odd.
+ fld %st // y : y
+ fabs // |y| : y
+ fcompl MO(p31) // y
+ fnstsw
+ sahf
+ jnc 25f
+
+ fld %st // y : y
+ fistpl (%esp) // y
+ fildl (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 26f
+
+ // OK, the value is an integer.
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ testb $1, %dl
+ jz 27f // jump if not odd
+ // It's an odd integer.
+ // Raise divide-by-zero exception and get minus infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ fchs
+ ret
+
+ cfi_adjust_cfa_offset (4)
+25: fstp %st(0)
+26: addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+27: // Raise divide-by-zero exception and get infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ .align ALIGNARG(4)
+ // x is ±0 and y is > 0. We must find out whether y is an odd integer.
+21: testb $2, %dh
+ jz 22f
+
+ // fistpl raises invalid exception for |y| >= 1L<<31, so test
+ // that (in which case y is certainly even) before testing
+ // whether y is odd.
+ fcoml MO(p31) // y
+ fnstsw
+ sahf
+ jnc 22f
+
+ fld %st // y : y
+ fistpl (%esp) // y
+ fildl (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 23f
+
+ // OK, the value is an integer.
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ testb $1, %dl
+ jz 24f // jump if not odd
+ // It's an odd integer.
+ fldl MO(mzero)
+ ret
+
+ cfi_adjust_cfa_offset (4)
+22: fstp %st(0)
+23: addl $4, %esp // Don't use pop.
+ cfi_adjust_cfa_offset (-4)
+24: fldl MO(zero)
+ ret
+
+END(__ieee754_powf)
+strong_alias (__ieee754_powf, __powf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_powl.S b/REORG.TODO/sysdeps/i386/fpu/e_powl.S
new file mode 100644
index 0000000000..9e162848e4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_powl.S
@@ -0,0 +1,459 @@
+/* ix87 specific implementation of pow function.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type p2,@object
+p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40
+ ASM_SIZE_DIRECTIVE(p2)
+ .type p63,@object
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
+ .type p64,@object
+p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
+ ASM_SIZE_DIRECTIVE(p64)
+ .type p78,@object
+p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
+ ASM_SIZE_DIRECTIVE(p78)
+ .type pm79,@object
+pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+ ASM_SIZE_DIRECTIVE(pm79)
+
+ .section .rodata.cst16,"aM",@progbits,16
+
+ .p2align 3
+ .type infinity,@object
+inf_zero:
+infinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
+ ASM_SIZE_DIRECTIVE(infinity)
+ .type zero,@object
+zero: .double 0.0
+ ASM_SIZE_DIRECTIVE(zero)
+ .type minf_mzero,@object
+minf_mzero:
+minfinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
+mzero:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ ASM_SIZE_DIRECTIVE(minf_mzero)
+DEFINE_LDBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+# define MO(op) op
+# define MOX(op,x,f) op(,x,f)
+#endif
+
+ .text
+ENTRY(__ieee754_powl)
+ fldt 16(%esp) // y
+ fxam
+
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+
+ fnstsw
+ movb %ah, %dl
+ andb $0x45, %ah
+ cmpb $0x40, %ah // is y == 0 ?
+ je 11f
+
+ cmpb $0x05, %ah // is y == ±inf ?
+ je 12f
+
+ cmpb $0x01, %ah // is y == NaN ?
+ je 30f
+
+ fldt 4(%esp) // x : y
+
+ subl $8,%esp
+ cfi_adjust_cfa_offset (8)
+
+ fxam
+ fnstsw
+ movb %ah, %dh
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ je 20f // x is ±0
+
+ cmpb $0x05, %ah
+ je 15f // x is ±inf
+
+ cmpb $0x01, %ah
+ je 32f // x is NaN
+
+ fxch // y : x
+
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
+	/* First see whether `y' is an integer (it may well be negative).
+	   In this case we can use a more precise algorithm.  */
+ fld %st // y : y : x
+ fistpll (%esp) // y : x
+ fildll (%esp) // int(y) : y : x
+ fucomp %st(1) // y : x
+ fnstsw
+ sahf
+ je 9f
+
+ // If y has absolute value at most 0x1p-79, then any finite
+ // nonzero x will result in 1. Saturate y to those bounds to
+ // avoid underflow in the calculation of y*log2(x).
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(pm79) // y : x
+ fnstsw
+ sahf
+ jnc 3f
+ fstp %st(0) // pop y
+ fldl MO(pm79) // 0x1p-79 : x
+ testb $2, %dl
+	jz	3f		// y > 0
+ fchs // -0x1p-79 : x
+ jmp 3f
+
+9: /* OK, we have an integer value for y. Unless very small
+ (we use < 4), use the algorithm for real exponent to avoid
+ accumulation of errors. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p2) // y : x
+ fnstsw
+ sahf
+ jnc 3f
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ orl $0, %edx
+ fstp %st(0) // x
+ jns 4f // y >= 0, jump
+ fdivrl MO(one) // 1/x (now referred to as x)
+ negl %eax
+ adcl $0, %edx
+ negl %edx
+4: fldl MO(one) // 1 : x
+ fxch
+
+ /* If y is even, take the absolute value of x. Otherwise,
+ ensure all intermediate values that might overflow have the
+ sign of x. */
+ testb $1, %al
+ jnz 6f
+ fabs
+
+6: shrdl $1, %edx, %eax
+ jnc 5f
+ fxch
+ fabs
+ fmul %st(1) // x : ST*x
+ fxch
+5: fld %st // x : x : ST*x
+ fabs // |x| : x : ST*x
+ fmulp // |x|*x : ST*x
+ shrl $1, %edx
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jnz 6b
+ fstp %st(0) // ST*x
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ LDBL_CHECK_FORCE_UFLOW_NONNAN
+ ret
+
+ /* y is ±NAN */
+30: fldt 4(%esp) // x : y
+ fldl MO(one) // 1.0 : x : y
+ fucomp %st(1) // x : y
+ fnstsw
+ sahf
+ je 33f
+31: /* At least one argument NaN, and result should be NaN. */
+ faddp
+ ret
+33: jp 31b
+	/* pow (1, NaN); check whether the NaN is signaling.  */
+ testb $0x40, 23(%esp)
+ jz 31b
+ fstp %st(1)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+32: addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ faddp
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+2: // y is a large integer (absolute value at least 1L<<63).
+ // If y has absolute value at least 1L<<78, then any finite
+ // nonzero x will result in 0 (underflow), 1 or infinity (overflow).
+ // Saturate y to those bounds to avoid overflow in the calculation
+ // of y*log2(x).
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p78) // y : x
+ fnstsw
+ sahf
+ jc 3f
+ fstp %st(0) // pop y
+ fldl MO(p78) // 1L<<78 : x
+ testb $2, %dl
+ jz 3f // y > 0
+ fchs // -(1L<<78) : x
+ .align ALIGNARG(4)
+3: /* y is a real number. */
+ subl $28, %esp
+ cfi_adjust_cfa_offset (28)
+ fstpt 12(%esp) // x
+ fstpt (%esp) // <empty>
+ call HIDDEN_JUMPTARGET (__powl_helper) // <result>
+ addl $36, %esp
+ cfi_adjust_cfa_offset (-36)
+ ret
+
+ // pow(x,±0) = 1, unless x is sNaN
+ .align ALIGNARG(4)
+11: fstp %st(0) // pop y
+ fldt 4(%esp) // x
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 112f // x is NaN
+111: fstp %st(0)
+ fldl MO(one)
+ ret
+
+112: testb $0x40, 11(%esp)
+ jnz 111b
+ fadd %st(0)
+ ret
+
+ // y == ±inf
+ .align ALIGNARG(4)
+12: fstp %st(0) // pop y
+ fldl MO(one) // 1
+ fldt 4(%esp) // x : 1
+ fabs // abs(x) : 1
+ fucompp // < 1, == 1, or > 1
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x45, %ah
+ je 13f // jump if x is NaN
+
+ cmpb $0x40, %ah
+ je 14f // jump if |x| == 1
+
+ shlb $1, %ah
+ xorb %ah, %dl
+ andl $2, %edx
+ fldl MOX(inf_zero, %edx, 4)
+ ret
+
+ .align ALIGNARG(4)
+14: fldl MO(one)
+ ret
+
+ .align ALIGNARG(4)
+13: fldt 4(%esp) // load x == NaN
+ fadd %st(0)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±inf
+15: fstp %st(0) // y
+ testb $2, %dh
+ jz 16f // jump if x == +inf
+
+ // fistpll raises invalid exception for |y| >= 1L<<63, but y
+ // may be odd unless we know |y| >= 1L<<64.
+ fld %st // y : y
+ fabs // |y| : y
+ fcompl MO(p64) // y
+ fnstsw
+ sahf
+ jnc 16f
+ fldl MO(p63) // p63 : y
+ fxch // y : p63
+ fprem // y%p63 : p63
+ fstp %st(1) // y%p63
+
+ // We must find out whether y is an odd integer.
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 17f
+
+ // OK, the value is an integer, but is it odd?
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz 18f // jump if not odd
+ // It's an odd integer.
+ shrl $31, %edx
+ fldl MOX(minf_mzero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+16: fcompl MO(zero)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fnstsw
+ shrl $5, %eax
+ andl $8, %eax
+ fldl MOX(inf_zero, %eax, 1)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+17: shll $30, %edx // sign bit for y in right position
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+18: shrl $31, %edx
+ fldl MOX(inf_zero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±0
+20: fstp %st(0) // y
+ testb $2, %dl
+ jz 21f // y > 0
+
+ // x is ±0 and y is < 0. We must find out whether y is an odd integer.
+ testb $2, %dh
+ jz 25f
+
+ // fistpll raises invalid exception for |y| >= 1L<<63, but y
+ // may be odd unless we know |y| >= 1L<<64.
+ fld %st // y : y
+ fabs // |y| : y
+ fcompl MO(p64) // y
+ fnstsw
+ sahf
+ jnc 25f
+ fldl MO(p63) // p63 : y
+ fxch // y : p63
+ fprem // y%p63 : p63
+ fstp %st(1) // y%p63
+
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 26f
+
+ // OK, the value is an integer, but is it odd?
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz 27f // jump if not odd
+ // It's an odd integer.
+ // Raise divide-by-zero exception and get minus infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ fchs
+ ret
+
+ cfi_adjust_cfa_offset (8)
+25: fstp %st(0)
+26: addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+27: // Raise divide-by-zero exception and get infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±0 and y is > 0. We must find out whether y is an odd integer.
+21: testb $2, %dh
+ jz 22f
+
+ // fistpll raises invalid exception for |y| >= 1L<<63, but y
+ // may be odd unless we know |y| >= 1L<<64.
+ fld %st // y : y
+ fcompl MO(p64) // y
+ fnstsw
+ sahf
+ jnc 22f
+ fldl MO(p63) // p63 : y
+ fxch // y : p63
+ fprem // y%p63 : p63
+ fstp %st(1) // y%p63
+
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne 23f
+
+ // OK, the value is an integer, but is it odd?
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz 24f // jump if not odd
+ // It's an odd integer.
+ fldl MO(mzero)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+22: fstp %st(0)
+23: addl $8, %esp // Don't use 2 x pop
+ cfi_adjust_cfa_offset (-8)
+24: fldl MO(zero)
+ ret
+
+END(__ieee754_powl)
+strong_alias (__ieee754_powl, __powl_finite)
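
The odd-integer tests above have one subtlety worth restating: fistpll
cannot convert |y| >= 1L<<63, yet a long double in [2^63, 2^64) has ulp
exactly 1 and so can still be an odd integer.  The code therefore reduces y
modulo 2^63 with fprem first; 2^63 is even, so the remainder has the same
parity as y.  A C sketch of the test (is_odd_int is an illustrative name;
finite y is assumed, NaN having been screened out earlier):

#include <math.h>

static int
is_odd_int (long double y)
{
  if (fabsl (y) >= 0x1p64L)
    return 0;                    /* ulp >= 2: necessarily even */
  if (fabsl (y) >= 0x1p63L)
    y = fmodl (y, 0x1p63L);      /* the fprem step; parity preserved */
  long long n = (long long) y;
  return (long double) n == y && (n & 1) != 0;
}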
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_rem_pio2.c b/REORG.TODO/sysdeps/i386/fpu/e_rem_pio2.c
new file mode 100644
index 0000000000..1347b0468c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_rem_pio2.c
@@ -0,0 +1,3 @@
+/* Empty. This file is only meant to avoid compiling the file with the
+ same name in the libm-ieee754 directory. The code is not used since
+ there is an assembler version for all users of this file. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_remainder.S b/REORG.TODO/sysdeps/i386/fpu/e_remainder.S
new file mode 100644
index 0000000000..f7867aa90b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_remainder.S
@@ -0,0 +1,18 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__ieee754_remainder)
+ fldl 12(%esp)
+ fldl 4(%esp)
+1: fprem1
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ ret
+END (__ieee754_remainder)
+strong_alias (__ieee754_remainder, __remainder_finite)
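
For reference, the fprem1 used here differs from the fprem in e_fmod.S only
in how the implicit quotient is rounded: fprem truncates it (C fmod), fprem1
rounds it to nearest even (IEEE remainder), so the result lies in
[-|y|/2, +|y|/2].  A quick check of the difference:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* The quotient 5/3 truncates to 1 but rounds to 2, hence:  */
  printf ("%g %g\n", fmod (5.0, 3.0), remainder (5.0, 3.0));
  /* prints: 2 -1 */
  return 0;
}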
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_remainderf.S b/REORG.TODO/sysdeps/i386/fpu/e_remainderf.S
new file mode 100644
index 0000000000..cfd390bc69
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_remainderf.S
@@ -0,0 +1,18 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__ieee754_remainderf)
+ flds 8(%esp)
+ flds 4(%esp)
+1: fprem1
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ ret
+END (__ieee754_remainderf)
+strong_alias (__ieee754_remainderf, __remainderf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_remainderl.S b/REORG.TODO/sysdeps/i386/fpu/e_remainderl.S
new file mode 100644
index 0000000000..5ec23a37a3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_remainderl.S
@@ -0,0 +1,20 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__ieee754_remainderl)
+ fldt 16(%esp)
+ fldt 4(%esp)
+1: fprem1
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ ret
+END (__ieee754_remainderl)
+strong_alias (__ieee754_remainderl, __remainderl_finite)
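
All three remainder variants above loop on fprem1 because the instruction
reduces the exponent difference by at most 63 bits per iteration; an
incomplete reduction is signalled in the C2 status bit, which lands in the
parity flag after sahf (hence `jp 1b'). A small C illustration of the IEEE
semantics fprem1 computes:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* remainder (x, y) = x - n*y, where n = rint (x/y), ties to even.  */
  printf ("%g\n", remainder (5.0, 2.0)); /* n = 2 (2.5 ties to even): 1 */
  printf ("%g\n", remainder (7.0, 2.0)); /* n = 4 (3.5 ties to even): -1 */
  return 0;
}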
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_scalb.S b/REORG.TODO/sysdeps/i386/fpu/e_scalb.S
new file mode 100644
index 0000000000..370924c29f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_scalb.S
@@ -0,0 +1,100 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Correct handling of y==-inf <drepper@gnu>
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type zero_nan,@object
+zero_nan:
+ .double 0.0
+nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
+ ASM_SIZE_DIRECTIVE(zero_nan)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+# define MO(op) op
+# define MOX(op,x,f) op(,x,f)
+#endif
+
+ .text
+ENTRY(__ieee754_scalb)
+ fldl 12(%esp)
+ fxam
+ fnstsw
+ fldl 4(%esp)
+ andl $0x4700, %eax
+ cmpl $0x0700, %eax
+ je 1f
+ andl $0x4500, %eax
+ cmpl $0x0100, %eax
+ je 2f
+ fxam
+ fnstsw
+ andl $0x4500, %eax
+ cmpl $0x0100, %eax
+ je 3f
+ fld %st(1)
+ frndint
+ fcomp %st(2)
+ fnstsw
+ sahf
+ jne 4f
+ fscale
+ fstp %st(1)
+ DBL_NARROW_EVAL
+ ret
+
+ /* y is -inf */
+1: fxam
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fnstsw
+ movl 8(%esp), %edx
+ shrl $5, %eax
+ fstp %st
+ fstp %st
+ andl $0x80000000, %edx
+ andl $0x0228, %eax
+ cmpl $0x0028, %eax
+ je 4f
+ andl $8, %eax
+ shrl $27, %edx
+ addl %edx, %eax
+ fldl MOX(zero_nan, %eax, 1)
+ ret
+
+	/* The result is NaN, but we must not raise an exception, so load
+	   the value from a variable in memory instead of computing it.  */
+2: fstp %st
+ fstp %st
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl MO(nan)
+ ret
+
+ /* The first parameter is a NaN. Return it. */
+3: fstp %st(1)
+ ret
+
+ /* Return NaN and raise the invalid exception. */
+4: fstp %st
+ fstp %st
+ fldz
+ fdiv %st
+ ret
+END(__ieee754_scalb)
+strong_alias (__ieee754_scalb, __scalb_finite)
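
A hedged C equivalent of the common path above (both operands finite:
frndint/fcomp verify that y is integral, then fscale multiplies by 2^y);
the NaN and infinity branches are omitted and huge y is left aside:

#include <fenv.h>
#include <math.h>

double
scalb_common_path (double x, double y)  /* assumes x and y finite */
{
  if (nearbyint (y) != y)
    {
      /* Mirrors label 4: the assembly computes 0.0/0.0.  */
      feraiseexcept (FE_INVALID);
      return NAN;
    }
  return ldexp (x, (int) y);            /* mirrors fscale */
}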
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_scalbf.S b/REORG.TODO/sysdeps/i386/fpu/e_scalbf.S
new file mode 100644
index 0000000000..4f2dfa3acf
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_scalbf.S
@@ -0,0 +1,102 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Correct handling of y==-inf <drepper@gnu>
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type zero_nan,@object
+zero_nan:
+ .double 0.0
+nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
+ ASM_SIZE_DIRECTIVE(zero_nan)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+# define MO(op) op
+# define MOX(op,x,f) op(,x,f)
+#endif
+
+
+ .text
+ENTRY(__ieee754_scalbf)
+ flds 8(%esp)
+ fxam
+ fnstsw
+ flds 4(%esp)
+ andl $0x4700, %eax
+ cmpl $0x0700, %eax
+ je 1f
+ andl $0x4500, %eax
+ cmpl $0x0100, %eax
+ je 2f
+ fxam
+ fnstsw
+ andl $0x4500, %eax
+ cmpl $0x0100, %eax
+ je 3f
+ fld %st(1)
+ frndint
+ fcomp %st(2)
+ fnstsw
+ sahf
+ jne 4f
+ fscale
+ fstp %st(1)
+ FLT_NARROW_EVAL
+ ret
+
+ /* y is -inf */
+1: fxam
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fnstsw
+ movl 4(%esp), %edx
+ shrl $5, %eax
+ fstp %st
+ fstp %st
+ andl $0x80000000, %edx
+ andl $0x0228, %eax
+ cmpl $0x0028, %eax
+ je 4f
+ andl $8, %eax
+ shrl $27, %edx
+ addl %edx, %eax
+ fldl MOX(zero_nan, %eax, 1)
+ ret
+
+	/* The result is NaN, but we must not raise an exception, so load
+	   the value from a variable in memory instead of computing it.  */
+2: fstp %st
+ fstp %st
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl MO(nan)
+ ret
+
+ /* The first parameter is a NaN. Return it. */
+3: fstp %st(1)
+ ret
+
+ /* Return NaN and raise the invalid exception. */
+4: fstp %st
+ fstp %st
+ fldz
+ fdiv %st
+ ret
+END(__ieee754_scalbf)
+strong_alias (__ieee754_scalbf, __scalbf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_scalbl.S b/REORG.TODO/sysdeps/i386/fpu/e_scalbl.S
new file mode 100644
index 0000000000..896f599cb0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_scalbl.S
@@ -0,0 +1,90 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Correct handling of y==-inf <drepper@gnu>
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type zero_nan,@object
+zero_nan:
+ .double 0.0
+nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
+ ASM_SIZE_DIRECTIVE(zero_nan)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+# define MO(op) op
+# define MOX(op,x,f) op(,x,f)
+#endif
+
+ .text
+ENTRY(__ieee754_scalbl)
+ fldt 16(%esp)
+ fxam
+ fnstsw
+ fldt 4(%esp)
+ andl $0x4700, %eax
+ cmpl $0x0700, %eax
+ je 1f
+ andl $0x4500, %eax
+ cmpl $0x0100, %eax
+ je 2f
+ fxam
+ fnstsw
+ andl $0x4500, %eax
+ cmpl $0x0100, %eax
+ je 2f
+ fld %st(1)
+ frndint
+ fcomp %st(2)
+ fnstsw
+ sahf
+ jne 4f
+ fscale
+ fstp %st(1)
+ ret
+
+ /* y is -inf */
+1: fxam
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fnstsw
+ movl 12(%esp), %edx
+ shrl $5, %eax
+ fstp %st
+ fstp %st
+ andl $0x8000, %edx
+ andl $0x0228, %eax
+ cmpl $0x0028, %eax
+ je 4f
+ andl $8, %eax
+ shrl $11, %edx
+ addl %edx, %eax
+ fldl MOX(zero_nan, %eax, 1)
+ ret
+
+ /* The result is NaN; raise an exception for sNaN arguments. */
+2: faddp
+ ret
+
+ /* Return NaN and raise the invalid exception. */
+4: fstp %st
+ fstp %st
+ fldz
+ fdiv %st
+ ret
+END(__ieee754_scalbl)
+strong_alias (__ieee754_scalbl, __scalbl_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_sqrt.S b/REORG.TODO/sysdeps/i386/fpu/e_sqrt.S
new file mode 100644
index 0000000000..fba5833a9a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_sqrt.S
@@ -0,0 +1,23 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__ieee754_sqrt)
+ fldl 4(%esp)
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fstcw 4(%esp)
+ movl $0xfeff, %edx
+ andl 4(%esp), %edx
+ movl %edx, (%esp)
+ fldcw (%esp)
+ fsqrt
+ fldcw 4(%esp)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ ret
+END (__ieee754_sqrt)
+strong_alias (__ieee754_sqrt, __sqrt_finite)
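
The control-word dance above is a precision fix, not exception handling:
`andl $0xfeff' clears bit 8 of the x87 control word, switching the
precision-control field from extended (64-bit) to double (53-bit) so that
fsqrt cannot double-round on the way to a double result. The same idea as
a C sketch, using glibc's <fpu_control.h>:

#include <fpu_control.h>

double
sqrt_double_prec (double x)
{
  fpu_control_t cw, cw_dbl;
  double r;

  _FPU_GETCW (cw);
  cw_dbl = (cw & ~_FPU_EXTENDED) | _FPU_DOUBLE; /* PC field = double */
  _FPU_SETCW (cw_dbl);
  __asm__ ("fsqrt" : "=t" (r) : "0" (x));
  _FPU_SETCW (cw);                              /* restore caller's CW */
  return r;
}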
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_sqrtf.S b/REORG.TODO/sysdeps/i386/fpu/e_sqrtf.S
new file mode 100644
index 0000000000..6f7e4b015f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_sqrtf.S
@@ -0,0 +1,13 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__ieee754_sqrtf)
+ flds 4(%esp)
+ fsqrt
+ ret
+END (__ieee754_sqrtf)
+strong_alias (__ieee754_sqrtf, __sqrtf_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/e_sqrtl.c b/REORG.TODO/sysdeps/i386/fpu/e_sqrtl.c
new file mode 100644
index 0000000000..41bcd7eeb7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/e_sqrtl.c
@@ -0,0 +1,20 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <math_private.h>
+
+#undef __ieee754_sqrtl
+long double
+__ieee754_sqrtl (long double x)
+{
+ long double res;
+
+ asm ("fsqrt" : "=t" (res) : "0" (x));
+
+ return res;
+}
+strong_alias (__ieee754_sqrtl, __sqrtl_finite)
diff --git a/REORG.TODO/sysdeps/i386/fpu/fclrexcpt.c b/REORG.TODO/sysdeps/i386/fpu/fclrexcpt.c
new file mode 100644
index 0000000000..5d8596964b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fclrexcpt.c
@@ -0,0 +1,69 @@
+/* Clear given exceptions in current floating-point environment.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+__feclearexcept (int excepts)
+{
+ fenv_t temp;
+
+ /* Mask out unsupported bits/exceptions. */
+ excepts &= FE_ALL_EXCEPT;
+
+ /* Bah, we have to clear selected exceptions. Since there is no
+ `fldsw' instruction we have to do it the hard way. */
+ __asm__ ("fnstenv %0" : "=m" (*&temp));
+
+ /* Clear the relevant bits. */
+ temp.__status_word &= excepts ^ FE_ALL_EXCEPT;
+
+ /* Put the new data in effect. */
+ __asm__ ("fldenv %0" : : "m" (*&temp));
+
+ /* If the CPU supports SSE, we clear the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ /* Clear the relevant bits. */
+ xnew_exc &= ~excepts;
+
+ /* Put the new data in effect. */
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feclearexcept, __old_feclearexcept)
+compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feclearexcept, feclearexcept)
+versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2);
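
A usage sketch for feclearexcept/fetestexcept (compile with -lm; the
volatiles keep the compiler from folding the division and its flag away):

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  feclearexcept (FE_DIVBYZERO);       /* start from a clean flag */
  volatile double zero = 0.0;
  volatile double r = 1.0 / zero;     /* sets FE_DIVBYZERO */
  printf ("raised=%d r=%g\n", fetestexcept (FE_DIVBYZERO) != 0, r);
  return 0;
}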
diff --git a/REORG.TODO/sysdeps/i386/fpu/fedisblxcpt.c b/REORG.TODO/sysdeps/i386/fpu/fedisblxcpt.c
new file mode 100644
index 0000000000..f8db665425
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fedisblxcpt.c
@@ -0,0 +1,54 @@
+/* Disable floating-point exceptions.
+ Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Andreas Jaeger <aj@suse.de>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+fedisableexcept (int excepts)
+{
+ unsigned short int new_exc, old_exc;
+
+ /* Get the current control word. */
+ __asm__ ("fstcw %0" : "=m" (*&new_exc));
+
+ old_exc = (~new_exc) & FE_ALL_EXCEPT;
+
+ excepts &= FE_ALL_EXCEPT;
+
+ new_exc |= excepts;
+ __asm__ ("fldcw %0" : : "m" (*&new_exc));
+
+ /* If the CPU supports SSE we set the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current control word. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ xnew_exc |= excepts << 7;
+
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+
+ return old_exc;
+}
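
The `excepts << 7' above depends on a bit-layout coincidence: on i386 the
FE_* macros equal the x87 control-word mask bits 0-5, and the
corresponding MXCSR mask bits sit at positions 7-12. A quick check:

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  /* On i386: FE_INVALID == 0x01, FE_OVERFLOW == 0x08, ...  */
  printf ("FE_INVALID  %#04x -> MXCSR mask %#06x\n",
          FE_INVALID, FE_INVALID << 7);   /* 0x01 -> 0x0080 */
  printf ("FE_OVERFLOW %#04x -> MXCSR mask %#06x\n",
          FE_OVERFLOW, FE_OVERFLOW << 7); /* 0x08 -> 0x0400 */
  return 0;
}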
diff --git a/REORG.TODO/sysdeps/i386/fpu/feenablxcpt.c b/REORG.TODO/sysdeps/i386/fpu/feenablxcpt.c
new file mode 100644
index 0000000000..f1c42d7c27
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/feenablxcpt.c
@@ -0,0 +1,54 @@
+/* Enable floating-point exceptions.
+ Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Andreas Jaeger <aj@suse.de>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+feenableexcept (int excepts)
+{
+ unsigned short int new_exc;
+ unsigned short int old_exc;
+
+ /* Get the current control word. */
+ __asm__ ("fstcw %0" : "=m" (*&new_exc));
+
+ excepts &= FE_ALL_EXCEPT;
+ old_exc = (~new_exc) & FE_ALL_EXCEPT;
+
+ new_exc &= ~excepts;
+ __asm__ ("fldcw %0" : : "m" (*&new_exc));
+
+ /* If the CPU supports SSE we set the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current control word. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ xnew_exc &= ~(excepts << 7);
+
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+
+ return old_exc;
+}
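
Usage sketch for feenableexcept, a glibc extension (needs _GNU_SOURCE):
unmasking an exception means the next operation that raises it delivers
SIGFPE.

#define _GNU_SOURCE
#include <fenv.h>

int
main (void)
{
  feenableexcept (FE_DIVBYZERO);   /* unmask the trap */
  volatile double zero = 0.0;
  volatile double r = 1.0 / zero;  /* SIGFPE fires here */
  (void) r;
  return 0;
}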
diff --git a/REORG.TODO/sysdeps/i386/fpu/fegetenv.c b/REORG.TODO/sysdeps/i386/fpu/fegetenv.c
new file mode 100644
index 0000000000..983f6af25e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fegetenv.c
@@ -0,0 +1,49 @@
+/* Store current floating-point environment.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+__fegetenv (fenv_t *envp)
+{
+ __asm__ ("fnstenv %0" : "=m" (*envp));
+  /* And load it right back, since fnstenv masks all exceptions as a
+     side effect.  Intel intended this opcode for interrupt handlers,
+     which want all exceptions blocked.  */
+ __asm__ ("fldenv %0" : : "m" (*envp));
+
+ if (HAS_CPU_FEATURE (SSE))
+ __asm__ ("stmxcsr %0" : "=m" (envp->__eip));
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetenv, __old_fegetenv)
+compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_def (__fegetenv)
+libm_hidden_ver (__fegetenv, fegetenv)
+versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/REORG.TODO/sysdeps/i386/fpu/fegetexcept.c b/REORG.TODO/sysdeps/i386/fpu/fegetexcept.c
new file mode 100644
index 0000000000..dc87b7a470
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fegetexcept.c
@@ -0,0 +1,31 @@
+/* Get enabled floating-point exceptions.
+ Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Andreas Jaeger <aj@suse.de>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+
+int
+fegetexcept (void)
+{
+ unsigned short int exc;
+
+ /* Get the current control word. */
+ __asm__ ("fstcw %0" : "=m" (*&exc));
+
+ return (~exc) & FE_ALL_EXCEPT;
+}
diff --git a/REORG.TODO/sysdeps/i386/fpu/fegetmode.c b/REORG.TODO/sysdeps/i386/fpu/fegetmode.c
new file mode 100644
index 0000000000..abbce3075f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fegetmode.c
@@ -0,0 +1,32 @@
+/* Store current floating-point control modes. i386 version.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <fpu_control.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+fegetmode (femode_t *modep)
+{
+ _FPU_GETCW (modep->__control_word);
+ if (HAS_CPU_FEATURE (SSE))
+ __asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr));
+ return 0;
+}
diff --git a/REORG.TODO/sysdeps/i386/fpu/fegetround.c b/REORG.TODO/sysdeps/i386/fpu/fegetround.c
new file mode 100644
index 0000000000..8ce8b859d8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fegetround.c
@@ -0,0 +1,33 @@
+/* Return current rounding direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+
+int
+__fegetround (void)
+{
+ int cw;
+
+ __asm__ ("fnstcw %0" : "=m" (*&cw));
+
+ return cw & 0xc00;
+}
+libm_hidden_def (__fegetround)
+weak_alias (__fegetround, fegetround)
+libm_hidden_weak (fegetround)
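
Because the FE_* rounding macros on i386 are defined as the control-word
bits themselves (0, 0x400, 0x800, 0xc00), the `cw & 0xc00' above is
directly a valid return value:

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  switch (fegetround ())
    {
    case FE_TONEAREST:  puts ("to nearest");  break;  /* 0x000 */
    case FE_DOWNWARD:   puts ("downward");    break;  /* 0x400 */
    case FE_UPWARD:     puts ("upward");      break;  /* 0x800 */
    case FE_TOWARDZERO: puts ("toward zero"); break;  /* 0xc00 */
    }
  return 0;
}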
diff --git a/REORG.TODO/sysdeps/i386/fpu/feholdexcpt.c b/REORG.TODO/sysdeps/i386/fpu/feholdexcpt.c
new file mode 100644
index 0000000000..d327358913
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/feholdexcpt.c
@@ -0,0 +1,50 @@
+/* Store current floating-point environment and clear exceptions.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+__feholdexcept (fenv_t *envp)
+{
+ /* Store the environment. Recall that fnstenv has a side effect of
+ masking all exceptions. Then clear all exceptions. */
+ __asm__ volatile ("fnstenv %0; fnclex" : "=m" (*envp));
+
+ /* If the CPU supports SSE we set the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int xwork;
+
+ /* Get the current control word. */
+ __asm__ ("stmxcsr %0" : "=m" (envp->__eip));
+
+ /* Set all exceptions to non-stop and clear them. */
+ xwork = (envp->__eip | 0x1f80) & ~0x3f;
+
+ __asm__ ("ldmxcsr %0" : : "m" (*&xwork));
+ }
+
+ return 0;
+}
+libm_hidden_def (__feholdexcept)
+weak_alias (__feholdexcept, feholdexcept)
+libm_hidden_weak (feholdexcept)
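
feholdexcept is normally paired with feupdateenv (added further down in
this diff): run a sub-computation with traps masked and flags clear, then
merge whatever it raised back into the caller's environment. Sketch:

#include <fenv.h>

double
call_nonstop (double (*f) (double), double x)
{
  fenv_t env;
  feholdexcept (&env);  /* save env; mask and clear all exceptions */
  double r = f (x);
  feupdateenv (&env);   /* restore env; re-raise what f set */
  return r;
}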
diff --git a/REORG.TODO/sysdeps/i386/fpu/fenv_private.h b/REORG.TODO/sysdeps/i386/fpu/fenv_private.h
new file mode 100644
index 0000000000..e20e1f1662
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fenv_private.h
@@ -0,0 +1,501 @@
+#ifndef FENV_PRIVATE_H
+#define FENV_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+#ifdef __SSE2_MATH__
+# define math_opt_barrier(x) \
+ ({ __typeof(x) __x; \
+ if (sizeof (x) <= sizeof (double)) \
+ __asm ("" : "=x" (__x) : "0" (x)); \
+ else \
+ __asm ("" : "=t" (__x) : "0" (x)); \
+ __x; })
+# define math_force_eval(x) \
+ do { \
+ if (sizeof (x) <= sizeof (double)) \
+ __asm __volatile ("" : : "x" (x)); \
+ else \
+ __asm __volatile ("" : : "f" (x)); \
+ } while (0)
+#else
+# define math_opt_barrier(x) \
+ ({ __typeof (x) __x; \
+ __asm ("" : "=t" (__x) : "0" (x)); \
+ __x; })
+# define math_force_eval(x) \
+ do { \
+ __typeof (x) __x = (x); \
+ if (sizeof (x) <= sizeof (double)) \
+ __asm __volatile ("" : : "m" (__x)); \
+ else \
+ __asm __volatile ("" : : "f" (__x)); \
+ } while (0)
+#endif
+
+/* This file is used by both the 32- and 64-bit ports. The 64-bit port
+ has a field in the fenv_t for the mxcsr; the 32-bit port does not.
+ Instead, we (ab)use the only 32-bit field extant in the struct. */
+#ifndef __x86_64__
+# define __mxcsr __eip
+#endif
+
+
+/* All of these functions are private to libm and are used in pairs:
+   save+change the fp state, then restore the original state.  Thus we
+   need not handle both the 387 and the SSE unit, only the one actually
+   in use.  */
+
+#if defined __AVX__ || defined SSE2AVX
+# define STMXCSR "vstmxcsr"
+# define LDMXCSR "vldmxcsr"
+#else
+# define STMXCSR "stmxcsr"
+# define LDMXCSR "ldmxcsr"
+#endif
+
+static __always_inline void
+libc_feholdexcept_sse (fenv_t *e)
+{
+ unsigned int mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ e->__mxcsr = mxcsr;
+ mxcsr = (mxcsr | 0x1f80) & ~0x3f;
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+static __always_inline void
+libc_feholdexcept_387 (fenv_t *e)
+{
+ /* Recall that fnstenv has a side-effect of masking exceptions.
+ Clobber all of the fp registers so that the TOS field is 0. */
+ asm volatile ("fnstenv %0; fnclex"
+ : "=m"(*e)
+ : : "st", "st(1)", "st(2)", "st(3)",
+ "st(4)", "st(5)", "st(6)", "st(7)");
+}
+
+static __always_inline void
+libc_fesetround_sse (int r)
+{
+ unsigned int mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ mxcsr = (mxcsr & ~0x6000) | (r << 3);
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+static __always_inline void
+libc_fesetround_387 (int r)
+{
+ fpu_control_t cw;
+ _FPU_GETCW (cw);
+ cw = (cw & ~0xc00) | r;
+ _FPU_SETCW (cw);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_sse (fenv_t *e, int r)
+{
+ unsigned int mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ e->__mxcsr = mxcsr;
+ mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+/* Set both rounding mode and precision. A convenience function for use
+ by libc_feholdexcept_setround and libc_feholdexcept_setround_53bit. */
+static __always_inline void
+libc_feholdexcept_setround_387_prec (fenv_t *e, int r)
+{
+ libc_feholdexcept_387 (e);
+
+ fpu_control_t cw = e->__control_word;
+ cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+ cw |= r | 0x3f;
+ _FPU_SETCW (cw);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_387 (fenv_t *e, int r)
+{
+ libc_feholdexcept_setround_387_prec (e, r | _FPU_EXTENDED);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_387_53bit (fenv_t *e, int r)
+{
+ libc_feholdexcept_setround_387_prec (e, r | _FPU_DOUBLE);
+}
+
+static __always_inline int
+libc_fetestexcept_sse (int e)
+{
+ unsigned int mxcsr;
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+ return mxcsr & e & FE_ALL_EXCEPT;
+}
+
+static __always_inline int
+libc_fetestexcept_387 (int ex)
+{
+ fexcept_t temp;
+ asm volatile ("fnstsw %0" : "=a" (temp));
+ return temp & ex & FE_ALL_EXCEPT;
+}
+
+static __always_inline void
+libc_fesetenv_sse (fenv_t *e)
+{
+ asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
+}
+
+static __always_inline void
+libc_fesetenv_387 (fenv_t *e)
+{
+ /* Clobber all fp registers so that the TOS value we saved earlier is
+ compatible with the current state of the compiler. */
+ asm volatile ("fldenv %0"
+ : : "m" (*e)
+ : "st", "st(1)", "st(2)", "st(3)",
+ "st(4)", "st(5)", "st(6)", "st(7)");
+}
+
+static __always_inline int
+libc_feupdateenv_test_sse (fenv_t *e, int ex)
+{
+ unsigned int mxcsr, old_mxcsr, cur_ex;
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+ cur_ex = mxcsr & FE_ALL_EXCEPT;
+
+ /* Merge current exceptions with the old environment. */
+ old_mxcsr = e->__mxcsr;
+ mxcsr = old_mxcsr | cur_ex;
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+
+ /* Raise SIGFPE for any new exceptions since the hold. Expect that
+ the normal environment has all exceptions masked. */
+ if (__glibc_unlikely (~(old_mxcsr >> 7) & cur_ex))
+ __feraiseexcept (cur_ex);
+
+ /* Test for exceptions raised since the hold. */
+ return cur_ex & ex;
+}
+
+static __always_inline int
+libc_feupdateenv_test_387 (fenv_t *e, int ex)
+{
+ fexcept_t cur_ex;
+
+ /* Save current exceptions. */
+ asm volatile ("fnstsw %0" : "=a" (cur_ex));
+ cur_ex &= FE_ALL_EXCEPT;
+
+ /* Reload original environment. */
+ libc_fesetenv_387 (e);
+
+ /* Merge current exceptions. */
+ __feraiseexcept (cur_ex);
+
+ /* Test for exceptions raised since the hold. */
+ return cur_ex & ex;
+}
+
+static __always_inline void
+libc_feupdateenv_sse (fenv_t *e)
+{
+ libc_feupdateenv_test_sse (e, 0);
+}
+
+static __always_inline void
+libc_feupdateenv_387 (fenv_t *e)
+{
+ libc_feupdateenv_test_387 (e, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_sse (fenv_t *e, int r)
+{
+ unsigned int mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ e->__mxcsr = mxcsr;
+ mxcsr = (mxcsr & ~0x6000) | (r << 3);
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+static __always_inline void
+libc_feholdsetround_387_prec (fenv_t *e, int r)
+{
+ fpu_control_t cw;
+
+ _FPU_GETCW (cw);
+ e->__control_word = cw;
+ cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+ cw |= r;
+ _FPU_SETCW (cw);
+}
+
+static __always_inline void
+libc_feholdsetround_387 (fenv_t *e, int r)
+{
+ libc_feholdsetround_387_prec (e, r | _FPU_EXTENDED);
+}
+
+static __always_inline void
+libc_feholdsetround_387_53bit (fenv_t *e, int r)
+{
+ libc_feholdsetround_387_prec (e, r | _FPU_DOUBLE);
+}
+
+static __always_inline void
+libc_feresetround_sse (fenv_t *e)
+{
+ unsigned int mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+
+static __always_inline void
+libc_feresetround_387 (fenv_t *e)
+{
+ _FPU_SETCW (e->__control_word);
+}
+
+#ifdef __SSE_MATH__
+# define libc_feholdexceptf libc_feholdexcept_sse
+# define libc_fesetroundf libc_fesetround_sse
+# define libc_feholdexcept_setroundf libc_feholdexcept_setround_sse
+# define libc_fetestexceptf libc_fetestexcept_sse
+# define libc_fesetenvf libc_fesetenv_sse
+# define libc_feupdateenv_testf libc_feupdateenv_test_sse
+# define libc_feupdateenvf libc_feupdateenv_sse
+# define libc_feholdsetroundf libc_feholdsetround_sse
+# define libc_feresetroundf libc_feresetround_sse
+#else
+# define libc_feholdexceptf libc_feholdexcept_387
+# define libc_fesetroundf libc_fesetround_387
+# define libc_feholdexcept_setroundf libc_feholdexcept_setround_387
+# define libc_fetestexceptf libc_fetestexcept_387
+# define libc_fesetenvf libc_fesetenv_387
+# define libc_feupdateenv_testf libc_feupdateenv_test_387
+# define libc_feupdateenvf libc_feupdateenv_387
+# define libc_feholdsetroundf libc_feholdsetround_387
+# define libc_feresetroundf libc_feresetround_387
+#endif /* __SSE_MATH__ */
+
+#ifdef __SSE2_MATH__
+# define libc_feholdexcept libc_feholdexcept_sse
+# define libc_fesetround libc_fesetround_sse
+# define libc_feholdexcept_setround libc_feholdexcept_setround_sse
+# define libc_fetestexcept libc_fetestexcept_sse
+# define libc_fesetenv libc_fesetenv_sse
+# define libc_feupdateenv_test libc_feupdateenv_test_sse
+# define libc_feupdateenv libc_feupdateenv_sse
+# define libc_feholdsetround libc_feholdsetround_sse
+# define libc_feresetround libc_feresetround_sse
+#else
+# define libc_feholdexcept libc_feholdexcept_387
+# define libc_fesetround libc_fesetround_387
+# define libc_feholdexcept_setround libc_feholdexcept_setround_387
+# define libc_fetestexcept libc_fetestexcept_387
+# define libc_fesetenv libc_fesetenv_387
+# define libc_feupdateenv_test libc_feupdateenv_test_387
+# define libc_feupdateenv libc_feupdateenv_387
+# define libc_feholdsetround libc_feholdsetround_387
+# define libc_feresetround libc_feresetround_387
+#endif /* __SSE2_MATH__ */
+
+#define libc_feholdexceptl libc_feholdexcept_387
+#define libc_fesetroundl libc_fesetround_387
+#define libc_feholdexcept_setroundl libc_feholdexcept_setround_387
+#define libc_fetestexceptl libc_fetestexcept_387
+#define libc_fesetenvl libc_fesetenv_387
+#define libc_feupdateenv_testl libc_feupdateenv_test_387
+#define libc_feupdateenvl libc_feupdateenv_387
+#define libc_feholdsetroundl libc_feholdsetround_387
+#define libc_feresetroundl libc_feresetround_387
+
+#ifndef __SSE2_MATH__
+# define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_387_53bit
+# define libc_feholdsetround_53bit libc_feholdsetround_387_53bit
+#endif
+
+/* We have support for rounding mode context. */
+#define HAVE_RM_CTX 1
+
+static __always_inline void
+libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r)
+{
+ unsigned int mxcsr, new_mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
+
+ ctx->env.__mxcsr = mxcsr;
+ if (__glibc_unlikely (mxcsr != new_mxcsr))
+ {
+ asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr));
+ ctx->updated_status = true;
+ }
+ else
+ ctx->updated_status = false;
+}
+
+/* Unconditional since we want to overwrite any exceptions that occurred in the
+ context. This is also why all fehold* functions unconditionally write into
+ ctx->env. */
+static __always_inline void
+libc_fesetenv_sse_ctx (struct rm_ctx *ctx)
+{
+ libc_fesetenv_sse (&ctx->env);
+}
+
+static __always_inline void
+libc_feupdateenv_sse_ctx (struct rm_ctx *ctx)
+{
+ if (__glibc_unlikely (ctx->updated_status))
+ libc_feupdateenv_test_sse (&ctx->env, 0);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_387_prec_ctx (struct rm_ctx *ctx, int r)
+{
+ libc_feholdexcept_387 (&ctx->env);
+
+ fpu_control_t cw = ctx->env.__control_word;
+ fpu_control_t old_cw = cw;
+ cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+ cw |= r | 0x3f;
+
+ if (__glibc_unlikely (old_cw != cw))
+ {
+ _FPU_SETCW (cw);
+ ctx->updated_status = true;
+ }
+ else
+ ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_feholdexcept_setround_387_ctx (struct rm_ctx *ctx, int r)
+{
+ libc_feholdexcept_setround_387_prec_ctx (ctx, r | _FPU_EXTENDED);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_387_53bit_ctx (struct rm_ctx *ctx, int r)
+{
+ libc_feholdexcept_setround_387_prec_ctx (ctx, r | _FPU_DOUBLE);
+}
+
+static __always_inline void
+libc_feholdsetround_387_prec_ctx (struct rm_ctx *ctx, int r)
+{
+ fpu_control_t cw, new_cw;
+
+ _FPU_GETCW (cw);
+ new_cw = cw;
+ new_cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+ new_cw |= r;
+
+ ctx->env.__control_word = cw;
+ if (__glibc_unlikely (new_cw != cw))
+ {
+ _FPU_SETCW (new_cw);
+ ctx->updated_status = true;
+ }
+ else
+ ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_feholdsetround_387_ctx (struct rm_ctx *ctx, int r)
+{
+ libc_feholdsetround_387_prec_ctx (ctx, r | _FPU_EXTENDED);
+}
+
+static __always_inline void
+libc_feholdsetround_387_53bit_ctx (struct rm_ctx *ctx, int r)
+{
+ libc_feholdsetround_387_prec_ctx (ctx, r | _FPU_DOUBLE);
+}
+
+static __always_inline void
+libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r)
+{
+ unsigned int mxcsr, new_mxcsr;
+
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ new_mxcsr = (mxcsr & ~0x6000) | (r << 3);
+
+ ctx->env.__mxcsr = mxcsr;
+ if (__glibc_unlikely (new_mxcsr != mxcsr))
+ {
+ asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr));
+ ctx->updated_status = true;
+ }
+ else
+ ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_feresetround_sse_ctx (struct rm_ctx *ctx)
+{
+ if (__glibc_unlikely (ctx->updated_status))
+ libc_feresetround_sse (&ctx->env);
+}
+
+static __always_inline void
+libc_feresetround_387_ctx (struct rm_ctx *ctx)
+{
+ if (__glibc_unlikely (ctx->updated_status))
+ _FPU_SETCW (ctx->env.__control_word);
+}
+
+static __always_inline void
+libc_feupdateenv_387_ctx (struct rm_ctx *ctx)
+{
+ if (__glibc_unlikely (ctx->updated_status))
+ libc_feupdateenv_test_387 (&ctx->env, 0);
+}
+
+#ifdef __SSE_MATH__
+# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_sse_ctx
+# define libc_fesetenvf_ctx libc_fesetenv_sse_ctx
+# define libc_feupdateenvf_ctx libc_feupdateenv_sse_ctx
+# define libc_feholdsetroundf_ctx libc_feholdsetround_sse_ctx
+# define libc_feresetroundf_ctx libc_feresetround_sse_ctx
+#else
+# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_387_ctx
+# define libc_feupdateenvf_ctx libc_feupdateenv_387_ctx
+# define libc_feholdsetroundf_ctx libc_feholdsetround_387_ctx
+# define libc_feresetroundf_ctx libc_feresetround_387_ctx
+#endif /* __SSE_MATH__ */
+
+#ifdef __SSE2_MATH__
+# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_sse_ctx
+# define libc_fesetenv_ctx libc_fesetenv_sse_ctx
+# define libc_feupdateenv_ctx libc_feupdateenv_sse_ctx
+# define libc_feholdsetround_ctx libc_feholdsetround_sse_ctx
+# define libc_feresetround_ctx libc_feresetround_sse_ctx
+#else
+# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_387_ctx
+# define libc_feupdateenv_ctx libc_feupdateenv_387_ctx
+# define libc_feholdsetround_ctx libc_feholdsetround_387_ctx
+# define libc_feresetround_ctx libc_feresetround_387_ctx
+#endif /* __SSE2_MATH__ */
+
+#define libc_feholdexcept_setroundl_ctx libc_feholdexcept_setround_387_ctx
+#define libc_feupdateenvl_ctx libc_feupdateenv_387_ctx
+#define libc_feholdsetroundl_ctx libc_feholdsetround_387_ctx
+#define libc_feresetroundl_ctx libc_feresetround_387_ctx
+
+#ifndef __SSE2_MATH__
+# define libc_feholdsetround_53bit_ctx libc_feholdsetround_387_53bit_ctx
+# define libc_feresetround_53bit_ctx libc_feresetround_387_ctx
+#endif
+
+#undef __mxcsr
+
+#endif /* FENV_PRIVATE_H */
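
The math_opt_barrier/math_force_eval macros at the top of this header let
libm force an arithmetic operation for its exception side effect alone. A
hedged sketch of the idiom (it relies only on the macros defined above):

static void
force_underflow (void)
{
  double tiny = 0x1p-767;
  /* The barrier stops the compiler from constant-folding the multiply;
     math_force_eval then evaluates R even though it is otherwise dead,
     so FE_UNDERFLOW and FE_INEXACT really get raised.  */
  double r = math_opt_barrier (tiny) * tiny;
  math_force_eval (r);
}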
diff --git a/REORG.TODO/sysdeps/i386/fpu/fesetenv.c b/REORG.TODO/sysdeps/i386/fpu/fesetenv.c
new file mode 100644
index 0000000000..a338e5d555
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fesetenv.c
@@ -0,0 +1,131 @@
+/* Install given floating-point environment.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <fpu_control.h>
+#include <assert.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+
+/* All exceptions, including the x86-specific "denormal operand"
+ exception. */
+#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM)
+
+
+int
+__fesetenv (const fenv_t *envp)
+{
+ fenv_t temp;
+
+ /* The memory block used by fstenv/fldenv has a size of 28 bytes. */
+ assert (sizeof (fenv_t) == 28);
+
+ /* Install the environment specified by ENVP. But there are a few
+ values which we do not want to come from the saved environment.
+ Therefore, we get the current environment and replace the values
+ we want to use from the environment specified by the parameter. */
+ __asm__ ("fnstenv %0" : "=m" (*&temp));
+
+ if (envp == FE_DFL_ENV)
+ {
+ temp.__control_word |= FE_ALL_EXCEPT_X86;
+ temp.__control_word &= ~FE_TOWARDZERO;
+ temp.__control_word |= _FPU_EXTENDED;
+ temp.__status_word &= ~FE_ALL_EXCEPT_X86;
+ }
+ else if (envp == FE_NOMASK_ENV)
+ {
+ temp.__control_word &= ~(FE_ALL_EXCEPT | FE_TOWARDZERO);
+ /* Keep the "denormal operand" exception masked. */
+ temp.__control_word |= __FE_DENORM;
+ temp.__control_word |= _FPU_EXTENDED;
+ temp.__status_word &= ~FE_ALL_EXCEPT_X86;
+ }
+ else
+ {
+ temp.__control_word &= ~(FE_ALL_EXCEPT_X86
+ | FE_TOWARDZERO
+ | _FPU_EXTENDED);
+ temp.__control_word |= (envp->__control_word
+ & (FE_ALL_EXCEPT_X86
+ | FE_TOWARDZERO
+ | _FPU_EXTENDED));
+ temp.__status_word &= ~FE_ALL_EXCEPT_X86;
+ temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT_X86;
+ }
+ temp.__eip = 0;
+ temp.__cs_selector = 0;
+ temp.__opcode = 0;
+ temp.__data_offset = 0;
+ temp.__data_selector = 0;
+
+ __asm__ ("fldenv %0" : : "m" (temp));
+
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int mxcsr;
+ __asm__ ("stmxcsr %0" : "=m" (mxcsr));
+
+ if (envp == FE_DFL_ENV)
+ {
+ /* Clear SSE exceptions. */
+ mxcsr &= ~FE_ALL_EXCEPT_X86;
+ /* Set mask for SSE MXCSR. */
+ mxcsr |= (FE_ALL_EXCEPT_X86 << 7);
+ /* Set rounding to FE_TONEAREST. */
+ mxcsr &= ~0x6000;
+ mxcsr |= (FE_TONEAREST << 3);
+ /* Clear the FZ and DAZ bits. */
+ mxcsr &= ~0x8040;
+ }
+ else if (envp == FE_NOMASK_ENV)
+ {
+ /* Clear SSE exceptions. */
+ mxcsr &= ~FE_ALL_EXCEPT_X86;
+ /* Do not mask exceptions. */
+ mxcsr &= ~(FE_ALL_EXCEPT << 7);
+ /* Keep the "denormal operand" exception masked. */
+ mxcsr |= (__FE_DENORM << 7);
+ /* Set rounding to FE_TONEAREST. */
+ mxcsr &= ~0x6000;
+ mxcsr |= (FE_TONEAREST << 3);
+ /* Clear the FZ and DAZ bits. */
+ mxcsr &= ~0x8040;
+ }
+ else
+ mxcsr = envp->__eip;
+
+ __asm__ ("ldmxcsr %0" : : "m" (mxcsr));
+ }
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetenv, __old_fesetenv)
+compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_def (__fesetenv)
+libm_hidden_ver (__fesetenv, fesetenv)
+versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2);
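
Usage sketch for the FE_DFL_ENV path above: snapshot the environment, run
under defaults, then restore.

#include <fenv.h>

void
with_default_env (void (*f) (void))
{
  fenv_t saved;
  fegetenv (&saved);
  fesetenv (FE_DFL_ENV);  /* round to nearest, all exceptions masked */
  f ();
  fesetenv (&saved);
}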
diff --git a/REORG.TODO/sysdeps/i386/fpu/fesetexcept.c b/REORG.TODO/sysdeps/i386/fpu/fesetexcept.c
new file mode 100644
index 0000000000..adfcf17ba6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fesetexcept.c
@@ -0,0 +1,31 @@
+/* Set given exception flags. i386 version.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+
+int
+fesetexcept (int excepts)
+{
+ fenv_t temp;
+
+ __asm__ ("fnstenv %0" : "=m" (*&temp));
+ temp.__status_word |= excepts & FE_ALL_EXCEPT;
+ __asm__ ("fldenv %0" : : "m" (*&temp));
+
+ return 0;
+}
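
fesetexcept (from TS 18661-1) only turns on the sticky flags: the fldenv
above is not followed by an arithmetic operation or fwait, so no trap
fires even for unmasked exceptions, unlike feraiseexcept (see
fraiseexcpt.c further down). Sketch of the distinction:

#include <fenv.h>

void
flag_only_vs_raise (void)
{
  fesetexcept (FE_INEXACT);    /* sets the flag; never traps */
  feraiseexcept (FE_INEXACT);  /* sets the flag and can trap if unmasked */
}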
diff --git a/REORG.TODO/sysdeps/i386/fpu/fesetmode.c b/REORG.TODO/sysdeps/i386/fpu/fesetmode.c
new file mode 100644
index 0000000000..bd9f74cd97
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fesetmode.c
@@ -0,0 +1,54 @@
+/* Install given floating-point control modes. i386 version.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <fpu_control.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+/* All exceptions, including the x86-specific "denormal operand"
+ exception. */
+#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM)
+
+int
+fesetmode (const femode_t *modep)
+{
+ fpu_control_t cw;
+ if (modep == FE_DFL_MODE)
+ cw = _FPU_DEFAULT;
+ else
+ cw = modep->__control_word;
+ _FPU_SETCW (cw);
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int mxcsr;
+ __asm__ ("stmxcsr %0" : "=m" (mxcsr));
+ /* Preserve SSE exception flags but restore other state in
+ MXCSR. */
+ mxcsr &= FE_ALL_EXCEPT_X86;
+ if (modep == FE_DFL_MODE)
+ /* Default MXCSR state has all bits zero except for those
+ masking exceptions. */
+ mxcsr |= FE_ALL_EXCEPT_X86 << 7;
+ else
+ mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;
+ __asm__ ("ldmxcsr %0" : : "m" (mxcsr));
+ }
+ return 0;
+}
diff --git a/REORG.TODO/sysdeps/i386/fpu/fesetround.c b/REORG.TODO/sysdeps/i386/fpu/fesetround.c
new file mode 100644
index 0000000000..a3fa6235c0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fesetround.c
@@ -0,0 +1,54 @@
+/* Set current rounding direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+__fesetround (int round)
+{
+ unsigned short int cw;
+
+ if ((round & ~0xc00) != 0)
+    /* ROUND is not a valid rounding mode.  */
+ return 1;
+
+ __asm__ ("fnstcw %0" : "=m" (*&cw));
+ cw &= ~0xc00;
+ cw |= round;
+ __asm__ ("fldcw %0" : : "m" (*&cw));
+
+ /* If the CPU supports SSE we set the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int xcw;
+
+ __asm__ ("stmxcsr %0" : "=m" (*&xcw));
+ xcw &= ~0x6000;
+ xcw |= round << 3;
+ __asm__ ("ldmxcsr %0" : : "m" (*&xcw));
+ }
+
+ return 0;
+}
+libm_hidden_def (__fesetround)
+weak_alias (__fesetround, fesetround)
+libm_hidden_weak (fesetround)
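
Usage sketch for fesetround: directed rounding for a cheap one-sided
bound (volatile keeps the compiler from folding the addition across the
mode change):

#include <fenv.h>

double
sum_rounded_up (double a, double b)
{
  int saved = fegetround ();
  fesetround (FE_UPWARD);
  volatile double s = a + b;  /* rounded toward +infinity */
  fesetround (saved);
  return s;
}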
diff --git a/REORG.TODO/sysdeps/i386/fpu/feupdateenv.c b/REORG.TODO/sysdeps/i386/fpu/feupdateenv.c
new file mode 100644
index 0000000000..b610289cd0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/feupdateenv.c
@@ -0,0 +1,60 @@
+/* Install given floating-point environment and raise exceptions.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <dl-procinfo.h>
+#include <ldsodefs.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+ fexcept_t temp;
+ unsigned int xtemp = 0;
+
+ /* Save current exceptions. */
+ __asm__ ("fnstsw %0" : "=m" (*&temp));
+
+ /* If the CPU supports SSE we test the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ __asm__ ("stmxcsr %0" : "=m" (*&xtemp));
+
+ temp = (temp | xtemp) & FE_ALL_EXCEPT;
+
+ /* Install new environment. */
+ __fesetenv (envp);
+
+  /* Raise the saved exception.  Incidentally, for us the
+     implementation-defined format of the values in objects of type
+     fexcept_t is the same as that specified using the FE_* constants.  */
+ __feraiseexcept ((int) temp);
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feupdateenv, __old_feupdateenv)
+compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1);
+#endif
+
+libm_hidden_def (__feupdateenv)
+libm_hidden_ver (__feupdateenv, feupdateenv)
+versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2);
diff --git a/REORG.TODO/sysdeps/i386/fpu/fgetexcptflg.c b/REORG.TODO/sysdeps/i386/fpu/fgetexcptflg.c
new file mode 100644
index 0000000000..954e5f69d8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fgetexcptflg.c
@@ -0,0 +1,57 @@
+/* Store current representation for exceptions.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+
+int
+__fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+ fexcept_t temp;
+
+ /* Get the current exceptions. */
+ __asm__ ("fnstsw %0" : "=m" (*&temp));
+
+ *flagp = temp & excepts & FE_ALL_EXCEPT;
+
+  /* If the CPU supports SSE, we read the MXCSR as well.  */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int sse_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&sse_exc));
+
+ *flagp |= sse_exc & excepts & FE_ALL_EXCEPT;
+ }
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetexceptflag, __old_fegetexceptflag)
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
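
fegetexceptflag pairs with fesetexceptflag (see fsetexcptflg.c below) to
save and restore the opaque flag state around a scratch computation:

#include <fenv.h>

void
discard_scratch_flags (void)
{
  fexcept_t saved;
  fegetexceptflag (&saved, FE_ALL_EXCEPT);
  /* ... computation whose exception flags should not leak out ...  */
  fesetexceptflag (&saved, FE_ALL_EXCEPT);
}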
diff --git a/REORG.TODO/sysdeps/i386/fpu/fraiseexcpt.c b/REORG.TODO/sysdeps/i386/fpu/fraiseexcpt.c
new file mode 100644
index 0000000000..913d7b912c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fraiseexcpt.c
@@ -0,0 +1,124 @@
+/* Raise given exceptions.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <math.h>
+
+int
+__feraiseexcept (int excepts)
+{
+  /* Raise exceptions represented by EXCEPTS.  But we must raise only
+     one signal at a time.  It is important that if the
+     overflow/underflow exception and the divide by zero exception are
+     given at the same time, the overflow/underflow exception follows
+     the divide by zero exception.  */
+
+ /* First: invalid exception. */
+ if ((FE_INVALID & excepts) != 0)
+ {
+ /* One example of an invalid operation is 0.0 / 0.0. */
+ double d;
+ __asm__ __volatile__ ("fldz; fdiv %%st, %%st(0); fwait" : "=t" (d));
+ (void) &d;
+ }
+
+ /* Next: division by zero. */
+ if ((FE_DIVBYZERO & excepts) != 0)
+ {
+ double d;
+ __asm__ __volatile__ ("fldz; fld1; fdivp %%st, %%st(1); fwait"
+ : "=t" (d));
+ (void) &d;
+ }
+
+ /* Next: overflow. */
+ if ((FE_OVERFLOW & excepts) != 0)
+ {
+ /* There is no way to raise only the overflow flag. Do it the
+ hard way. */
+ fenv_t temp;
+
+      /* Bah, we have to set selected exceptions.  Since there is no
+	 `fldsw' instruction we have to do it the hard way.  */
+ __asm__ __volatile__ ("fnstenv %0" : "=m" (*&temp));
+
+ /* Set the relevant bits. */
+ temp.__status_word |= FE_OVERFLOW;
+
+ /* Put the new data in effect. */
+ __asm__ __volatile__ ("fldenv %0" : : "m" (*&temp));
+
+ /* And raise the exception. */
+ __asm__ __volatile__ ("fwait");
+ }
+
+ /* Next: underflow. */
+ if ((FE_UNDERFLOW & excepts) != 0)
+ {
+ /* There is no way to raise only the underflow flag. Do it the
+ hard way. */
+ fenv_t temp;
+
+      /* Bah, we have to set selected exceptions.  Since there is no
+	 `fldsw' instruction we have to do it the hard way.  */
+ __asm__ __volatile__ ("fnstenv %0" : "=m" (*&temp));
+
+ /* Set the relevant bits. */
+ temp.__status_word |= FE_UNDERFLOW;
+
+ /* Put the new data in effect. */
+ __asm__ __volatile__ ("fldenv %0" : : "m" (*&temp));
+
+ /* And raise the exception. */
+ __asm__ __volatile__ ("fwait");
+ }
+
+ /* Last: inexact. */
+ if ((FE_INEXACT & excepts) != 0)
+ {
+ /* There is no way to raise only the inexact flag. Do it the
+ hard way. */
+ fenv_t temp;
+
+      /* Bah, we have to set selected exceptions.  Since there is no
+	 `fldsw' instruction we have to do it the hard way.  */
+ __asm__ __volatile__ ("fnstenv %0" : "=m" (*&temp));
+
+ /* Set the relevant bits. */
+ temp.__status_word |= FE_INEXACT;
+
+ /* Put the new data in effect. */
+ __asm__ __volatile__ ("fldenv %0" : : "m" (*&temp));
+
+ /* And raise the exception. */
+ __asm__ __volatile__ ("fwait");
+ }
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feraiseexcept, __old_feraiseexcept)
+compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_def (__feraiseexcept)
+libm_hidden_ver (__feraiseexcept, feraiseexcept)
+versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2);
diff --git a/REORG.TODO/sysdeps/i386/fpu/fsetexcptflg.c b/REORG.TODO/sysdeps/i386/fpu/fsetexcptflg.c
new file mode 100644
index 0000000000..efa64aaefd
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/fsetexcptflg.c
@@ -0,0 +1,69 @@
+/* Set floating-point environment exception handling.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <math.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+int
+__fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+ fenv_t temp;
+
+ /* Get the current environment. We have to do this since we cannot
+ separately set the status word. */
+ __asm__ ("fnstenv %0" : "=m" (*&temp));
+
+ temp.__status_word &= ~(excepts & FE_ALL_EXCEPT);
+ temp.__status_word |= *flagp & excepts & FE_ALL_EXCEPT;
+
+  /* Store the new status word (along with the rest of the environment).
+     Newly set exceptions will not take effect until the next
+     floating-point instruction is executed.  */
+ __asm__ ("fldenv %0" : : "m" (*&temp));
+
+ /* If the CPU supports SSE, we set the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ /* Set the relevant bits. */
+ xnew_exc &= ~(excepts & FE_ALL_EXCEPT);
+ xnew_exc |= *flagp & excepts & FE_ALL_EXCEPT;
+
+ /* Put the new data in effect. */
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetexceptflag, __old_fesetexceptflag)
+compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2);
diff --git a/REORG.TODO/sysdeps/i386/fpu/ftestexcept.c b/REORG.TODO/sysdeps/i386/fpu/ftestexcept.c
new file mode 100644
index 0000000000..f523f9e709
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/ftestexcept.c
@@ -0,0 +1,40 @@
+/* Test exception in current environment.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <unistd.h>
+#include <dl-procinfo.h>
+#include <ldsodefs.h>
+
+int
+fetestexcept (int excepts)
+{
+ short temp;
+ int xtemp = 0;
+
+ /* Get current exceptions. */
+ __asm__ ("fnstsw %0" : "=a" (temp));
+
+  /* If the CPU supports SSE, we test the MXCSR as well.  */
+ if (HAS_CPU_FEATURE (SSE))
+ __asm__ ("stmxcsr %0" : "=m" (*&xtemp));
+
+ return (temp | xtemp) & excepts & FE_ALL_EXCEPT;
+}
+libm_hidden_def (fetestexcept)
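Likewise, a small standard-C sketch exercising fetestexcept; on SSE hardware the implementation above ORs the MXCSR bits into the result, so the check below sees exceptions raised on either unit:

  #include <fenv.h>
  #include <stdio.h>

  int
  main (void)
  {
    volatile double zero = 0.0;
    volatile double r;

    feclearexcept (FE_ALL_EXCEPT);
    r = 1.0 / zero;                  /* raises FE_DIVBYZERO */
    (void) r;
    if (fetestexcept (FE_DIVBYZERO))
      puts ("division by zero detected");
    return 0;
  }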
diff --git a/REORG.TODO/sysdeps/i386/fpu/halfulp.c b/REORG.TODO/sysdeps/i386/fpu/halfulp.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/halfulp.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/i386-math-asm.h b/REORG.TODO/sysdeps/i386/fpu/i386-math-asm.h
new file mode 100644
index 0000000000..6ffc8e6f64
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/i386-math-asm.h
@@ -0,0 +1,340 @@
+/* Helper macros for x86 libm functions.
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _I386_MATH_ASM_H
+#define _I386_MATH_ASM_H 1
+
+/* Remove excess range and precision by storing a value on the stack
+ and loading it back. */
+#define FLT_NARROW_EVAL \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ fstps (%esp); \
+ flds (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4);
+#define DBL_NARROW_EVAL \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fstpl (%esp); \
+ fldl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8);
+
+/* Define constants for the minimum value of a floating-point
+ type. */
+#define DEFINE_FLT_MIN \
+ .section .rodata.cst4,"aM",@progbits,4; \
+ .p2align 2; \
+ .type flt_min,@object; \
+flt_min: \
+ .byte 0, 0, 0x80, 0; \
+ .size flt_min, .-flt_min;
+#define DEFINE_DBL_MIN \
+ .section .rodata.cst8,"aM",@progbits,8; \
+ .p2align 3; \
+ .type dbl_min,@object; \
+dbl_min: \
+ .byte 0, 0, 0, 0, 0, 0, 0x10, 0; \
+ .size dbl_min, .-dbl_min;
+#define DEFINE_LDBL_MIN \
+ .section .rodata.cst16,"aM",@progbits,16; \
+ .p2align 4; \
+ .type ldbl_min,@object; \
+ldbl_min: \
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x1, 0; \
+ .byte 0, 0, 0, 0, 0, 0; \
+ .size ldbl_min, .-ldbl_min;
+
+/* Remove excess range and precision by storing a value on the stack
+ and loading it back. The value is given to be nonnegative or NaN;
+ if it is subnormal, also force an underflow exception. The
+ relevant constant for the minimum of the type must have been
+ defined, the MO macro must have been defined for access to memory
+ operands, and, if PIC, the PIC register must have been loaded. */
+#define FLT_NARROW_EVAL_UFLOW_NONNEG_NAN \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ flds MO(flt_min); \
+ fld %st(1); \
+ fucompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6424f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstps (%esp); \
+6424: fstps (%esp); \
+ flds (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4);
+#define DBL_NARROW_EVAL_UFLOW_NONNEG_NAN \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fldl MO(dbl_min); \
+ fld %st(1); \
+ fucompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6453f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstpl (%esp); \
+6453: fstpl (%esp); \
+ fldl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8);
+
+/* Likewise, but the argument is not a NaN (so fcom instructions,
+ which support memory operands, can be used). */
+#define FLT_NARROW_EVAL_UFLOW_NONNEG \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ fcoms MO(flt_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6424f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstps (%esp); \
+6424: fstps (%esp); \
+ flds (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4);
+#define DBL_NARROW_EVAL_UFLOW_NONNEG \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fcoml MO(dbl_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6453f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstpl (%esp); \
+6453: fstpl (%esp); \
+ fldl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8);
+
+/* Likewise, but the non-NaN argument may be negative. */
+#define FLT_NARROW_EVAL_UFLOW_NONNAN \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ fld %st(0); \
+ fabs; \
+ fcomps MO(flt_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6424f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstps (%esp); \
+6424: fstps (%esp); \
+ flds (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4);
+#define DBL_NARROW_EVAL_UFLOW_NONNAN \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fld %st(0); \
+ fabs; \
+ fcompl MO(dbl_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6453f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstpl (%esp); \
+6453: fstpl (%esp); \
+ fldl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8);
+
+/* Force an underflow exception if the given value is subnormal. The
+ relevant constant for the minimum of the type must have been
+ defined, the MO macro must have been defined for access to memory
+ operands, and, if PIC, the PIC register must have been loaded. */
+#define FLT_CHECK_FORCE_UFLOW \
+ flds MO(flt_min); \
+ fld %st(1); \
+ fabs; \
+ fucompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6424f; \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstps (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4); \
+6424:
+#define DBL_CHECK_FORCE_UFLOW \
+ fldl MO(dbl_min); \
+ fld %st(1); \
+ fabs; \
+ fucompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6453f; \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstpl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8); \
+6453:
+
+/* Likewise, but also remove excess range and precision if the value
+ is subnormal. */
+#define FLT_CHECK_FORCE_UFLOW_NARROW \
+ flds MO(flt_min); \
+ fld %st(1); \
+ fabs; \
+ fucompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6424f; \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstps (%esp); \
+ fstps (%esp); \
+ flds (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4); \
+6424:
+#define DBL_CHECK_FORCE_UFLOW_NARROW \
+ fldl MO(dbl_min); \
+ fld %st(1); \
+ fabs; \
+ fucompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6453f; \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstpl (%esp); \
+ fstpl (%esp); \
+ fldl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8); \
+6453:
+
+/* Likewise, but the argument is nonnegative or NaN. */
+#define LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN \
+ fldt MO(ldbl_min); \
+ fld %st(1); \
+ fucompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6464f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstp %st(0); \
+6464:
+
+/* Likewise, but the argument is not a NaN. */
+#define FLT_CHECK_FORCE_UFLOW_NONNAN \
+ fld %st(0); \
+ fabs; \
+ fcomps MO(flt_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6424f; \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstps (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4); \
+6424:
+#define DBL_CHECK_FORCE_UFLOW_NONNAN \
+ fld %st(0); \
+ fabs; \
+ fcompl MO(dbl_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6453f; \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstpl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8); \
+6453:
+#define LDBL_CHECK_FORCE_UFLOW_NONNAN \
+ fldt MO(ldbl_min); \
+ fld %st(1); \
+ fabs; \
+ fcompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6464f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstp %st(0); \
+6464:
+
+/* Likewise, but the argument is nonnegative and not a NaN. */
+#define FLT_CHECK_FORCE_UFLOW_NONNEG \
+ fcoms MO(flt_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6424f; \
+ subl $4, %esp; \
+ cfi_adjust_cfa_offset (4); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstps (%esp); \
+ addl $4, %esp; \
+ cfi_adjust_cfa_offset (-4); \
+6424:
+#define DBL_CHECK_FORCE_UFLOW_NONNEG \
+ fcoml MO(dbl_min); \
+ fnstsw; \
+ sahf; \
+ jnc 6453f; \
+ subl $8, %esp; \
+ cfi_adjust_cfa_offset (8); \
+ fld %st(0); \
+ fmul %st(0); \
+ fstpl (%esp); \
+ addl $8, %esp; \
+ cfi_adjust_cfa_offset (-8); \
+6453:
+#define LDBL_CHECK_FORCE_UFLOW_NONNEG \
+ fldt MO(ldbl_min); \
+ fld %st(1); \
+ fcompp; \
+ fnstsw; \
+ sahf; \
+ jnc 6464f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstp %st(0); \
+6464:
+
+#endif /* i386-math-asm.h. */
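The store/reload idiom behind the *_NARROW_EVAL macros has a direct C analogue: pushing a value through a narrower memory object discards the x87's excess 80-bit range and precision. A rough sketch of the same idea (the helper name is illustrative, not a glibc interface):

  /* Equivalent in spirit to DBL_NARROW_EVAL: the volatile store
     compiles to fstpl (rounding to 53-bit double), the reload to
     fldl.  */
  static double
  narrow_eval (double x)
  {
    volatile double t = x;
    return t;
  }

The *_CHECK_FORCE_UFLOW variants additionally compare against the type's minimum normal and, for a subnormal operand, square it with fmul %st(0); the product is thrown away, but the multiplication reliably raises the underflow exception the caller needs.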
diff --git a/REORG.TODO/sysdeps/i386/fpu/libm-test-ulps b/REORG.TODO/sysdeps/i386/fpu/libm-test-ulps
new file mode 100644
index 0000000000..0fc50907ad
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/libm-test-ulps
@@ -0,0 +1,2202 @@
+# Begin of automatic generation
+
+# Maximal error of functions:
+Function: "acos":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_downward":
+ildouble: 2
+ldouble: 2
+
+Function: "acos_towardzero":
+ildouble: 2
+ldouble: 2
+
+Function: "acos_upward":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "acosh":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 2
+
+Function: "acosh_downward":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 4
+
+Function: "acosh_towardzero":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 4
+
+Function: "acosh_upward":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 3
+
+Function: "asin":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_downward":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "asin_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asinh":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "asinh_downward":
+double: 1
+float: 1
+idouble: 1
+ildouble: 5
+ldouble: 5
+
+Function: "asinh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "asinh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: "atan":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atanh":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "atanh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 4
+
+Function: "atanh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 3
+
+Function: "atanh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: "cabs":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cabs_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cabs_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cabs_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cacos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "cacos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 6
+ldouble: 6
+
+Function: Real part of "cacos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "cacos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_upward":
+double: 7
+float: 7
+idouble: 7
+ifloat: 7
+ildouble: 7
+ldouble: 7
+
+Function: Real part of "cacosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cacosh_downward":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "cacosh_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh_upward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "carg":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "casin_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "casin_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 6
+ldouble: 6
+
+Function: Real part of "casin_towardzero":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "casin_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "casin_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casin_upward":
+double: 7
+float: 7
+idouble: 7
+ifloat: 7
+ildouble: 7
+ldouble: 7
+
+Function: Real part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casinh_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "casinh_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "casinh_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "casinh_towardzero":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "casinh_upward":
+double: 7
+float: 7
+idouble: 7
+ifloat: 7
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "casinh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "catan":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catan_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "catan_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "catan_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "catanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catanh":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "catanh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "catanh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "catanh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "cbrt_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "cbrt_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "cbrt_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccos_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccos_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccosh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccosh_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccosh_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccosh_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccosh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccosh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "cexp":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cexp_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cexp_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "cexp_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cexp_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "cexp_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cexp_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "clog":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "clog10":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "clog10":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog10_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 8
+ldouble: 8
+
+Function: Imaginary part of "clog10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog10_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 8
+ldouble: 8
+
+Function: Imaginary part of "clog10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog10_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "clog10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "clog_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "clog_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "clog_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "clog_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "clog_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "cos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cosh":
+double: 1
+float: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cosh_downward":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 3
+
+Function: "cosh_towardzero":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cosh_upward":
+double: 4
+float: 2
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 3
+
+Function: Real part of "cpow":
+double: 2
+float: 5
+idouble: 2
+ifloat: 5
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cpow":
+float: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "cpow_downward":
+double: 5
+float: 8
+idouble: 5
+ifloat: 8
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "cpow_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cpow_towardzero":
+double: 5
+float: 8
+idouble: 5
+ifloat: 8
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "cpow_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cpow_upward":
+double: 4
+float: 1
+idouble: 4
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cpow_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csin":
+float: 1
+ifloat: 1
+
+Function: Real part of "csin_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csin_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csin_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csin_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csinh":
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csinh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csinh_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csinh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csinh_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csinh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csinh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "csqrt_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "csqrt_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "csqrt_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "csqrt_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "csqrt_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctan_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctan_towardzero":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctan_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctan_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctan_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctanh_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctanh_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctanh_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctanh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctanh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "erf":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erfc":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "erfc_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "erfc_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "erfc_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "exp":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp2":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_downward":
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_upward":
+ildouble: 1
+ldouble: 1
+
+Function: "exp_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_towardzero":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "expm1":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "expm1_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "expm1_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "expm1_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "gamma":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "gamma_downward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "gamma_towardzero":
+double: 4
+float: 2
+idouble: 4
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+Function: "gamma_upward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: "hypot":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j0":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "j0_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "j0_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: "j0_upward":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "j1":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j1_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "j1_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "j1_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "jn":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "jn_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "jn_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "jn_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "lgamma":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "lgamma_downward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma_towardzero":
+double: 4
+float: 2
+idouble: 4
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma_upward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: "log":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log1p":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log1p_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "log1p_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "log1p_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "log2":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log2_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log2_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log2_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log_downward":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log_towardzero":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "pow10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "pow10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "pow_downward":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "pow_towardzero":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "pow_upward":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "sin":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sin_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sincos":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sincos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sincos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sincos_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sinh":
+double: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_downward":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 5
+
+Function: "sinh_towardzero":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 4
+
+Function: "sinh_upward":
+double: 4
+float: 2
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 5
+
+Function: "tan":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "tan_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "tan_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "tanh":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tanh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 7
+ldouble: 4
+
+Function: "tanh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tanh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 4
+
+Function: "tgamma":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "y0":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "y0_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "y0_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "y0_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "y1":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "y1_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+Function: "y1_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "y1_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+Function: "yn":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "yn_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "yn_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "yn_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+# end of automatic generation
diff --git a/REORG.TODO/sysdeps/i386/fpu/libm-test-ulps-name b/REORG.TODO/sysdeps/i386/fpu/libm-test-ulps-name
new file mode 100644
index 0000000000..54ca0d8295
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/libm-test-ulps-name
@@ -0,0 +1 @@
+ix86
diff --git a/REORG.TODO/sysdeps/i386/fpu/math-tests.h b/REORG.TODO/sysdeps/i386/fpu/math-tests.h
new file mode 100644
index 0000000000..26d0633dc0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/math-tests.h
@@ -0,0 +1,27 @@
+/* Configuration for math tests. 32-bit x86 version.
+ Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* On 32-bit x86, versions of GCC up to at least 4.8 are happy to use FPU load
+ instructions for sNaN values, and loading a float or double sNaN value will
+ already raise an INVALID exception as well as turn the sNaN into a qNaN,
+ rendering certain tests infeasible in this scenario.
+ <http://gcc.gnu.org/PR56831>. */
+#define SNAN_TESTS_float 0
+#define SNAN_TESTS_double 0
+
+#include_next <math-tests.h>
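A hedged illustration of the behaviour described above, using the GCC-specific __builtin_nansf builtin; the outcome assumes the compiler moves the value through x87 registers, as the cited GCC versions do for 32-bit x86:

  #include <fenv.h>
  #include <stdio.h>

  int
  main (void)
  {
    volatile float snan = __builtin_nansf ("");
    volatile float copy;

    feclearexcept (FE_ALL_EXCEPT);
    /* An x87 fld/fstp round trip quiets the sNaN and raises
       FE_INVALID -- exactly what defeats the sNaN tests.  */
    copy = snan;
    (void) copy;
    printf ("INVALID raised: %d\n", fetestexcept (FE_INVALID) != 0);
    return 0;
  }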
diff --git a/REORG.TODO/sysdeps/i386/fpu/math_private.h b/REORG.TODO/sysdeps/i386/fpu/math_private.h
new file mode 100644
index 0000000000..485214391f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/math_private.h
@@ -0,0 +1,7 @@
+#ifndef I386_MATH_PRIVATE_H
+#define I386_MATH_PRIVATE_H 1
+
+#include "fenv_private.h"
+#include_next <math_private.h>
+
+#endif
diff --git a/REORG.TODO/sysdeps/i386/fpu/mpatan.c b/REORG.TODO/sysdeps/i386/fpu/mpatan.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/mpatan.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/mpatan2.c b/REORG.TODO/sysdeps/i386/fpu/mpatan2.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/mpatan2.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/mpexp.c b/REORG.TODO/sysdeps/i386/fpu/mpexp.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/mpexp.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/mplog.c b/REORG.TODO/sysdeps/i386/fpu/mplog.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/mplog.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/mpsqrt.c b/REORG.TODO/sysdeps/i386/fpu/mpsqrt.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/mpsqrt.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_asinh.S b/REORG.TODO/sysdeps/i386/fpu/s_asinh.S
new file mode 100644
index 0000000000..1a60f7de2c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_asinh.S
@@ -0,0 +1,139 @@
+/* ix87 specific implementation of arcsinh.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type huge,@object
+huge: .double 1e+300
+ ASM_SIZE_DIRECTIVE(huge)
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__asinh)
+ movl 8(%esp), %ecx
+ movl $0x7fffffff, %eax
+ andl %ecx, %eax
+ andl $0x80000000, %ecx
+ movl %eax, %edx
+ orl $0x800fffff, %edx
+ incl %edx
+ jz 7f // x in ±Inf or NaN
+ xorl %ecx, 8(%esp)
+ fldl 4(%esp) // |x|
+ cmpl $0x3e300000, %eax
+ jb 2f // |x| < 2^-28
+ fldln2 // log(2) : |x|
+ cmpl $0x41b00000, %eax
+ fxch // |x| : log(2)
+ ja 3f // |x| > 2^28
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpl $0x40000000, %eax
+ ja 5f // |x| > 2
+
+ // 2^-28 <= |x| <= 2 => y = sign(x)*log1p(|x|+|x|^2/(1+sqrt(1+|x|^2)))
+ fld %st // |x| : |x| : log(2)
+ fmul %st(1) // |x|^2 : |x| : log(2)
+ fld %st // |x|^2 : |x|^2 : |x| : log(2)
+ faddl MO(one) // 1+|x|^2 : |x|^2 : |x| : log(2)
+ fsqrt // sqrt(1+|x|^2) : |x|^2 : |x| : log(2)
+ faddl MO(one) // 1+sqrt(1+|x|^2) : |x|^2 : |x| : log(2)
+ fdivrp // |x|^2/(1+sqrt(1+|x|^2)) : |x| : log(2)
+ faddp // |x|+|x|^2/(1+sqrt(1+|x|^2)) : log(2)
+ fcoml MO(limit)
+ fnstsw
+ sahf
+ ja 6f
+ fyl2xp1
+ jecxz 4f
+ fchs
+4: ret
+
+7: fldl 4(%esp)
+ ret
+
+6: faddl MO(one)
+ fyl2x
+ jecxz 4f
+ fchs
+4: ret
+
+ // |x| < 2^-28 => y = x (inexact iff |x| != 0.0)
+ .align ALIGNARG(4)
+2:
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ jecxz 4f
+ fchs // x
+4: fld %st // x : x
+ faddl MO(huge) // huge+x : x
+ fstp %st(0) // x
+ cmpl $0x00100000, %eax
+ jae 8f
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fld %st(0)
+ fmul %st(0)
+ fstpl (%esp)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+8: ret
+
+ // |x| > 2^28 => y = sign(x) * (log(|x|) + log(2))
+ .align ALIGNARG(4)
+3: fyl2x // log(|x|)
+ fldln2 // log(2) : log(|x|)
+ faddp // log(|x|)+log(2)
+ jecxz 4f
+ fchs
+4: ret
+
+ // |x| > 2 => y = sign(x) * log(2*|x| + 1/(|x|+sqrt(x*x+1)))
+ .align ALIGNARG(4)
+5: fld %st // |x| : |x| : log(2)
+ fadd %st, %st(1) // |x| : 2*|x| : log(2)
+ fld %st // |x| : |x| : 2*|x| : log(2)
+ fmul %st(1) // |x|^2 : |x| : 2*|x| : log(2)
+ faddl MO(one) // 1+|x|^2 : |x| : 2*|x| : log(2)
+ fsqrt // sqrt(1+|x|^2) : |x| : 2*|x| : log(2)
+ faddp // |x|+sqrt(1+|x|^2) : 2*|x| : log(2)
+ fdivrl MO(one) // 1/(|x|+sqrt(1+|x|^2)) : 2*|x| : log(2)
+ faddp // 2*|x|+1/(|x|+sqrt(1+|x|^2)) : log(2)
+ fyl2x // log(2*|x|+1/(|x|+sqrt(1+|x|^2)))
+ jecxz 4f
+ fchs
+4: ret
+END(__asinh)
+weak_alias (__asinh, asinh)
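The case analysis above reads more plainly in C. A loose sketch of the same branch structure (illustrative only -- it is not the code glibc builds, and the tiny-argument inexact signalling done via the huge constant is omitted):

  #include <math.h>

  static double
  asinh_sketch (double x)
  {
    double ax = fabs (x);
    double y;

    if (ax < 0x1p-28)       /* tiny: asinh(x) ~= x */
      y = ax;
    else if (ax > 0x1p28)   /* huge: log(2*|x|) = log(|x|) + log(2) */
      y = log (ax) + M_LN2;
    else if (ax > 2.0)      /* large */
      y = log (2.0 * ax + 1.0 / (ax + sqrt (ax * ax + 1.0)));
    else                    /* 2^-28 <= |x| <= 2, via log1p */
      y = log1p (ax + ax * ax / (1.0 + sqrt (1.0 + ax * ax)));
    return copysign (y, x);
  }

The float and long double variants that follow use the same structure; only the cut-over exponents (2^-14/2^14 and 2^-34/2^34 respectively) and the operand sizes differ.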
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_asinhf.S b/REORG.TODO/sysdeps/i386/fpu/s_asinhf.S
new file mode 100644
index 0000000000..12bcfef934
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_asinhf.S
@@ -0,0 +1,139 @@
+/* ix87 specific implementation of arcsinh.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type huge,@object
+huge: .double 1e+36
+ ASM_SIZE_DIRECTIVE(huge)
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__asinhf)
+ movl 4(%esp), %ecx
+ movl $0x7fffffff, %eax
+ andl %ecx, %eax
+ andl $0x80000000, %ecx
+ movl %eax, %edx
+ orl $0x807fffff, %edx
+ incl %edx
+ jz 7f // x in ±Inf or NaN
+ xorl %ecx, 4(%esp)
+ flds 4(%esp) // |x|
+ cmpl $0x38000000, %eax
+ jb 2f // |x| < 2^-14
+ fldln2 // log(2) : |x|
+ cmpl $0x47000000, %eax
+ fxch // |x| : log(2)
+ ja 3f // |x| > 2^14
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpl $0x40000000, %eax
+ ja 5f // |x| > 2
+
+ // 2^-14 <= |x| <= 2 => y = sign(x)*log1p(|x|+|x|^2/(1+sqrt(1+|x|^2)))
+ fld %st // |x| : |x| : log(2)
+ fmul %st(1) // |x|^2 : |x| : log(2)
+ fld %st // |x|^2 : |x|^2 : |x| : log(2)
+ faddl MO(one) // 1+|x|^2 : |x|^2 : |x| : log(2)
+ fsqrt // sqrt(1+|x|^2) : |x|^2 : |x| : log(2)
+ faddl MO(one) // 1+sqrt(1+|x|^2) : |x|^2 : |x| : log(2)
+ fdivrp // |x|^2/(1+sqrt(1+|x|^2)) : |x| : log(2)
+ faddp // |x|+|x|^2/(1+sqrt(1+|x|^2)) : log(2)
+ fcoml MO(limit)
+ fnstsw
+ sahf
+ ja 6f
+ fyl2xp1
+ jecxz 4f
+ fchs
+4: ret
+
+7: flds 4(%esp)
+ ret
+
+6: faddl MO(one)
+ fyl2x
+ jecxz 4f
+ fchs
+4: ret
+
+ // |x| < 2^-14 => y = x (inexact iff |x| != 0.0)
+ .align ALIGNARG(4)
+2:
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ jecxz 4f
+ fchs // x
+4: fld %st // x : x
+ faddl MO(huge) // huge+x : x
+ fstp %st(0) // x
+ cmpl $0x00800000, %eax
+ jae 8f
+ subl $4, %esp
+ cfi_adjust_cfa_offset (4)
+ fld %st(0)
+ fmul %st(0)
+ fstps (%esp)
+ addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+8: ret
+
+ // |x| > 2^14 => y = sign(x) * (log(|x|) + log(2))
+ .align ALIGNARG(4)
+3: fyl2x // log(|x|)
+ fldln2 // log(2) : log(|x|)
+ faddp // log(|x|)+log(2)
+ jecxz 4f
+ fchs
+4: ret
+
+ // |x| > 2 => y = sign(x) * log(2*|x| + 1/(|x|+sqrt(x*x+1)))
+ .align ALIGNARG(4)
+5: fld %st // |x| : |x| : log(2)
+ fadd %st, %st(1) // |x| : 2*|x| : log(2)
+ fld %st // |x| : |x| : 2*|x| : log(2)
+ fmul %st(1) // |x|^2 : |x| : 2*|x| : log(2)
+ faddl MO(one) // 1+|x|^2 : |x| : 2*|x| : log(2)
+ fsqrt // sqrt(1+|x|^2) : |x| : 2*|x| : log(2)
+ faddp // |x|+sqrt(1+|x|^2) : 2*|x| : log(2)
+ fdivrl MO(one) // 1/(|x|+sqrt(1+|x|^2)) : 2*|x| : log(2)
+ faddp // 2*|x|+1/(|x|+sqrt(1+|x|^2)) : log(2)
+ fyl2x // log(2*|x|+1/(|x|+sqrt(1+|x|^2)))
+ jecxz 4f
+ fchs
+4: ret
+END(__asinhf)
+weak_alias (__asinhf, asinhf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_asinhl.S b/REORG.TODO/sysdeps/i386/fpu/s_asinhl.S
new file mode 100644
index 0000000000..f31a267e78
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_asinhl.S
@@ -0,0 +1,144 @@
+/* ix87 specific implementation of arcsinh.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type huge,@object
+huge: .tfloat 1e+4930
+ ASM_SIZE_DIRECTIVE(huge)
+ .align ALIGNARG(4)
+	/* Please note that we use a double value for 1.0.  This number
+	   has an exact representation and so we don't get accuracy
+	   problems.  The advantage is that the code is simpler.  */
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__asinhl)
+ movl 12(%esp), %ecx
+ movl $0x7fff, %eax
+ andl %ecx, %eax
+ andl $0x8000, %ecx
+ movl %eax, %edx
+ orl $0xffff8000, %edx
+ incl %edx
+ jz 7f // x in ±Inf or NaN
+ xorl %ecx, 12(%esp)
+ fldt 4(%esp) // |x|
+ cmpl $0x3fde, %eax
+ jb 2f // |x| < 2^-34
+ fldln2 // log(2) : |x|
+ cmpl $0x4020, %eax
+ fxch // |x| : log(2)
+ ja 3f // |x| > 2^34
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpl $0x4000, %eax
+ ja 5f // |x| > 2
+
+ // 2^-34 <= |x| <= 2 => y = sign(x)*log1p(|x|+|x|^2/(1+sqrt(1+|x|^2)))
+ fld %st // |x| : |x| : log(2)
+ fmul %st(1) // |x|^2 : |x| : log(2)
+ fld %st // |x|^2 : |x|^2 : |x| : log(2)
+ faddl MO(one) // 1+|x|^2 : |x|^2 : |x| : log(2)
+ fsqrt // sqrt(1+|x|^2) : |x|^2 : |x| : log(2)
+ faddl MO(one) // 1+sqrt(1+|x|^2) : |x|^2 : |x| : log(2)
+ fdivrp // |x|^2/(1+sqrt(1+|x|^2)) : |x| : log(2)
+ faddp // |x|+|x|^2/(1+sqrt(1+|x|^2)) : log(2)
+ fcoml MO(limit)
+ fnstsw
+ sahf
+ ja 6f
+ fyl2xp1
+ jecxz 4f
+ fchs
+4: ret
+
+7: fldt 4(%esp)
+ fadd %st
+ ret
+
+6: faddl MO(one)
+ fyl2x
+ jecxz 4f
+ fchs
+4: ret
+
+ // |x| < 2^-34 => y = x (inexact iff |x| != 0.0)
+ .align ALIGNARG(4)
+2:
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ jecxz 4f
+ fchs // x
+4: fld %st // x : x
+ fldt MO(huge) // huge : x : x
+ faddp // huge+x : x
+ fstp %st(0) // x
+ cmpl $0x0001, %eax
+ jae 8f
+ fld %st(0)
+ fmul %st(0)
+ fstp %st(0)
+8: ret
+
+ // |x| > 2^34 => y = sign(x) * (log(|x|) + log(2))
+ .align ALIGNARG(4)
+3: fyl2x // log(|x|)
+ fldln2 // log(2) : log(|x|)
+ faddp // log(|x|)+log(2)
+ jecxz 4f
+ fchs
+4: ret
+
+ // |x| > 2 => y = sign(x) * log(2*|x| + 1/(|x|+sqrt(x*x+1)))
+ .align ALIGNARG(4)
+5: fld %st // |x| : |x| : log(2)
+ fadd %st, %st(1) // |x| : 2*|x| : log(2)
+ fld %st // |x| : |x| : 2*|x| : log(2)
+ fmul %st(1) // |x|^2 : |x| : 2*|x| : log(2)
+ faddl MO(one) // 1+|x|^2 : |x| : 2*|x| : log(2)
+ fsqrt // sqrt(1+|x|^2) : |x| : 2*|x| : log(2)
+ faddp // |x|+sqrt(1+|x|^2) : 2*|x| : log(2)
+ fdivrl MO(one) // 1/(|x|+sqrt(1+|x|^2)) : 2*|x| : log(2)
+ faddp // 2*|x|+1/(|x|+sqrt(1+|x|^2)) : log(2)
+ fyl2x // log(2*|x|+1/(|x|+sqrt(1+|x|^2)))
+ jecxz 4f
+ fchs
+4: ret
+END(__asinhl)
+weak_alias (__asinhl, asinhl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_atan.S b/REORG.TODO/sysdeps/i386/fpu/s_atan.S
new file mode 100644
index 0000000000..644de78feb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_atan.S
@@ -0,0 +1,30 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: s_atan.S,v 1.4 1995/05/08 23:50:41 jtc Exp $")
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__atan)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ fldl 4(%esp)
+ fld1
+ fpatan
+ DBL_CHECK_FORCE_UFLOW
+ ret
+END (__atan)
+weak_alias (__atan, atan)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_atanf.S b/REORG.TODO/sysdeps/i386/fpu/s_atanf.S
new file mode 100644
index 0000000000..0589c1135e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_atanf.S
@@ -0,0 +1,30 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: s_atanf.S,v 1.3 1995/05/08 23:51:33 jtc Exp $")
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__atanf)
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+ flds 4(%esp)
+ fld1
+ fpatan
+ FLT_CHECK_FORCE_UFLOW
+ ret
+END (__atanf)
+weak_alias (__atanf, atanf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_atanl.c b/REORG.TODO/sysdeps/i386/fpu/s_atanl.c
new file mode 100644
index 0000000000..b7dba88aad
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_atanl.c
@@ -0,0 +1,22 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <math_private.h>
+
+long double
+__atanl (long double x)
+{
+ long double res;
+
+ asm ("fld1\n"
+ "fpatan"
+ : "=t" (res) : "0" (x));
+
+ return res;
+}
+
+weak_alias (__atanl, atanl)
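fpatan computes atan(ST(1)/ST(0)) and pops the stack, which is why the implementations above load 1 before issuing it: atan(x) is atan2(x, 1). The same instruction yields a full atan2 with no explicit division; a sketch in the style of __atanl above (the function name is hypothetical):

  long double
  atan2l_sketch (long double y, long double x)
  {
    long double res;

    asm ("fpatan" : "=t" (res) : "0" (x), "u" (y) : "st(1)");

    return res;
  }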
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_cbrt.S b/REORG.TODO/sysdeps/i386/fpu/s_cbrt.S
new file mode 100644
index 0000000000..7f01659eae
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_cbrt.S
@@ -0,0 +1,200 @@
+/* Compute cubic root of double value.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
+ Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type f7,@object
+f7: .double -0.145263899385486377
+ ASM_SIZE_DIRECTIVE(f7)
+ .type f6,@object
+f6: .double 0.784932344976639262
+ ASM_SIZE_DIRECTIVE(f6)
+ .type f5,@object
+f5: .double -1.83469277483613086
+ ASM_SIZE_DIRECTIVE(f5)
+ .type f4,@object
+f4: .double 2.44693122563534430
+ ASM_SIZE_DIRECTIVE(f4)
+ .type f3,@object
+f3: .double -2.11499494167371287
+ ASM_SIZE_DIRECTIVE(f3)
+ .type f2,@object
+f2: .double 1.50819193781584896
+ ASM_SIZE_DIRECTIVE(f2)
+ .type f1,@object
+f1: .double 0.354895765043919860
+ ASM_SIZE_DIRECTIVE(f1)
+
+#define CBRT2 1.2599210498948731648
+#define ONE_CBRT2 0.793700525984099737355196796584
+#define SQR_CBRT2 1.5874010519681994748
+#define ONE_SQR_CBRT2 0.629960524947436582364439673883
+
+ .type factor,@object
+factor: .double ONE_SQR_CBRT2
+ .double ONE_CBRT2
+ .double 1.0
+ .double CBRT2
+ .double SQR_CBRT2
+ ASM_SIZE_DIRECTIVE(factor)
+
+ .type two54,@object
+two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
+ ASM_SIZE_DIRECTIVE(two54)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%ebx)
+#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
+#else
+#define MO(op) op
+#define MOX(op,x) op(x)
+#endif
+
+ .text
+ENTRY(__cbrt)
+ movl 4(%esp), %ecx
+ movl 8(%esp), %eax
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+ orl %eax, %ecx
+ jz 1f
+ xorl %ecx, %ecx
+ cmpl $0x7ff00000, %eax
+ jae 1f
+
+#ifdef PIC
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ LOAD_PIC_REG (bx)
+#endif
+
+ cmpl $0x00100000, %eax
+ jae 2f
+
+#ifdef PIC
+ fldl 8(%esp)
+#else
+ fldl 4(%esp)
+#endif
+ fmull MO(two54)
+ movl $-54, %ecx
+#ifdef PIC
+ fstpl 8(%esp)
+ movl 12(%esp), %eax
+#else
+ fstpl 4(%esp)
+ movl 8(%esp), %eax
+#endif
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+
+2: shrl $20, %eax
+ andl $0x800fffff, %edx
+ subl $1022, %eax
+ orl $0x3fe00000, %edx
+ addl %eax, %ecx
+#ifdef PIC
+ movl %edx, 12(%esp)
+
+ fldl 8(%esp) /* xm */
+#else
+ movl %edx, 8(%esp)
+
+ fldl 4(%esp) /* xm */
+#endif
+ fabs
+
+ /* The following code has two tracks:
+ a) compute the normalized cbrt value
+ b) compute xe/3 and xe%3
+ The right track computes the value for b) and this is done
+ in an optimized way by avoiding division.
+
+ But why two tracks at all? Very easy: efficiency. Some FP
+	   instructions can overlap with a certain amount of integer (and
+ FP) instructions. So we get (except for the imull) all
+ instructions for free. */
+
+ fld %st(0) /* xm : xm */
+
+ fmull MO(f7) /* f7*xm : xm */
+ movl $1431655766, %eax
+ faddl MO(f6) /* f6+f7*xm : xm */
+ imull %ecx
+ fmul %st(1) /* (f6+f7*xm)*xm : xm */
+ movl %ecx, %eax
+ faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */
+ sarl $31, %eax
+ fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */
+ subl %eax, %edx
+ faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
+ fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
+ faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
+ fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
+ faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
+ fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
+ faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
+
+ fld %st /* u : u : xm */
+ fmul %st(1) /* u*u : u : xm */
+ fld %st(2) /* xm : u*u : u : xm */
+ fadd %st /* 2*xm : u*u : u : xm */
+ fxch %st(1) /* u*u : 2*xm : u : xm */
+ fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
+ movl %edx, %eax
+ fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
+ leal (%edx,%edx,2),%edx
+ fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
+ subl %edx, %ecx
+ faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
+ shll $3, %ecx
+ fmulp /* u*(t2+2*xm) : 2*t2+xm */
+ fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
+ fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
+ fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
+ fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+#ifdef PIC
+ movl 12(%esp), %eax
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+#else
+ movl 8(%esp), %eax
+#endif
+ testl %eax, %eax
+ fstp %st(1)
+ jns 4f
+ fchs
+4: ret
+
+ /* Return the argument. */
+1: fldl 4(%esp)
+ ret
+END(__cbrt)
+weak_alias (__cbrt, cbrt)
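
Restated in C, the algorithm decomposes |x| as xm * 2^xe with xm in [0.5, 1), approximates cbrt(xm) with the polynomial built from f1..f7, sharpens it with one Halley iteration u*(t2 + 2*xm)/(2*t2 + xm) where t2 = u^3, and rescales by 2^(xe/3) times factor[2 + xe%3] = 2^((xe%3)/3). A double-precision sketch (the assembly gains accuracy from working in the 80-bit registers; cbrt_sketch and the frexp/ldexp decomposition are illustrative stand-ins for the bit fiddling):

    #include <math.h>

    static const double factor[5] =
      {
        0.629960524947436582364439673883,	/* 2^(-2/3) */
        0.793700525984099737355196796584,	/* 2^(-1/3) */
        1.0,
        1.2599210498948731648,			/* 2^(1/3)  */
        1.5874010519681994748,			/* 2^(2/3)  */
      };

    static double
    cbrt_sketch (double x)
    {
      if (x == 0.0 || !isfinite (x))
        return x;				/* 0, Inf, NaN: unchanged */

      int xe;
      double xm = frexp (fabs (x), &xe);	/* x = xm * 2^xe */

      /* Polynomial approximation u ~ cbrt(xm), coefficients f7..f1.  */
      double u = ((((((-0.145263899385486377  * xm
                      + 0.784932344976639262) * xm
                      - 1.83469277483613086)  * xm
                      + 2.44693122563534430)  * xm
                      - 2.11499494167371287)  * xm
                      + 1.50819193781584896)  * xm
                      + 0.354895765043919860);

      double t2 = u * u * u;
      u *= (t2 + 2.0 * xm) / (2.0 * t2 + xm);	/* one Halley step */

      /* C's % and / truncate toward zero, matching the integer track.  */
      double r = ldexp (u * factor[2 + xe % 3], xe / 3);
      return copysign (r, x);
    }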
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_cbrtf.S b/REORG.TODO/sysdeps/i386/fpu/s_cbrtf.S
new file mode 100644
index 0000000000..645d24372d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_cbrtf.S
@@ -0,0 +1,177 @@
+/* Compute cube root of a float value.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
+ Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type f3,@object
+f3: .double 0.191502161678719066
+ ASM_SIZE_DIRECTIVE(f3)
+ .type f2,@object
+f2: .double 0.697570460207922770
+ ASM_SIZE_DIRECTIVE(f2)
+ .type f1,@object
+f1: .double 0.492659620528969547
+ ASM_SIZE_DIRECTIVE(f1)
+
+#define CBRT2 1.2599210498948731648
+#define ONE_CBRT2 0.793700525984099737355196796584
+#define SQR_CBRT2 1.5874010519681994748
+#define ONE_SQR_CBRT2 0.629960524947436582364439673883
+
+ .type factor,@object
+ .align ALIGNARG(4)
+factor: .double ONE_SQR_CBRT2
+ .double ONE_CBRT2
+ .double 1.0
+ .double CBRT2
+ .double SQR_CBRT2
+ ASM_SIZE_DIRECTIVE(factor)
+
+ .type two25,@object
+two25: .byte 0, 0, 0, 0x4c
+ ASM_SIZE_DIRECTIVE(two25)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%ebx)
+#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
+#else
+#define MO(op) op
+#define MOX(op,x) op(x)
+#endif
+
+ .text
+ENTRY(__cbrtf)
+ movl 4(%esp), %eax
+ xorl %ecx, %ecx
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+ jz 1f
+ cmpl $0x7f800000, %eax
+ jae 1f
+
+#ifdef PIC
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ LOAD_PIC_REG (bx)
+#endif
+
+ cmpl $0x00800000, %eax
+ jae 2f
+
+#ifdef PIC
+ flds 8(%esp)
+#else
+ flds 4(%esp)
+#endif
+ fmuls MO(two25)
+ movl $-25, %ecx
+#ifdef PIC
+ fstps 8(%esp)
+ movl 8(%esp), %eax
+#else
+ fstps 4(%esp)
+ movl 4(%esp), %eax
+#endif
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+
+2: shrl $23, %eax
+ andl $0x807fffff, %edx
+ subl $126, %eax
+ orl $0x3f000000, %edx
+ addl %eax, %ecx
+#ifdef PIC
+ movl %edx, 8(%esp)
+
+ flds 8(%esp) /* xm */
+#else
+ movl %edx, 4(%esp)
+
+ flds 4(%esp) /* xm */
+#endif
+ fabs
+
+ /* The following code has two tracks:
+ a) compute the normalized cbrt value
+ b) compute xe/3 and xe%3
+ The right track computes the value for b) and this is done
+ in an optimized way by avoiding division.
+
+ But why two tracks at all? Very easy: efficiency. Some FP
+	   instructions can overlap with a certain amount of integer (and
+ FP) instructions. So we get (except for the imull) all
+ instructions for free. */
+
+ fld %st(0) /* xm : xm */
+ fmull MO(f3) /* f3*xm : xm */
+ movl $1431655766, %eax
+ fsubrl MO(f2) /* f2-f3*xm : xm */
+ imull %ecx
+ fmul %st(1) /* (f2-f3*xm)*xm : xm */
+ movl %ecx, %eax
+ faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */
+ sarl $31, %eax
+ fld %st /* u : u : xm */
+ subl %eax, %edx
+ fmul %st(1) /* u*u : u : xm */
+ fld %st(2) /* xm : u*u : u : xm */
+ fadd %st /* 2*xm : u*u : u : xm */
+ fxch %st(1) /* u*u : 2*xm : u : xm */
+ fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
+ movl %edx, %eax
+ fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
+ leal (%edx,%edx,2),%edx
+ fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
+ subl %edx, %ecx
+ faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
+ shll $3, %ecx
+ fmulp /* u*(t2+2*xm) : 2*t2+xm */
+ fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
+ fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
+ fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
+ fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+#ifdef PIC
+ movl 8(%esp), %eax
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+#else
+ movl 4(%esp), %eax
+#endif
+ testl %eax, %eax
+ fstp %st(1)
+ jns 4f
+ fchs
+4: ret
+
+ /* Return the argument. */
+1: flds 4(%esp)
+ ret
+END(__cbrtf)
+weak_alias (__cbrtf, cbrtf)
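
The integer track's division by 3 deserves a note: 1431655766 = ceil(2^32 / 3), so imull leaves approximately xe/3 in the high half %edx of the product, and subtracting the sign bit (sarl $31) converts the floored result into a truncate-toward-zero quotient. A C sketch of the trick (assumes arithmetic right shift of negative values, as on i386; div3 is an illustrative name):

    #include <stdio.h>

    /* Reciprocal-multiply division by 3 as in the assembly above:
       high 32 bits of the product, then the sign-bit correction
       (subl %eax, %edx), then the remainder via xe - 3*q
       (leal (%edx,%edx,2); subl).  */
    static void
    div3 (int xe, int *quot, int *rem)
    {
      int q = (int) (((long long) 1431655766 * xe) >> 32);
      q -= xe >> 31;
      *quot = q;
      *rem = xe - 3 * q;
    }

    int
    main (void)
    {
      for (int e = -8; e <= 8; ++e)
        {
          int q, r;
          div3 (e, &q, &r);
          printf ("%3d -> q = %2d, r = %2d\n", e, q, r);
        }
      return 0;
    }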
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_cbrtl.S b/REORG.TODO/sysdeps/i386/fpu/s_cbrtl.S
new file mode 100644
index 0000000000..e4a72d29c6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_cbrtl.S
@@ -0,0 +1,229 @@
+/* Compute cube root of a long double value.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
+ Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type f8,@object
+f8: .tfloat 0.161617097923756032
+ ASM_SIZE_DIRECTIVE(f8)
+ .align ALIGNARG(4)
+ .type f7,@object
+f7: .tfloat -0.988553671195413709
+ ASM_SIZE_DIRECTIVE(f7)
+ .align ALIGNARG(4)
+ .type f6,@object
+f6: .tfloat 2.65298938441952296
+ ASM_SIZE_DIRECTIVE(f6)
+ .align ALIGNARG(4)
+ .type f5,@object
+f5: .tfloat -4.11151425200350531
+ ASM_SIZE_DIRECTIVE(f5)
+ .align ALIGNARG(4)
+ .type f4,@object
+f4: .tfloat 4.09559907378707839
+ ASM_SIZE_DIRECTIVE(f4)
+ .align ALIGNARG(4)
+ .type f3,@object
+f3: .tfloat -2.82414939754975962
+ ASM_SIZE_DIRECTIVE(f3)
+ .align ALIGNARG(4)
+ .type f2,@object
+f2: .tfloat 1.67595307700780102
+ ASM_SIZE_DIRECTIVE(f2)
+ .align ALIGNARG(4)
+ .type f1,@object
+f1: .tfloat 0.338058687610520237
+ ASM_SIZE_DIRECTIVE(f1)
+
+#define CBRT2 1.2599210498948731648
+#define ONE_CBRT2 0.793700525984099737355196796584
+#define SQR_CBRT2 1.5874010519681994748
+#define ONE_SQR_CBRT2 0.629960524947436582364439673883
+
+ /* We make the entries in the following table all 16 bytes
+ wide to avoid having to implement a multiplication by 10. */
+ .type factor,@object
+ .align ALIGNARG(4)
+factor: .tfloat ONE_SQR_CBRT2
+ .byte 0, 0, 0, 0, 0, 0
+ .tfloat ONE_CBRT2
+ .byte 0, 0, 0, 0, 0, 0
+ .tfloat 1.0
+ .byte 0, 0, 0, 0, 0, 0
+ .tfloat CBRT2
+ .byte 0, 0, 0, 0, 0, 0
+ .tfloat SQR_CBRT2
+ ASM_SIZE_DIRECTIVE(factor)
+
+ .type two64,@object
+ .align ALIGNARG(4)
+two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
+ ASM_SIZE_DIRECTIVE(two64)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%ebx)
+#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
+#else
+#define MO(op) op
+#define MOX(op,x) op(x)
+#endif
+
+ .text
+ENTRY(__cbrtl)
+ movl 4(%esp), %ecx
+ movl 12(%esp), %eax
+ orl 8(%esp), %ecx
+ movl %eax, %edx
+ andl $0x7fff, %eax
+ orl %eax, %ecx
+ jz 1f
+ xorl %ecx, %ecx
+ cmpl $0x7fff, %eax
+ je 1f
+
+#ifdef PIC
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ LOAD_PIC_REG (bx)
+#endif
+
+ cmpl $0, %eax
+ jne 2f
+
+#ifdef PIC
+ fldt 8(%esp)
+#else
+ fldt 4(%esp)
+#endif
+ fmull MO(two64)
+ movl $-64, %ecx
+#ifdef PIC
+ fstpt 8(%esp)
+ movl 16(%esp), %eax
+#else
+ fstpt 4(%esp)
+ movl 12(%esp), %eax
+#endif
+ movl %eax, %edx
+ andl $0x7fff, %eax
+
+2: andl $0x8000, %edx
+ subl $16382, %eax
+ orl $0x3ffe, %edx
+ addl %eax, %ecx
+#ifdef PIC
+ movl %edx, 16(%esp)
+
+ fldt 8(%esp) /* xm */
+#else
+ movl %edx, 12(%esp)
+
+ fldt 4(%esp) /* xm */
+#endif
+ fabs
+
+ /* The following code has two tracks:
+ a) compute the normalized cbrt value
+ b) compute xe/3 and xe%3
+ The right track computes the value for b) and this is done
+ in an optimized way by avoiding division.
+
+ But why two tracks at all? Very easy: efficiency. Some FP
+	   instructions can overlap with a certain amount of integer (and
+ FP) instructions. So we get (except for the imull) all
+ instructions for free. */
+
+ fldt MO(f8) /* f8 : xm */
+ fmul %st(1) /* f8*xm : xm */
+
+ fldt MO(f7)
+ faddp /* f7+f8*xm : xm */
+ fmul %st(1) /* (f7+f8*xm)*xm : xm */
+ movl $1431655766, %eax
+ fldt MO(f6)
+ faddp /* f6+(f7+f8*xm)*xm : xm */
+ imull %ecx
+ fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */
+ movl %ecx, %eax
+ fldt MO(f5)
+ faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */
+ sarl $31, %eax
+ fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
+ subl %eax, %edx
+ fldt MO(f4)
+ faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
+ fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
+ fldt MO(f3)
+ faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
+ fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
+ fldt MO(f2)
+ faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
+ fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
+ fldt MO(f1)
+ faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
+
+ fld %st /* u : u : xm */
+ fmul %st(1) /* u*u : u : xm */
+ fld %st(2) /* xm : u*u : u : xm */
+ fadd %st /* 2*xm : u*u : u : xm */
+ fxch %st(1) /* u*u : 2*xm : u : xm */
+ fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
+ movl %edx, %eax
+ fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
+ leal (%edx,%edx,2),%edx
+ fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
+ subl %edx, %ecx
+ faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
+ shll $4, %ecx
+ fmulp /* u*(t2+2*xm) : 2*t2+xm */
+ fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
+ fldt MOX(32+factor,%ecx)
+ fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */
+ pushl %eax
+ cfi_adjust_cfa_offset (4)
+ fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
+ fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
+ fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+#ifdef PIC
+ movl 16(%esp), %eax
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+#else
+ movl 12(%esp), %eax
+#endif
+ testl $0x8000, %eax
+ fstp %st(1)
+ jz 4f
+ fchs
+4: ret
+
+ /* Return the argument. */
+1: fldt 4(%esp)
+ fadd %st
+ ret
+END(__cbrtl)
+weak_alias (__cbrtl, cbrtl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_ceil.S b/REORG.TODO/sysdeps/i386/fpu/s_ceil.S
new file mode 100644
index 0000000000..1226bb2f87
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_ceil.S
@@ -0,0 +1,34 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_ceil.S,v 1.4 1995/05/08 23:52:13 jtc Exp $")
+
+ENTRY(__ceil)
+ fldl 4(%esp)
+ subl $32,%esp
+ cfi_adjust_cfa_offset (32)
+
+ fnstenv 4(%esp) /* store fpu environment */
+
+	/* We use %edx here although only the low 16 bits are defined.
+ But none of the operations should care and they are faster
+ than the 16 bit operations. */
+ movl $0x0800,%edx /* round towards +oo */
+ orl 4(%esp),%edx
+ andl $0xfbff,%edx
+ movl %edx,(%esp)
+ fldcw (%esp) /* load modified control word */
+
+ frndint /* round */
+
+ fldenv 4(%esp) /* restore original environment */
+
+ addl $32,%esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__ceil)
+weak_alias (__ceil, ceil)
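
frndint rounds according to the RC field (bits 10 and 11) of the x87 control word; or-ing in 0x0800 and masking with 0xfbff sets RC to 10b, round toward +Inf, and the final fldenv restores the caller's mode and exception state. The portable C99 equivalent of the trick, as a sketch (not how glibc builds ceil):

    #include <fenv.h>
    #include <math.h>

    /* Select round-toward-+Inf, round to integral in that direction,
       restore the previous mode.  */
    static double
    ceil_via_rounding_mode (double x)
    {
      int saved = fegetround ();
      fesetround (FE_UPWARD);
      double r = nearbyint (x);	/* rounds in the current direction */
      fesetround (saved);
      return r;
    }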
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_ceilf.S b/REORG.TODO/sysdeps/i386/fpu/s_ceilf.S
new file mode 100644
index 0000000000..d345c0973b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_ceilf.S
@@ -0,0 +1,34 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_ceilf.S,v 1.3 1995/05/08 23:52:44 jtc Exp $")
+
+ENTRY(__ceilf)
+ flds 4(%esp)
+ subl $32,%esp
+ cfi_adjust_cfa_offset (32)
+
+ fnstenv 4(%esp) /* store fpu environment */
+
+	/* We use %edx here although only the low 16 bits are defined.
+ But none of the operations should care and they are faster
+ than the 16 bit operations. */
+ movl $0x0800,%edx /* round towards +oo */
+ orl 4(%esp),%edx
+ andl $0xfbff,%edx
+ movl %edx,(%esp)
+ fldcw (%esp) /* load modified control word */
+
+ frndint /* round */
+
+ fldenv 4(%esp) /* restore original environment */
+
+ addl $32,%esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__ceilf)
+weak_alias (__ceilf, ceilf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_ceill.S b/REORG.TODO/sysdeps/i386/fpu/s_ceill.S
new file mode 100644
index 0000000000..7c08f43b24
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_ceill.S
@@ -0,0 +1,40 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: $")
+
+ENTRY(__ceill)
+ fldt 4(%esp)
+ subl $32,%esp
+ cfi_adjust_cfa_offset (32)
+
+ fnstenv 4(%esp) /* store fpu environment */
+
+	/* We use %edx here although only the low 16 bits are defined.
+ But none of the operations should care and they are faster
+ than the 16 bit operations. */
+ movl $0x0800,%edx /* round towards +oo */
+ orl 4(%esp),%edx
+ andl $0xfbff,%edx
+ movl %edx,(%esp)
+ fldcw (%esp) /* load modified control word */
+
+ frndint /* round */
+
+ /* Preserve "invalid" exceptions from sNaN input. */
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, 8(%esp)
+
+ fldenv 4(%esp) /* restore original environment */
+
+ addl $32,%esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__ceill)
+weak_alias (__ceill, ceill)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_copysign.S b/REORG.TODO/sysdeps/i386/fpu/s_copysign.S
new file mode 100644
index 0000000000..2520a94427
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_copysign.S
@@ -0,0 +1,20 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_copysign.S,v 1.4 1995/05/08 23:53:02 jtc Exp $")
+
+ENTRY(__copysign)
+ movl 16(%esp),%edx
+ movl 8(%esp),%eax
+ andl $0x80000000,%edx
+ andl $0x7fffffff,%eax
+ orl %edx,%eax
+ movl %eax,8(%esp)
+ fldl 4(%esp)
+ ret
+END (__copysign)
+weak_alias (__copysign, copysign)
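
No FPU arithmetic is involved in the sign transfer: the sign bit of y's high word is spliced onto x's magnitude in memory before the reload. The same splice in C (a sketch; memcpy is the strict-aliasing-safe route to the bit pattern):

    #include <stdint.h>
    #include <string.h>

    /* x's magnitude bits joined with y's sign bit.  */
    static double
    copysign_bits (double x, double y)
    {
      uint64_t ux, uy;
      memcpy (&ux, &x, sizeof ux);
      memcpy (&uy, &y, sizeof uy);
      ux = (ux & 0x7fffffffffffffffULL) | (uy & 0x8000000000000000ULL);
      memcpy (&x, &ux, sizeof x);
      return x;
    }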
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_copysignf.S b/REORG.TODO/sysdeps/i386/fpu/s_copysignf.S
new file mode 100644
index 0000000000..57b1a6f119
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_copysignf.S
@@ -0,0 +1,20 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_copysignf.S,v 1.3 1995/05/08 23:53:25 jtc Exp $")
+
+ENTRY(__copysignf)
+ movl 8(%esp),%edx
+ movl 4(%esp),%eax
+ andl $0x80000000,%edx
+ andl $0x7fffffff,%eax
+ orl %edx,%eax
+ movl %eax,4(%esp)
+ flds 4(%esp)
+ ret
+END (__copysignf)
+weak_alias (__copysignf, copysignf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_copysignl.S b/REORG.TODO/sysdeps/i386/fpu/s_copysignl.S
new file mode 100644
index 0000000000..2163e7b014
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_copysignl.S
@@ -0,0 +1,21 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: $")
+
+ENTRY(__copysignl)
+ movl 24(%esp),%edx
+ movl 12(%esp),%eax
+ andl $0x8000,%edx
+ andl $0x7fff,%eax
+ orl %edx,%eax
+ movl %eax,12(%esp)
+ fldt 4(%esp)
+ ret
+END (__copysignl)
+weak_alias (__copysignl, copysignl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_expm1.S b/REORG.TODO/sysdeps/i386/fpu/s_expm1.S
new file mode 100644
index 0000000000..59fded2d5a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_expm1.S
@@ -0,0 +1,113 @@
+/* ix87 specific implementation of exp(x)-1.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+ Based on code by John C. Bowman <bowman@ipp-garching.mpg.de>.
+ Corrections by H.J. Lu (hjl@gnu.ai.mit.edu), 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+ /* Using: e^x - 1 = 2^(x * log2(e)) - 1 */
+
+#include <sysdep.h>
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type minus1,@object
+minus1: .double -1.0
+ ASM_SIZE_DIRECTIVE(minus1)
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type l2e,@object
+l2e: .tfloat 1.442695040888963407359924681002
+ ASM_SIZE_DIRECTIVE(l2e)
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__expm1)
+ movzwl 4+6(%esp), %eax
+ xorb $0x80, %ah // invert sign bit (now 1 is "positive")
+ cmpl $0xc086, %eax // is num >= 704?
+ jae HIDDEN_JUMPTARGET (__exp)
+
+ fldl 4(%esp) // x
+ fxam // Is NaN, +-Inf or +-0?
+ xorb $0x80, %ah
+ cmpl $0xc043, %eax // is num <= -38.0?
+ fstsw %ax
+ movb $0x45, %ch
+ jb 4f
+
+ // Below -38.0 (may be -NaN or -Inf).
+ andb %ah, %ch
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpb $0x01, %ch
+ je 5f // If -NaN, jump.
+ jmp 2f // -large, possibly -Inf.
+
+4: // In range -38.0 to 704.0 (may be +-0 but not NaN or +-Inf).
+ andb %ah, %ch
+ cmpb $0x40, %ch
+ je 3f // If +-0, jump.
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+5: fldt MO(l2e) // log2(e) : x
+ fmulp // log2(e)*x
+ fld %st // log2(e)*x : log2(e)*x
+ // Set round-to-nearest temporarily.
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fstcw 4(%esp)
+ movl $0xf3ff, %ecx
+ andl 4(%esp), %ecx
+ movl %ecx, (%esp)
+ fldcw (%esp)
+ frndint // int(log2(e)*x) : log2(e)*x
+ fldcw 4(%esp)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fsubr %st, %st(1) // int(log2(e)*x) : fract(log2(e)*x)
+ fxch // fract(log2(e)*x) : int(log2(e)*x)
+ f2xm1 // 2^fract(log2(e)*x)-1 : int(log2(e)*x)
+ fscale // 2^(log2(e)*x)-2^int(log2(e)*x) : int(log2(e)*x)
+ fxch // int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fldl MO(one) // 1 : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fscale // 2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fsubrl MO(one) // 1-2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fstp %st(1) // 1-2^int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+	fsubrp	%st, %st(1)	// 2^(log2(e)*x)-1 = e^x-1
+ DBL_CHECK_FORCE_UFLOW
+ ret
+
+2: fstp %st
+ fldl MO(minus1) // Set result to -1.0.
+3: ret
+END(__expm1)
+weak_alias (__expm1, expm1)
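
The core identity: with t = x*log2(e), n = rint(t) and f = t - n, e^x - 1 = 2^t - 1 = (2^f - 1)*2^n - (1 - 2^n). f2xm1 evaluates 2^f - 1 accurately because |f| <= 0.5, and the two fscale uses supply the 2^n factors. A double-precision C restatement (a sketch; the assembly performs the same steps in extended precision and handles the large/small/special ranges separately):

    #include <math.h>

    static double
    expm1_sketch (double x)
    {
      double t = x * 1.442695040888963407359924681002;	/* log2(e)*x */
      double n = rint (t);	/* frndint, forced to round-to-nearest */
      double f = t - n;		/* |f| <= 0.5: safe for f2xm1 */
      double g = exp2 (f) - 1.0;	/* f2xm1 */
      double s = exp2 (n);		/* fscale applied to 1.0 */
      return g * s - (1.0 - s);	/* = 2^t - 1 = e^x - 1 */
    }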
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_expm1f.S b/REORG.TODO/sysdeps/i386/fpu/s_expm1f.S
new file mode 100644
index 0000000000..4f0b2e7832
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_expm1f.S
@@ -0,0 +1,113 @@
+/* ix87 specific implementation of exp(x)-1.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+ Based on code by John C. Bowman <bowman@ipp-garching.mpg.de>.
+ Corrections by H.J. Lu (hjl@gnu.ai.mit.edu), 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+ /* Using: e^x - 1 = 2^(x * log2(e)) - 1 */
+
+#include <sysdep.h>
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type minus1,@object
+minus1: .double -1.0
+ ASM_SIZE_DIRECTIVE(minus1)
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ .type l2e,@object
+l2e: .tfloat 1.442695040888963407359924681002
+ ASM_SIZE_DIRECTIVE(l2e)
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+ .text
+ENTRY(__expm1f)
+ movzwl 4+2(%esp), %eax
+ xorb $0x80, %ah // invert sign bit (now 1 is "positive")
+ cmpl $0xc2b1, %eax // is num >= 88.5?
+ jae HIDDEN_JUMPTARGET (__expf)
+
+ flds 4(%esp) // x
+ fxam // Is NaN, +-Inf or +-0?
+ xorb $0x80, %ah
+ cmpl $0xc190, %eax // is num <= -18.0?
+ fstsw %ax
+ movb $0x45, %ch
+ jb 4f
+
+ // Below -18.0 (may be -NaN or -Inf).
+ andb %ah, %ch
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ cmpb $0x01, %ch
+ je 5f // If -NaN, jump.
+ jmp 2f // -large, possibly -Inf.
+
+4: // In range -18.0 to 88.5 (may be +-0 but not NaN or +-Inf).
+ andb %ah, %ch
+ cmpb $0x40, %ch
+ je 3f // If +-0, jump.
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+5: fldt MO(l2e) // log2(e) : x
+ fmulp // log2(e)*x
+ fld %st // log2(e)*x : log2(e)*x
+ // Set round-to-nearest temporarily.
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fstcw 4(%esp)
+ movl $0xf3ff, %ecx
+ andl 4(%esp), %ecx
+ movl %ecx, (%esp)
+ fldcw (%esp)
+ frndint // int(log2(e)*x) : log2(e)*x
+ fldcw 4(%esp)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fsubr %st, %st(1) // int(log2(e)*x) : fract(log2(e)*x)
+ fxch // fract(log2(e)*x) : int(log2(e)*x)
+ f2xm1 // 2^fract(log2(e)*x)-1 : int(log2(e)*x)
+ fscale // 2^(log2(e)*x)-2^int(log2(e)*x) : int(log2(e)*x)
+ fxch // int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fldl MO(one) // 1 : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fscale // 2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fsubrl MO(one) // 1-2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+ fstp %st(1) // 1-2^int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
+	fsubrp	%st, %st(1)	// 2^(log2(e)*x)-1 = e^x-1
+ FLT_CHECK_FORCE_UFLOW
+ ret
+
+2: fstp %st
+ fldl MO(minus1) // Set result to -1.0.
+3: ret
+END(__expm1f)
+weak_alias (__expm1f, expm1f)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_expm1l.S b/REORG.TODO/sysdeps/i386/fpu/s_expm1l.S
new file mode 100644
index 0000000000..7fbd99b0db
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_expm1l.S
@@ -0,0 +1,2 @@
+#define USE_AS_EXPM1L
+#include <e_expl.S>
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fabs.S b/REORG.TODO/sysdeps/i386/fpu/s_fabs.S
new file mode 100644
index 0000000000..23ae9dccb9
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fabs.S
@@ -0,0 +1,9 @@
+#include <sysdep.h>
+
+ .text
+ENTRY(__fabs)
+ fldl 4(%esp)
+ fabs
+ ret
+END(__fabs)
+weak_alias (__fabs, fabs)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fabsf.S b/REORG.TODO/sysdeps/i386/fpu/s_fabsf.S
new file mode 100644
index 0000000000..c0407a8839
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fabsf.S
@@ -0,0 +1,9 @@
+#include <sysdep.h>
+
+ .text
+ENTRY(__fabsf)
+ flds 4(%esp)
+ fabs
+ ret
+END(__fabsf)
+weak_alias (__fabsf, fabsf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fabsl.S b/REORG.TODO/sysdeps/i386/fpu/s_fabsl.S
new file mode 100644
index 0000000000..a12a3e050b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fabsl.S
@@ -0,0 +1,9 @@
+#include <sysdep.h>
+
+ .text
+ENTRY(__fabsl)
+ fldt 4(%esp)
+ fabs
+ ret
+END(__fabsl)
+weak_alias (__fabsl, fabsl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fdim.c b/REORG.TODO/sysdeps/i386/fpu/s_fdim.c
new file mode 100644
index 0000000000..6243c62998
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fdim.c
@@ -0,0 +1,50 @@
+/* Return positive difference between arguments. i386 version.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fpu_control.h>
+#include <math.h>
+#include <math_private.h>
+
+double
+__fdim (double x, double y)
+{
+ if (islessequal (x, y))
+ return 0.0;
+
+ /* To avoid double rounding, set double precision for the
+ subtraction. math_narrow_eval is still needed to eliminate
+ excess range in the case of overflow. If the result of the
+ subtraction is in the subnormal range for double, it is exact, so
+ no issues of double rounding for subnormals arise. */
+ fpu_control_t cw, cw_double;
+ _FPU_GETCW (cw);
+ cw_double = (cw & ~_FPU_EXTENDED) | _FPU_DOUBLE;
+ _FPU_SETCW (cw_double);
+ double r = math_narrow_eval (x - y);
+ _FPU_SETCW (cw);
+ if (isinf (r) && !isinf (x) && !isinf (y))
+ __set_errno (ERANGE);
+
+ return r;
+}
+weak_alias (__fdim, fdim)
+#ifdef NO_LONG_DOUBLE
+strong_alias (__fdim, __fdiml)
+weak_alias (__fdim, fdiml)
+#endif
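
A short usage sketch exercising the overflow path this file guards (with glibc, fdim reports range errors through errno, as the code above arranges):

    #include <errno.h>
    #include <float.h>
    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      printf ("%g\n", fdim (5.0, 3.0));	/* 2 */
      printf ("%g\n", fdim (3.0, 5.0));	/* 0 */
      errno = 0;
      double r = fdim (DBL_MAX, -DBL_MAX);	/* overflows to +Inf */
      printf ("%g, errno = %d\n", r, errno);	/* ERANGE with glibc */
      return 0;
    }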
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_finite.S b/REORG.TODO/sysdeps/i386/fpu/s_finite.S
new file mode 100644
index 0000000000..1ae4aed451
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_finite.S
@@ -0,0 +1,17 @@
+/*
+ * Written by Joe Keane <jgk@jgk.org>.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__finite)
+ movl 8(%esp),%eax
+ movl $0xFFEFFFFF,%ecx
+ subl %eax,%ecx
+ xorl %ecx,%eax
+ shrl $31, %eax
+ ret
+END (__finite)
+weak_alias (__finite, finite)
+hidden_def (__finite)
+
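The routine is branch-free: x is finite exactly when the biased exponent in the high word is below 0x7ff, and for precisely those hx the values hx and 0xFFEFFFFF - hx have different sign bits, so the sign of their xor is the answer. In C (a sketch; finite_bits is an illustrative name):

    #include <stdint.h>
    #include <string.h>

    static int
    finite_bits (double x)
    {
      uint64_t u;
      memcpy (&u, &x, sizeof u);
      uint32_t hx = (uint32_t) (u >> 32);	/* high word, 8(%esp) */
      return (int) (((0xFFEFFFFFu - hx) ^ hx) >> 31);
    }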
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_finitef.S b/REORG.TODO/sysdeps/i386/fpu/s_finitef.S
new file mode 100644
index 0000000000..69e72facff
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_finitef.S
@@ -0,0 +1,16 @@
+/*
+ * Written by Joe Keane <jgk@jgk.org>.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__finitef)
+ movl 4(%esp),%eax
+ movl $0xFF7FFFFF,%ecx
+ subl %eax,%ecx
+ xorl %ecx,%eax
+ shrl $31,%eax
+ ret
+END (__finitef)
+weak_alias (__finitef, finitef)
+hidden_def (__finitef)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_finitel.S b/REORG.TODO/sysdeps/i386/fpu/s_finitel.S
new file mode 100644
index 0000000000..cce90e18fc
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_finitel.S
@@ -0,0 +1,15 @@
+/*
+ * Written by Joe Keane <jgk@jgk.org>.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(__finitel)
+ movl 12(%esp),%eax
+ orl $0xffff8000, %eax
+ incl %eax
+ shrl $31, %eax
+ ret
+END (__finitel)
+weak_alias (__finitel, finitel)
+hidden_def (__finitel)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_floor.S b/REORG.TODO/sysdeps/i386/fpu/s_floor.S
new file mode 100644
index 0000000000..ed837dae40
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_floor.S
@@ -0,0 +1,34 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_floor.S,v 1.4 1995/05/09 00:01:59 jtc Exp $")
+
+ENTRY(__floor)
+ fldl 4(%esp)
+ subl $32,%esp
+ cfi_adjust_cfa_offset (32)
+
+ fnstenv 4(%esp) /* store fpu environment */
+
+	/* We use %edx here although only the low 16 bits are defined.
+ But none of the operations should care and they are faster
+ than the 16 bit operations. */
+ movl $0x400,%edx /* round towards -oo */
+ orl 4(%esp),%edx
+ andl $0xf7ff,%edx
+ movl %edx,(%esp)
+ fldcw (%esp) /* load modified control word */
+
+ frndint /* round */
+
+ fldenv 4(%esp) /* restore original environment */
+
+ addl $32,%esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__floor)
+weak_alias (__floor, floor)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_floorf.S b/REORG.TODO/sysdeps/i386/fpu/s_floorf.S
new file mode 100644
index 0000000000..84b6f7ed99
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_floorf.S
@@ -0,0 +1,34 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_floorf.S,v 1.3 1995/05/09 00:04:32 jtc Exp $")
+
+ENTRY(__floorf)
+ flds 4(%esp)
+ subl $32,%esp
+ cfi_adjust_cfa_offset (32)
+
+ fnstenv 4(%esp) /* store fpu environment */
+
+	/* We use %edx here although only the low 16 bits are defined.
+ But none of the operations should care and they are faster
+ than the 16 bit operations. */
+ movl $0x400,%edx /* round towards -oo */
+ orl 4(%esp),%edx
+ andl $0xf7ff,%edx
+ movl %edx,(%esp)
+ fldcw (%esp) /* load modified control word */
+
+ frndint /* round */
+
+ fldenv 4(%esp) /* restore original environment */
+
+ addl $32,%esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__floorf)
+weak_alias (__floorf, floorf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_floorl.S b/REORG.TODO/sysdeps/i386/fpu/s_floorl.S
new file mode 100644
index 0000000000..dc74a0c446
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_floorl.S
@@ -0,0 +1,40 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: $")
+
+ENTRY(__floorl)
+ fldt 4(%esp)
+ subl $32,%esp
+ cfi_adjust_cfa_offset (32)
+
+ fnstenv 4(%esp) /* store fpu environment */
+
+	/* We use %edx here although only the low 16 bits are defined.
+ But none of the operations should care and they are faster
+ than the 16 bit operations. */
+ movl $0x400,%edx /* round towards -oo */
+ orl 4(%esp),%edx
+ andl $0xf7ff,%edx
+ movl %edx,(%esp)
+ fldcw (%esp) /* load modified control word */
+
+ frndint /* round */
+
+ /* Preserve "invalid" exceptions from sNaN input. */
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, 8(%esp)
+
+ fldenv 4(%esp) /* restore original environment */
+
+ addl $32,%esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__floorl)
+weak_alias (__floorl, floorl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fmax.S b/REORG.TODO/sysdeps/i386/fpu/s_fmax.S
new file mode 100644
index 0000000000..218dcef421
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fmax.S
@@ -0,0 +1,43 @@
+/* Compute maximum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmax)
+ fldl 12(%esp) // y
+ fxam
+ fnstsw
+ fldl 4(%esp) // y : x
+
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 1f // y == NaN
+
+ fucom %st(1)
+ fnstsw
+ sahf
+ jnc 1f
+
+ fxch %st(1)
+1: fstp %st(1)
+
+ ret
+END(__fmax)
+weak_alias (__fmax, fmax)
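
The decision structure, restated: a NaN in y is detected with fxam and discarded in favor of x; otherwise fucom compares, the carry flag after sahf means x < y or unordered (x NaN), and fxch/fstp keep the winner. Equivalent C logic as a sketch:

    #include <math.h>

    static double
    fmax_sketch (double x, double y)
    {
      if (isnan (y))
        return x;		/* fxam path: y NaN is discarded */
      if (isnan (x))
        return y;		/* fucom unordered: carry set, keep y */
      return x >= y ? x : y;	/* carry clear iff x >= y */
    }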
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fmaxf.S b/REORG.TODO/sysdeps/i386/fpu/s_fmaxf.S
new file mode 100644
index 0000000000..b7a00cefeb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fmaxf.S
@@ -0,0 +1,43 @@
+/* Compute maximum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmaxf)
+ flds 8(%esp) // y
+ fxam
+ fnstsw
+ flds 4(%esp) // y : x
+
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 1f // y == NaN
+
+ fucom %st(1)
+ fnstsw
+ sahf
+ jnc 1f
+
+ fxch %st(1)
+1: fstp %st(1)
+
+ ret
+END(__fmaxf)
+weak_alias (__fmaxf, fmaxf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fmaxl.S b/REORG.TODO/sysdeps/i386/fpu/s_fmaxl.S
new file mode 100644
index 0000000000..68162921db
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fmaxl.S
@@ -0,0 +1,71 @@
+/* Compute maximum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmaxl)
+ fldt 16(%esp) // y
+ fxam
+ fnstsw
+ fldt 4(%esp) // y : x
+
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 2f // y == NaN
+
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 3f // x == NaN
+
+ fucom %st(1)
+ fnstsw
+ sahf
+ jnc 1f
+
+ fxch %st(1)
+1: fstp %st(1)
+
+ ret
+
+2: // st(1) is a NaN; st(0) may or may not be.
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 4f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 23(%esp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+3: // st(0) is a NaN; st(1) is not. Test if st(0) is signaling.
+ testb $0x40, 11(%esp)
+ jz 4f
+ fstp %st(0)
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ faddp
+ ret
+END(__fmaxl)
+weak_alias (__fmaxl, fmaxl)
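
The long double variant additionally distinguishes quiet from signaling NaNs: bit 6 of the top mantissa byte (23(%esp) for y, 11(%esp) for x) is the quiet bit, and a signaling NaN is routed to the faddp at label 4, which raises "invalid" and produces a quiet NaN. A C sketch of the same rules (issignaling is the glibc/TS 18661-1 test, available with _GNU_SOURCE):

    #define _GNU_SOURCE
    #include <math.h>

    /* Quiet NaNs count as missing arguments; signaling NaNs must
       raise "invalid" and yield a quiet NaN, which x + y provides.  */
    static long double
    fmaxl_sketch (long double x, long double y)
    {
      int xn = isnan (x), yn = isnan (y);
      if (xn && yn)
        return x + y;
      if (yn)
        return issignaling (y) ? x + y : x;
      if (xn)
        return issignaling (x) ? x + y : y;
      return x >= y ? x : y;
    }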
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fmin.S b/REORG.TODO/sysdeps/i386/fpu/s_fmin.S
new file mode 100644
index 0000000000..a5bb0e06dd
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fmin.S
@@ -0,0 +1,43 @@
+/* Compute minimum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmin)
+ fldl 4(%esp) // x
+ fldl 12(%esp) // x : y
+
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 1f // y == NaN
+
+ fucom %st(1)
+ fnstsw
+ sahf
+ jc 2f
+
+1: fxch %st(1)
+2: fstp %st(1)
+
+ ret
+END(__fmin)
+weak_alias (__fmin, fmin)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fminf.S b/REORG.TODO/sysdeps/i386/fpu/s_fminf.S
new file mode 100644
index 0000000000..fba4a41120
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fminf.S
@@ -0,0 +1,43 @@
+/* Compute minimum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fminf)
+ flds 4(%esp) // x
+ flds 8(%esp) // x : y
+
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 1f // y == NaN
+
+ fucom %st(1)
+ fnstsw
+ sahf
+ jc 2f
+
+1: fxch %st(1)
+2: fstp %st(1)
+
+ ret
+END(__fminf)
+weak_alias (__fminf, fminf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fminl.S b/REORG.TODO/sysdeps/i386/fpu/s_fminl.S
new file mode 100644
index 0000000000..12ef21fda9
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fminl.S
@@ -0,0 +1,71 @@
+/* Compute minimum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fminl)
+ fldt 16(%esp) // y
+ fxam
+ fnstsw
+ fldt 4(%esp) // y : x
+
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 2f // y == NaN
+
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 3f // x == NaN
+
+ fucom %st(1)
+ fnstsw
+ sahf
+ jc 1f
+
+ fxch %st(1)
+1: fstp %st(1)
+
+ ret
+
+2: // st(1) is a NaN; st(0) may or may not be.
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 4f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 23(%esp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+3: // st(0) is a NaN; st(1) is not. Test if st(0) is signaling.
+ testb $0x40, 11(%esp)
+ jz 4f
+ fstp %st(0)
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ faddp
+ ret
+END(__fminl)
+weak_alias (__fminl, fminl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_fpclassifyl.c b/REORG.TODO/sysdeps/i386/fpu/s_fpclassifyl.c
new file mode 100644
index 0000000000..ce19fd0035
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_fpclassifyl.c
@@ -0,0 +1,42 @@
+/* Return classification value corresponding to argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#include <math_private.h>
+
+
+int
+__fpclassifyl (long double x)
+{
+ u_int32_t ex, hx, lx;
+ int retval = FP_NORMAL;
+
+ GET_LDOUBLE_WORDS (ex, hx, lx, x);
+ ex &= 0x7fff;
+ if ((ex | lx | hx) == 0)
+ retval = FP_ZERO;
+ else if (ex == 0 && (hx & 0x80000000) == 0)
+ retval = FP_SUBNORMAL;
+ else if (ex == 0x7fff)
+ retval = ((hx & 0x7fffffff) | lx) != 0 ? FP_NAN : FP_INFINITE;
+
+ return retval;
+}
+libm_hidden_def (__fpclassifyl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_frexp.S b/REORG.TODO/sysdeps/i386/fpu/s_frexp.S
new file mode 100644
index 0000000000..104f733bf6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_frexp.S
@@ -0,0 +1,83 @@
+/* ix87 specific frexp implementation for double.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type two54,@object
+two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
+ ASM_SIZE_DIRECTIVE(two54)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+#define PARMS 4 /* no space for saved regs */
+#define VAL0 PARMS
+#define VAL1 VAL0+4
+#define EXPP VAL1+4
+
+ .text
+ENTRY (__frexp)
+
+ movl VAL0(%esp), %ecx
+ movl VAL1(%esp), %eax
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+ orl %eax, %ecx
+ jz 1f
+ xorl %ecx, %ecx
+ cmpl $0x7ff00000, %eax
+ jae 1f
+
+ cmpl $0x00100000, %eax
+ jae 2f
+
+ fldl VAL0(%esp)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fmull MO(two54)
+ movl $-54, %ecx
+ fstpl VAL0(%esp)
+ fwait
+ movl VAL1(%esp), %eax
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+
+2: shrl $20, %eax
+ andl $0x800fffff, %edx
+ subl $1022, %eax
+ orl $0x3fe00000, %edx
+ addl %eax, %ecx
+ movl %edx, VAL1(%esp)
+
+ /* Store %ecx in the variable pointed to by the second argument,
+ get the factor from the stack and return. */
+1: movl EXPP(%esp), %eax
+ fldl VAL0(%esp)
+ movl %ecx, (%eax)
+
+ ret
+END (__frexp)
+weak_alias (__frexp, frexp)
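
In C terms the routine scales subnormals up by 2^54 (adjusting the exponent count by -54), reads the biased exponent, rebiased by 1022 so the returned mantissa lands in [0.5, 1), and overwrites the stored exponent field with 0x3fe. A sketch (frexp_sketch is an illustrative name; zeros, infinities and NaNs pass through with *ep = 0, as in the assembly):

    #include <stdint.h>
    #include <string.h>

    static double
    frexp_sketch (double x, int *ep)
    {
      uint64_t u;
      int e = 0;
      memcpy (&u, &x, sizeof u);
      uint32_t hx = (uint32_t) (u >> 32) & 0x7fffffff;
      if ((hx | (uint32_t) u) == 0 || hx >= 0x7ff00000)
        {
          *ep = 0;		/* 0, Inf, NaN: returned unchanged */
          return x;
        }
      if (hx < 0x00100000)	/* subnormal: scale into range */
        {
          x *= 0x1p54;
          e = -54;
          memcpy (&u, &x, sizeof u);
          hx = (uint32_t) (u >> 32) & 0x7fffffff;
        }
      e += (int) (hx >> 20) - 1022;	/* shrl $20; subl $1022 */
      u = (u & 0x800fffffffffffffULL) | 0x3fe0000000000000ULL;
      memcpy (&x, &u, sizeof x);	/* mantissa now in [0.5, 1) */
      *ep = e;
      return x;
    }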
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_frexpf.S b/REORG.TODO/sysdeps/i386/fpu/s_frexpf.S
new file mode 100644
index 0000000000..f21c39ec4b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_frexpf.S
@@ -0,0 +1,80 @@
+/* ix87 specific frexp implementation for float.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type two25,@object
+two25: .byte 0, 0, 0, 0x4c
+ ASM_SIZE_DIRECTIVE(two25)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+#define PARMS 4 /* no space for saved regs */
+#define VAL PARMS
+#define EXPP VAL+4
+
+ .text
+ENTRY (__frexpf)
+
+ movl VAL(%esp), %eax
+ xorl %ecx, %ecx
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+ jz 1f
+ cmpl $0x7f800000, %eax
+ jae 1f
+
+ cmpl $0x00800000, %eax
+ jae 2f
+
+ flds VAL(%esp)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fmuls MO(two25)
+ movl $-25, %ecx
+ fstps VAL(%esp)
+ fwait
+ movl VAL(%esp), %eax
+ movl %eax, %edx
+ andl $0x7fffffff, %eax
+
+2: shrl $23, %eax
+ andl $0x807fffff, %edx
+ subl $126, %eax
+ orl $0x3f000000, %edx
+ addl %eax, %ecx
+ movl %edx, VAL(%esp)
+
+ /* Store %ecx in the variable pointed to by the second argument,
+ get the factor from the stack and return. */
+1: movl EXPP(%esp), %eax
+ flds VAL(%esp)
+ movl %ecx, (%eax)
+
+ ret
+END (__frexpf)
+weak_alias (__frexpf, frexpf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_frexpl.S b/REORG.TODO/sysdeps/i386/fpu/s_frexpl.S
new file mode 100644
index 0000000000..04f28888d2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_frexpl.S
@@ -0,0 +1,92 @@
+/* ix87 specific frexp implementation for long double.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ .type two64,@object
+two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
+ ASM_SIZE_DIRECTIVE(two64)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%edx)
+#else
+#define MO(op) op
+#endif
+
+#define PARMS 4 /* no space for saved regs */
+#define VAL0 PARMS
+#define VAL1 VAL0+4
+#define VAL2 VAL1+4
+#define EXPP VAL2+4
+
+ .text
+ENTRY (__frexpl)
+
+ movl VAL0(%esp), %ecx
+ movl VAL2(%esp), %eax
+ orl VAL1(%esp), %ecx
+ movl %eax, %edx
+ andl $0x7fff, %eax
+ orl %eax, %ecx
+ jz 1f
+ xorl %ecx, %ecx
+ cmpl $0x7fff, %eax
+ je 3f
+
+ cmpl $0, %eax
+ jne 2f
+
+ fldt VAL0(%esp)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+	fmull	MO(two64)	/* It's not necessary to use an 80-bit factor.  */
+ movl $-64, %ecx
+ fstpt VAL0(%esp)
+ fwait
+ movl VAL2(%esp), %eax
+ movl %eax, %edx
+ andl $0x7fff, %eax
+
+2: andl $0x8000, %edx
+ subl $16382, %eax
+ orl $0x3ffe, %edx
+ addl %eax, %ecx
+ movl %edx, VAL2(%esp)
+
+ /* Store %ecx in the variable pointed to by the second argument,
+ get the factor from the stack and return. */
+1: movl EXPP(%esp), %eax
+ fldt VAL0(%esp)
+ movl %ecx, (%eax)
+
+ ret
+
+ /* Infinity or NaN; ensure signaling NaNs are quieted. */
+3: movl EXPP(%esp), %eax
+ fldt VAL0(%esp)
+ fadd %st
+ movl %ecx, (%eax)
+ ret
+END (__frexpl)
+weak_alias (__frexpl, frexpl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_isinfl.c b/REORG.TODO/sysdeps/i386/fpu/s_isinfl.c
new file mode 100644
index 0000000000..cdd77183fa
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_isinfl.c
@@ -0,0 +1,32 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Change for long double by Ulrich Drepper <drepper@cygnus.com>.
+ * Intel i387 specific version.
+ * Public domain.
+ */
+
+#if defined(LIBM_SCCS) && !defined(lint)
+static char rcsid[] = "$NetBSD: $";
+#endif
+
+/*
+ * isinfl(x) returns 1 if x is inf, -1 if x is -inf, else 0;
+ * no branching!
+ */
+
+#include <math.h>
+#include <math_private.h>
+
+int __isinfl(long double x)
+{
+ int32_t se,hx,lx;
+ GET_LDOUBLE_WORDS(se,hx,lx,x);
+	/* This additional ^ 0x80000000 is necessary because in Intel's
+	   internal representation the implicit one is explicit.  */
+ lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
+ lx |= -lx;
+ se &= 0x8000;
+ return ~(lx >> 31) & (1 - (se >> 14));
+}
+hidden_def (__isinfl)
+weak_alias (__isinfl, isinfl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_isnanl.c b/REORG.TODO/sysdeps/i386/fpu/s_isnanl.c
new file mode 100644
index 0000000000..816396d8fb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_isnanl.c
@@ -0,0 +1,43 @@
+/* s_isnanl.c -- long double version for i387 of s_isnan.c.
+ * Conversion to long double by Ulrich Drepper,
+ * Cygnus Support, drepper@cygnus.com.
+ */
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#if defined(LIBM_SCCS) && !defined(lint)
+static char rcsid[] = "$NetBSD: $";
+#endif
+
+/*
+ * isnanl(x) returns 1 if x is nan, else 0;
+ * no branching!
+ */
+
+#include <math.h>
+#include <math_private.h>
+
+int __isnanl(long double x)
+{
+ int32_t se,hx,lx;
+ GET_LDOUBLE_WORDS(se,hx,lx,x);
+ se = (se & 0x7fff) << 1;
+ /* The additional & 0x7fffffff is required because Intel's
+	   extended format has the normally implicit 1 explicitly
+	   present.  Sigh!  */
+ lx |= hx & 0x7fffffff;
+ se |= (u_int32_t)(lx|(-lx))>>31;
+ se = 0xfffe - se;
+ return (int)((u_int32_t)(se))>>16;
+}
+hidden_def (__isnanl)
+weak_alias (__isnanl, isnanl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_llrint.S b/REORG.TODO/sysdeps/i386/fpu/s_llrint.S
new file mode 100644
index 0000000000..a597183aab
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_llrint.S
@@ -0,0 +1,36 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__llrint)
+ fldl 4(%esp)
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fistpll (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ ret
+END(__llrint)
+weak_alias (__llrint, llrint)
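
fistpll converts according to the current x87 rounding mode, which is exactly what llrint requires, so no control-word juggling is needed here. A quick demonstration of that dependence (C99 fenv; link with -lm):

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      fesetround (FE_DOWNWARD);
      printf ("%lld\n", llrint (2.5));	/* 2 */
      fesetround (FE_TONEAREST);
      printf ("%lld\n", llrint (2.5));	/* 2: ties to even */
      fesetround (FE_UPWARD);
      printf ("%lld\n", llrint (2.5));	/* 3 */
      return 0;
    }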
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_llrintf.S b/REORG.TODO/sysdeps/i386/fpu/s_llrintf.S
new file mode 100644
index 0000000000..a4b574eccb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_llrintf.S
@@ -0,0 +1,36 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__llrintf)
+ flds 4(%esp)
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fistpll (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ ret
+END(__llrintf)
+weak_alias (__llrintf, llrintf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_llrintl.S b/REORG.TODO/sysdeps/i386/fpu/s_llrintl.S
new file mode 100644
index 0000000000..7b48c02ef4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_llrintl.S
@@ -0,0 +1,36 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__llrintl)
+ fldt 4(%esp)
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ fistpll (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ ret
+END(__llrintl)
+weak_alias (__llrintl, llrintl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_log1p.S b/REORG.TODO/sysdeps/i386/fpu/s_log1p.S
new file mode 100644
index 0000000000..7978e76095
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_log1p.S
@@ -0,0 +1,67 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $")
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ /* The fyl2xp1 instruction can only be used for values in
+ -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2
+ 0.29 is a safe value.
+ */
+limit: .double 0.29
+one: .double 1.0
+
+DEFINE_DBL_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+/*
+ * Use the fyl2xp1 instruction when the argument is in the range -0.29 to 0.29;
+ * otherwise use fyl2x, with the extra computation it requires.
+ */
+ .text
+ENTRY(__log1p)
+ fldln2
+
+ fldl 4(%esp)
+
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+ fxam
+ fnstsw
+ fld %st
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fabs
+ fcompl MO(limit)
+ fnstsw
+ sahf
+ jc 2f
+
+ faddl MO(one)
+ fyl2x
+ ret
+
+2: fyl2xp1
+ DBL_CHECK_FORCE_UFLOW_NONNAN
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+
+END (__log1p)
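
The structure above is shared by all three log1p variants: ln 2 is loaded first
so that fyl2x/fyl2xp1, which compute st(1) * log2(...), deliver the natural
logarithm directly, and fyl2xp1 is preferred for small arguments because
forming 1 + x explicitly would discard the low-order bits of a tiny x. A hedged
C illustration of that cancellation, in double precision:

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      double x = 1e-17;
      /* Naive form: 1.0 + x rounds to exactly 1.0, so the result is 0.  */
      printf ("log(1+x) = %.17g\n", log (1.0 + x));
      /* log1p keeps x's low-order bits; the result is ~x for tiny x.  */
      printf ("log1p(x) = %.17g\n", log1p (x));
      return 0;
    }
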
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_log1pf.S b/REORG.TODO/sysdeps/i386/fpu/s_log1pf.S
new file mode 100644
index 0000000000..acaa299d94
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_log1pf.S
@@ -0,0 +1,67 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: s_log1pf.S,v 1.4 1995/05/09 00:13:05 jtc Exp $")
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ /* The fyl2xp1 instruction can only be used for values in
+ -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2
+ 0.29 is a safe value.
+ */
+limit: .float 0.29
+one: .float 1.0
+
+DEFINE_FLT_MIN
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+/*
+ * Use the fyl2xp1 instruction when the argument is in the range -0.29 to 0.29;
+ * otherwise use fyl2x, with the extra computation it requires.
+ */
+ .text
+ENTRY(__log1pf)
+ fldln2
+
+ flds 4(%esp)
+
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+ fxam
+ fnstsw
+ fld %st
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4: fabs
+ fcomps MO(limit)
+ fnstsw
+ sahf
+ jc 2f
+
+ fadds MO(one)
+ fyl2x
+ ret
+
+2: fyl2xp1
+ FLT_CHECK_FORCE_UFLOW_NONNAN
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ ret
+
+END (__log1pf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_log1pl.S b/REORG.TODO/sysdeps/i386/fpu/s_log1pl.S
new file mode 100644
index 0000000000..0fd05cbdb3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_log1pl.S
@@ -0,0 +1,76 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $")
+
+ .section .rodata
+
+ .align ALIGNARG(4)
+ /* The fyl2xp1 instruction can only be used for values in
+ -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2
+ 0.29 is a safe value.
+ */
+limit: .tfloat 0.29
+ /* Please note: we use a double value here. Since 1.0 has
+ an exact representation this does not affect the accuracy
+ but it helps to optimize the code. */
+one: .double 1.0
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+/*
+ * Use the fyl2xp1 instruction when the argument is in the range -0.29 to 0.29;
+ * otherwise use fyl2x, with the extra computation it requires.
+ */
+ .text
+ENTRY(__log1pl)
+ fldln2
+
+ fldt 4(%esp)
+
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+
+ fxam
+ fnstsw
+ fld %st
+ sahf
+ jc 3f // in case x is NaN or ±Inf
+4:
+ fabs
+ fldt MO(limit)
+ fcompp
+ fnstsw
+ sahf
+ jnc 2f
+
+ movzwl 4+8(%esp), %eax
+ xorb $0x80, %ah
+ cmpl $0xc040, %eax
+ jae 5f
+
+ faddl MO(one)
+5: fyl2x
+ ret
+
+2: fyl2xp1
+ ret
+
+3: jp 4b // in case x is ±Inf
+ fstp %st(1)
+ fstp %st(1)
+ fadd %st(0)
+ ret
+
+END (__log1pl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_logb.S b/REORG.TODO/sysdeps/i386/fpu/s_logb.S
new file mode 100644
index 0000000000..f78c091c8a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_logb.S
@@ -0,0 +1,16 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_logb.S,v 1.4 1995/05/09 00:14:30 jtc Exp $")
+
+ENTRY(__logb)
+ fldl 4(%esp)
+ fxtract
+ fstp %st
+ ret
+END (__logb)
+weak_alias (__logb, logb)
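
fxtract decomposes st(0) into significand and unbiased exponent; popping the
significand leaves just the exponent, which is logb's result. A short usage
sketch:

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* logb returns the unbiased binary exponent as a double.  */
      printf ("%g\n", logb (8.0));    /* 3 */
      printf ("%g\n", logb (0.25));   /* -2 */
      printf ("%g\n", logb (10.0));   /* 3, since 10 = 1.25 * 2^3 */
      return 0;
    }

The significand variants further down keep the other half of the fxtract
result instead.
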
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_logbf.S b/REORG.TODO/sysdeps/i386/fpu/s_logbf.S
new file mode 100644
index 0000000000..91eb3d2925
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_logbf.S
@@ -0,0 +1,16 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_logbf.S,v 1.3 1995/05/09 00:15:12 jtc Exp $")
+
+ENTRY(__logbf)
+ flds 4(%esp)
+ fxtract
+ fstp %st
+ ret
+END (__logbf)
+weak_alias (__logbf, logbf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_logbl.c b/REORG.TODO/sysdeps/i386/fpu/s_logbl.c
new file mode 100644
index 0000000000..391e2db489
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_logbl.c
@@ -0,0 +1,19 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <math_private.h>
+
+long double
+__logbl (long double x)
+{
+ long double res;
+
+ asm ("fxtract\n"
+ "fstp %%st" : "=t" (res) : "0" (x));
+ return res;
+}
+
+weak_alias (__logbl, logbl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_lrint.S b/REORG.TODO/sysdeps/i386/fpu/s_lrint.S
new file mode 100644
index 0000000000..79a374b399
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_lrint.S
@@ -0,0 +1,34 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__lrint)
+ fldl 4(%esp)
+ subl $4, %esp
+ cfi_adjust_cfa_offset (4)
+ fistpl (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ ret
+END(__lrint)
+weak_alias (__lrint, lrint)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_lrintf.S b/REORG.TODO/sysdeps/i386/fpu/s_lrintf.S
new file mode 100644
index 0000000000..fc6e68e073
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_lrintf.S
@@ -0,0 +1,34 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__lrintf)
+ flds 4(%esp)
+ subl $4, %esp
+ cfi_adjust_cfa_offset (4)
+ fistpl (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ ret
+END(__lrintf)
+weak_alias (__lrintf, lrintf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_lrintl.S b/REORG.TODO/sysdeps/i386/fpu/s_lrintl.S
new file mode 100644
index 0000000000..ba6dbdf44c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_lrintl.S
@@ -0,0 +1,34 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__lrintl)
+ fldt 4(%esp)
+ subl $4, %esp
+ cfi_adjust_cfa_offset (4)
+ fistpl (%esp)
+ fwait
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ ret
+END(__lrintl)
+weak_alias (__lrintl, lrintl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_nearbyint.S b/REORG.TODO/sysdeps/i386/fpu/s_nearbyint.S
new file mode 100644
index 0000000000..f7b79b6ff2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_nearbyint.S
@@ -0,0 +1,20 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+/* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. */
+
+#include <machine/asm.h>
+
+ENTRY(__nearbyint)
+ fldl 4(%esp)
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ fnstenv 4(%esp)
+ frndint
+ fldenv 4(%esp)
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__nearbyint)
+weak_alias (__nearbyint, nearbyint)
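
The fnstenv/fldenv bracket is what distinguishes nearbyint from rint: frndint
may set the inexact flag, and restoring the saved environment discards it, as
C99 requires. A hedged C check of that contract:

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      feclearexcept (FE_INEXACT);
      nearbyint (2.5);
      printf ("%d\n", fetestexcept (FE_INEXACT) != 0);   /* 0: suppressed */
      feclearexcept (FE_INEXACT);
      rint (2.5);
      printf ("%d\n", fetestexcept (FE_INEXACT) != 0);   /* 1: rint may raise */
      return 0;
    }

The long double variant below additionally merges the invalid-operation bit
back into the saved status word, so an invalid exception raised by frndint
(e.g. on a signaling NaN) is not lost when the environment is reloaded.
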
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_nearbyintf.S b/REORG.TODO/sysdeps/i386/fpu/s_nearbyintf.S
new file mode 100644
index 0000000000..92df2f87b3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_nearbyintf.S
@@ -0,0 +1,20 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+/* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. */
+
+#include <machine/asm.h>
+
+ENTRY(__nearbyintf)
+ flds 4(%esp)
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ fnstenv 4(%esp)
+ frndint
+ fldenv 4(%esp)
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__nearbyintf)
+weak_alias (__nearbyintf, nearbyintf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_nearbyintl.S b/REORG.TODO/sysdeps/i386/fpu/s_nearbyintl.S
new file mode 100644
index 0000000000..3b7d1e2436
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_nearbyintl.S
@@ -0,0 +1,23 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+/* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. */
+
+#include <machine/asm.h>
+
+ENTRY(__nearbyintl)
+ fldt 4(%esp)
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ fnstenv 4(%esp)
+ frndint
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, 8(%esp)
+ fldenv 4(%esp)
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END (__nearbyintl)
+weak_alias (__nearbyintl, nearbyintl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_nextafterl.c b/REORG.TODO/sysdeps/i386/fpu/s_nextafterl.c
new file mode 100644
index 0000000000..600ad7a8d3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_nextafterl.c
@@ -0,0 +1,125 @@
+/* s_nextafterl.c -- long double version of s_nextafter.c.
+ * Special version for i387.
+ * Conversion to long double by Ulrich Drepper,
+ * Cygnus Support, drepper@cygnus.com.
+ */
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#if defined(LIBM_SCCS) && !defined(lint)
+static char rcsid[] = "$NetBSD: $";
+#endif
+
+/* IEEE functions
+ * nextafterl(x,y)
+ * return the next machine floating-point number of x in the
+ * direction toward y.
+ * Special cases:
+ */
+
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+
+long double __nextafterl(long double x, long double y)
+{
+ u_int32_t hx,hy,ix,iy;
+ u_int32_t lx,ly;
+ int32_t esx,esy;
+
+ GET_LDOUBLE_WORDS(esx,hx,lx,x);
+ GET_LDOUBLE_WORDS(esy,hy,ly,y);
+ ix = esx&0x7fff; /* |x| */
+ iy = esy&0x7fff; /* |y| */
+
+ /* Intel's extended format has the normally implicit 1 explicitly
+ present. Sigh! */
+ if(((ix==0x7fff)&&(((hx&0x7fffffff)|lx)!=0)) || /* x is nan */
+ ((iy==0x7fff)&&(((hy&0x7fffffff)|ly)!=0))) /* y is nan */
+ return x+y;
+ if(x==y) return y; /* x=y, return y */
+ if((ix|hx|lx)==0) { /* x == 0 */
+ long double u;
+ SET_LDOUBLE_WORDS(x,esy&0x8000,0,1);/* return +-minsubnormal */
+ u = math_opt_barrier (x);
+ u = u * u;
+ math_force_eval (u); /* raise underflow flag */
+ return x;
+ }
+ if(esx>=0) { /* x > 0 */
+ if(esx>esy||((esx==esy) && (hx>hy||((hx==hy)&&(lx>ly))))) {
+ /* x > y, x -= ulp */
+ if(lx==0) {
+ if (hx <= 0x80000000) {
+ if (esx == 0) {
+ --hx;
+ } else {
+ esx -= 1;
+ hx = hx - 1;
+ if (esx > 0)
+ hx |= 0x80000000;
+ }
+ } else
+ hx -= 1;
+ }
+ lx -= 1;
+ } else { /* x < y, x += ulp */
+ lx += 1;
+ if(lx==0) {
+ hx += 1;
+ if (hx==0 || (esx == 0 && hx == 0x80000000)) {
+ esx += 1;
+ hx |= 0x80000000;
+ }
+ }
+ }
+ } else { /* x < 0 */
+ if(esy>=0||(esx>esy||((esx==esy)&&(hx>hy||((hx==hy)&&(lx>ly)))))){
+ /* x < y, x -= ulp */
+ if(lx==0) {
+ if (hx <= 0x80000000 && esx != 0xffff8000) {
+ esx -= 1;
+ hx = hx - 1;
+ if ((esx&0x7fff) > 0)
+ hx |= 0x80000000;
+ } else
+ hx -= 1;
+ }
+ lx -= 1;
+ } else { /* x > y, x += ulp */
+ lx += 1;
+ if(lx==0) {
+ hx += 1;
+ if (hx==0 || (esx == 0xffff8000 && hx == 0x80000000)) {
+ esx += 1;
+ hx |= 0x80000000;
+ }
+ }
+ }
+ }
+ esy = esx&0x7fff;
+ if(esy==0x7fff) {
+ long double u = x + x; /* overflow */
+ math_force_eval (u);
+ __set_errno (ERANGE);
+ }
+ if(esy==0) {
+ long double u = x*x; /* underflow */
+ math_force_eval (u); /* raise underflow flag */
+ __set_errno (ERANGE);
+ }
+ SET_LDOUBLE_WORDS(x,esx,hx,lx);
+ return x;
+}
+weak_alias (__nextafterl, nextafterl)
+strong_alias (__nextafterl, __nexttowardl)
+weak_alias (__nextafterl, nexttowardl)
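
All the stepping above is integer arithmetic on the 80-bit representation: the
low word, high word, and sign/exponent word form one long integer (with the
explicit integer bit kept consistent), so adding or subtracting 1 with
borrow/carry moves the value by exactly one ulp. Behaviorally, as a hedged
usage sketch:

    #include <float.h>
    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      long double x = 1.0L;
      long double up = nextafterl (x, 2.0L);   /* one ulp above 1.0 */
      /* With the i387's 64-bit significand, this difference is 2^-63,
         i.e. LDBL_EPSILON on this configuration.  */
      printf ("%d\n", up - x == LDBL_EPSILON);   /* 1 */
      return 0;
    }
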
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_nexttoward.c b/REORG.TODO/sysdeps/i386/fpu/s_nexttoward.c
new file mode 100644
index 0000000000..0b47044760
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_nexttoward.c
@@ -0,0 +1,93 @@
+/* s_nexttoward.c
+ * Special i387 version
+ * Conversion from s_nextafter.c by Ulrich Drepper, Cygnus Support,
+ * drepper@cygnus.com.
+ */
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#if defined(LIBM_SCCS) && !defined(lint)
+static char rcsid[] = "$NetBSD: $";
+#endif
+
+/* IEEE functions
+ * nexttoward(x,y)
+ * return the next machine floating-point number of x in the
+ * direction toward y.
+ * Special cases:
+ */
+
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+#include <float.h>
+
+double __nexttoward(double x, long double y)
+{
+ int32_t hx,ix,iy;
+ u_int32_t lx,hy,ly,esy;
+
+ EXTRACT_WORDS(hx,lx,x);
+ GET_LDOUBLE_WORDS(esy,hy,ly,y);
+ ix = hx&0x7fffffff; /* |x| */
+ iy = esy&0x7fff; /* |y| */
+
+ /* Intel's extended format has the normally implicit 1 explicitly
+ present. Sigh! */
+ if(((ix>=0x7ff00000)&&((ix-0x7ff00000)|lx)!=0) || /* x is nan */
+ ((iy>=0x7fff)&&((hy&0x7fffffff)|ly)!=0)) /* y is nan */
+ return x+y;
+ if((long double) x==y) return y; /* x=y, return y */
+ if((ix|lx)==0) { /* x == 0 */
+ double u;
+ INSERT_WORDS(x,(esy&0x8000)<<16,1); /* return +-minsub */
+ u = math_opt_barrier (x);
+ u = u * u;
+ math_force_eval (u); /* raise underflow flag */
+ return x;
+ }
+ if(hx>=0) { /* x > 0 */
+ if (x > y) { /* x -= ulp */
+ if(lx==0) hx -= 1;
+ lx -= 1;
+ } else { /* x < y, x += ulp */
+ lx += 1;
+ if(lx==0) hx += 1;
+ }
+ } else { /* x < 0 */
+ if (x < y) { /* x -= ulp */
+ if(lx==0) hx -= 1;
+ lx -= 1;
+ } else { /* x > y, x += ulp */
+ lx += 1;
+ if(lx==0) hx += 1;
+ }
+ }
+ hy = hx&0x7ff00000;
+ if(hy>=0x7ff00000) {
+ double u = x+x; /* overflow */
+ math_force_eval (u);
+ __set_errno (ERANGE);
+ }
+ if(hy<0x00100000) {
+ double u = x*x; /* underflow */
+ math_force_eval (u); /* raise underflow flag */
+ __set_errno (ERANGE);
+ }
+ INSERT_WORDS(x,hx,lx);
+ return x;
+}
+weak_alias (__nexttoward, nexttoward)
+#ifdef NO_LONG_DOUBLE
+strong_alias (__nexttoward, __nexttowardl)
+weak_alias (__nexttoward, nexttowardl)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_nexttowardf.c b/REORG.TODO/sysdeps/i386/fpu/s_nexttowardf.c
new file mode 100644
index 0000000000..e1156d1e4f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_nexttowardf.c
@@ -0,0 +1,77 @@
+/* s_nexttowardf.c -- float version of s_nextafter.c.
+ * Special i387 version.
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ */
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#if defined(LIBM_SCCS) && !defined(lint)
+static char rcsid[] = "$NetBSD: $";
+#endif
+
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+#include <float.h>
+
+float __nexttowardf(float x, long double y)
+{
+ int32_t hx,ix,iy;
+ u_int32_t hy,ly,esy;
+
+ GET_FLOAT_WORD(hx,x);
+ GET_LDOUBLE_WORDS(esy,hy,ly,y);
+ ix = hx&0x7fffffff; /* |x| */
+ iy = esy&0x7fff; /* |y| */
+
+ /* Intel's extended format has the normally implicit 1 explicitly
+ present. Sigh! */
+ if((ix>0x7f800000) || /* x is nan */
+ (iy>=0x7fff&&(((hy&0x7fffffff)|ly)!=0))) /* y is nan */
+ return x+y;
+ if((long double) x==y) return y; /* x=y, return y */
+ if(ix==0) { /* x == 0 */
+ float u;
+ SET_FLOAT_WORD(x,((esy&0x8000)<<16)|1);/* return +-minsub*/
+ u = math_opt_barrier (x);
+ u = u * u;
+ math_force_eval (u); /* raise underflow flag */
+ return x;
+ }
+ if(hx>=0) { /* x > 0 */
+ if(x > y) { /* x -= ulp */
+ hx -= 1;
+ } else { /* x < y, x += ulp */
+ hx += 1;
+ }
+ } else { /* x < 0 */
+ if(x < y) { /* x -= ulp */
+ hx -= 1;
+ } else { /* x > y, x += ulp */
+ hx += 1;
+ }
+ }
+ hy = hx&0x7f800000;
+ if(hy>=0x7f800000) {
+ float u = x+x; /* overflow */
+ math_force_eval (u);
+ __set_errno (ERANGE);
+ }
+ if(hy<0x00800000) {
+ float u = x*x; /* underflow */
+ math_force_eval (u); /* raise underflow flag */
+ __set_errno (ERANGE);
+ }
+ SET_FLOAT_WORD(x,hx);
+ return x;
+}
+weak_alias (__nexttowardf, nexttowardf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_remquo.S b/REORG.TODO/sysdeps/i386/fpu/s_remquo.S
new file mode 100644
index 0000000000..341285db30
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_remquo.S
@@ -0,0 +1,45 @@
+/*
+ * Written by Ulrich Drepper <drepper@cygnus.com>.
+ * Based on e_remainder by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+#define PARMS 4 /* no space for saved regs */
+#define DVDND PARMS
+#define DVSOR DVDND+8
+#define QUOP DVSOR+8
+
+ .text
+ENTRY (__remquo)
+
+ fldl DVSOR(%esp)
+ fldl DVDND(%esp)
+1: fprem1
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ /* Recover the three low bits of the quotient from the condition codes.  */
+ movl %eax, %ecx
+ shrl $8, %eax
+ shrl $12, %ecx
+ andl $4, %ecx
+ andl $3, %eax
+ orl %eax, %ecx
+ leal (%ecx,%ecx,2),%ecx
+ movl $0xef2a60, %eax
+ shrl %cl, %eax
+ andl $7, %eax
+ movl QUOP(%esp), %ecx
+ movl DVDND+4(%esp), %edx
+ xorl DVSOR+4(%esp), %edx
+ testl $0x80000000, %edx
+ jz 1f
+ negl %eax
+1: movl %eax, (%ecx)
+
+ ret
+END (__remquo)
+weak_alias (__remquo, remquo)
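
After fprem1, the three low quotient bits land in the status-word condition
codes as C0 = Q2, C3 = Q1, C1 = Q0. The code packs (C3,C1,C0) into an index and
uses the constant 0xef2a60 as eight packed 3-bit table entries that undo this
permutation. A hedged C rendering of just that decode step:

    #include <assert.h>

    /* Decode fprem1's quotient bits; c0 holds Q2, c1 holds Q0, c3 holds Q1. */
    static int
    quotient_low_bits (int c0, int c1, int c3)
    {
      int idx = (c3 << 2) | (c1 << 1) | c0;   /* same packing as the asm */
      return (0xef2a60 >> (3 * idx)) & 7;     /* 8 entries x 3 bits */
    }

    int
    main (void)
    {
      /* Exhaustive check against the direct formula Q2*4 + Q1*2 + Q0.  */
      for (int q = 0; q < 8; q++)
        assert (quotient_low_bits ((q >> 2) & 1, q & 1, (q >> 1) & 1) == q);
      return 0;
    }

The sign fixup that follows (xorl of the two high words, then negl) gives the
stored quotient the sign of x/y, as remquo requires.
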
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_remquof.S b/REORG.TODO/sysdeps/i386/fpu/s_remquof.S
new file mode 100644
index 0000000000..62063f068f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_remquof.S
@@ -0,0 +1,45 @@
+/*
+ * Written by Ulrich Drepper <drepper@cygnus.com>.
+ * Based on e_remainder by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+#define PARMS 4 /* no space for saved regs */
+#define DVDND PARMS
+#define DVSOR DVDND+4
+#define QUOP DVSOR+4
+
+ .text
+ENTRY (__remquof)
+
+ flds DVSOR(%esp)
+ flds DVDND(%esp)
+1: fprem1
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ /* Recover the three low bits of the quotient from the condition codes.  */
+ movl %eax, %ecx
+ shrl $8, %eax
+ shrl $12, %ecx
+ andl $4, %ecx
+ andl $3, %eax
+ orl %eax, %ecx
+ leal (%ecx,%ecx,2),%ecx
+ movl $0xef2a60, %eax
+ shrl %cl, %eax
+ andl $7, %eax
+ movl QUOP(%esp), %ecx
+ movl DVDND(%esp), %edx
+ xorl DVSOR(%esp), %edx
+ testl $0x80000000, %edx
+ jz 1f
+ negl %eax
+1: movl %eax, (%ecx)
+
+ ret
+END (__remquof)
+weak_alias (__remquof, remquof)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_remquol.S b/REORG.TODO/sysdeps/i386/fpu/s_remquol.S
new file mode 100644
index 0000000000..f3d84fc7c2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_remquol.S
@@ -0,0 +1,45 @@
+/*
+ * Written by Ulrich Drepper <drepper@cygnus.com>.
+ * Based on e_remainder by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+#define PARMS 4 /* no space for saved regs */
+#define DVDND PARMS
+#define DVSOR DVDND+12
+#define QUOP DVSOR+12
+
+ .text
+ENTRY (__remquol)
+
+ fldt DVSOR(%esp)
+ fldt DVDND(%esp)
+1: fprem1
+ fstsw %ax
+ sahf
+ jp 1b
+ fstp %st(1)
+ /* Recover the three low bits of the quotient from the condition codes.  */
+ movl %eax, %ecx
+ shrl $8, %eax
+ shrl $12, %ecx
+ andl $4, %ecx
+ andl $3, %eax
+ orl %eax, %ecx
+ leal (%ecx,%ecx,2),%ecx
+ movl $0xef2a60, %eax
+ shrl %cl, %eax
+ andl $7, %eax
+ movl QUOP(%esp), %ecx
+ movl DVDND+8(%esp), %edx
+ xorl DVSOR+8(%esp), %edx
+ testl $0x8000, %edx
+ jz 1f
+ negl %eax
+1: movl %eax, (%ecx)
+
+ ret
+END (__remquol)
+weak_alias (__remquol, remquol)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_rint.S b/REORG.TODO/sysdeps/i386/fpu/s_rint.S
new file mode 100644
index 0000000000..be36c5f0ca
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_rint.S
@@ -0,0 +1,15 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_rint.S,v 1.4 1995/05/09 00:16:08 jtc Exp $")
+
+ENTRY(__rint)
+ fldl 4(%esp)
+ frndint
+ ret
+END (__rint)
+weak_alias (__rint, rint)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_rintf.S b/REORG.TODO/sysdeps/i386/fpu/s_rintf.S
new file mode 100644
index 0000000000..2b358c1cf1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_rintf.S
@@ -0,0 +1,15 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_rintf.S,v 1.3 1995/05/09 00:17:22 jtc Exp $")
+
+ENTRY(__rintf)
+ flds 4(%esp)
+ frndint
+ ret
+END (__rintf)
+weak_alias (__rintf, rintf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_rintl.c b/REORG.TODO/sysdeps/i386/fpu/s_rintl.c
new file mode 100644
index 0000000000..66af9cb675
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_rintl.c
@@ -0,0 +1,18 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <math_private.h>
+
+long double
+__rintl (long double x)
+{
+ long double res;
+
+ asm ("frndint" : "=t" (res) : "0" (x));
+ return res;
+}
+
+weak_alias (__rintl, rintl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_scalbln.c b/REORG.TODO/sysdeps/i386/fpu/s_scalbln.c
new file mode 100644
index 0000000000..1009713fbc
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_scalbln.c
@@ -0,0 +1,2 @@
+/* Nothing to do. This function is the same as scalbn. So we define an
+ alias. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_scalblnf.c b/REORG.TODO/sysdeps/i386/fpu/s_scalblnf.c
new file mode 100644
index 0000000000..5e558c3540
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_scalblnf.c
@@ -0,0 +1,2 @@
+/* Nothing to do. This function is the same as scalbnf. So we define an
+ alias. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_scalblnl.c b/REORG.TODO/sysdeps/i386/fpu/s_scalblnl.c
new file mode 100644
index 0000000000..cda2ec11c8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_scalblnl.c
@@ -0,0 +1,2 @@
+/* Nothing to do. This function is the same as scalbnl. So we define an
+ alias. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_scalbn.S b/REORG.TODO/sysdeps/i386/fpu/s_scalbn.S
new file mode 100644
index 0000000000..4e90903115
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_scalbn.S
@@ -0,0 +1,24 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: s_scalbn.S,v 1.4 1995/05/09 00:19:06 jtc Exp $")
+
+ENTRY(__scalbn)
+ fildl 12(%esp)
+ fldl 4(%esp)
+ fscale
+ fstp %st(1)
+ DBL_NARROW_EVAL
+ ret
+END (__scalbn)
+strong_alias (__scalbn, __scalbln)
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_20)
+compat_symbol (libc, __scalbn, scalbln, GLIBC_2_1);
+#endif
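
fscale adds the integer part of st(1) to the exponent of st(0), so the
multiplication by 2^n is exact in the significand; DBL_NARROW_EVAL then forces
the result into double range so overflow and underflow are reported correctly.
Usage is simply:

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* scalbn (x, n) == x * 2^n, done by exponent manipulation.  */
      printf ("%g\n", scalbn (1.5, 4));     /* 24 */
      printf ("%g\n", scalbn (24.0, -4));   /* 1.5 */
      return 0;
    }
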
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_scalbnf.S b/REORG.TODO/sysdeps/i386/fpu/s_scalbnf.S
new file mode 100644
index 0000000000..f8353c4c75
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_scalbnf.S
@@ -0,0 +1,24 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+#include <i386-math-asm.h>
+
+RCSID("$NetBSD: s_scalbnf.S,v 1.3 1995/05/09 00:19:59 jtc Exp $")
+
+ENTRY(__scalbnf)
+ fildl 8(%esp)
+ flds 4(%esp)
+ fscale
+ fstp %st(1)
+ FLT_NARROW_EVAL
+ ret
+END (__scalbnf)
+strong_alias (__scalbnf, __scalblnf)
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_20)
+compat_symbol (libc, __scalbnf, scalblnf, GLIBC_2_1);
+#endif
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_scalbnl.S b/REORG.TODO/sysdeps/i386/fpu/s_scalbnl.S
new file mode 100644
index 0000000000..839b5ff353
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_scalbnl.S
@@ -0,0 +1,23 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: $")
+
+ENTRY(__scalbnl)
+ fildl 16(%esp)
+ fldt 4(%esp)
+ fscale
+ fstp %st(1)
+ ret
+END (__scalbnl)
+strong_alias (__scalbnl, __scalblnl)
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_20)
+compat_symbol (libc, __scalbnl, scalblnl, GLIBC_2_1);
+#endif
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_significand.S b/REORG.TODO/sysdeps/i386/fpu/s_significand.S
new file mode 100644
index 0000000000..4859b7ed71
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_significand.S
@@ -0,0 +1,16 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_significand.S,v 1.4 1995/05/09 00:21:47 jtc Exp $")
+
+ENTRY(__significand)
+ fldl 4(%esp)
+ fxtract
+ fstp %st(1)
+ ret
+END (__significand)
+weak_alias (__significand, significand)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_significandf.S b/REORG.TODO/sysdeps/i386/fpu/s_significandf.S
new file mode 100644
index 0000000000..3a2de97759
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_significandf.S
@@ -0,0 +1,16 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: s_significandf.S,v 1.3 1995/05/09 00:24:07 jtc Exp $")
+
+ENTRY(__significandf)
+ flds 4(%esp)
+ fxtract
+ fstp %st(1)
+ ret
+END (__significandf)
+weak_alias (__significandf, significandf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_significandl.c b/REORG.TODO/sysdeps/i386/fpu/s_significandl.c
new file mode 100644
index 0000000000..b8cb093502
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_significandl.c
@@ -0,0 +1,19 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Changes for long double by Ulrich Drepper <drepper@cygnus.com>
+ * Public domain.
+ */
+
+#include <math_private.h>
+
+long double
+__significandl (long double x)
+{
+ long double res;
+
+ asm ("fxtract\n"
+ "fstp %%st(1)" : "=t" (res) : "0" (x));
+ return res;
+}
+
+weak_alias (__significandl, significandl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_trunc.S b/REORG.TODO/sysdeps/i386/fpu/s_trunc.S
new file mode 100644
index 0000000000..e9a850b877
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_trunc.S
@@ -0,0 +1,37 @@
+/* Truncate double value.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ENTRY(__trunc)
+ fldl 4(%esp)
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ fnstenv 4(%esp)
+ movl $0xc00, %edx
+ orl 4(%esp), %edx
+ movl %edx, (%esp)
+ fldcw (%esp)
+ frndint
+ fldenv 4(%esp)
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END(__trunc)
+weak_alias (__trunc, trunc)
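
The 0xc00 mask sets both rounding-control bits in the copied control word,
selecting round-toward-zero, so frndint truncates; reloading the saved
environment afterwards restores the caller's rounding mode and exception
flags. The same effect, expressed portably in C (a hedged sketch, not how
glibc implements trunc elsewhere):

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      int save = fegetround ();
      fesetround (FE_TOWARDZERO);   /* what the 0xc00 mask selects on x87 */
      /* Under round-toward-zero, rint behaves like trunc.  */
      printf ("%g %g\n", rint (2.7), rint (-2.7));   /* 2 -2 */
      fesetround (save);
      return 0;
    }
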
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_truncf.S b/REORG.TODO/sysdeps/i386/fpu/s_truncf.S
new file mode 100644
index 0000000000..a93f5b9a2e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_truncf.S
@@ -0,0 +1,37 @@
+/* Truncate float value.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ENTRY(__truncf)
+ flds 4(%esp)
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ fnstenv 4(%esp)
+ movl $0xc00, %edx
+ orl 4(%esp), %edx
+ movl %edx, (%esp)
+ fldcw (%esp)
+ frndint
+ fldenv 4(%esp)
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END(__truncf)
+weak_alias (__truncf, truncf)
diff --git a/REORG.TODO/sysdeps/i386/fpu/s_truncl.S b/REORG.TODO/sysdeps/i386/fpu/s_truncl.S
new file mode 100644
index 0000000000..a884123612
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/s_truncl.S
@@ -0,0 +1,40 @@
+/* Truncate long double value.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <machine/asm.h>
+
+ENTRY(__truncl)
+ fldt 4(%esp)
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ fnstenv 4(%esp)
+ movl $0xc00, %edx
+ orl 4(%esp), %edx
+ movl %edx, (%esp)
+ fldcw (%esp)
+ frndint
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, 8(%esp)
+ fldenv 4(%esp)
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+ ret
+END(__truncl)
+weak_alias (__truncl, truncl)
diff --git a/REORG.TODO/sysdeps/i386/fpu/slowexp.c b/REORG.TODO/sysdeps/i386/fpu/slowexp.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/slowexp.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/slowpow.c b/REORG.TODO/sysdeps/i386/fpu/slowpow.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/slowpow.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/t_exp.c b/REORG.TODO/sysdeps/i386/fpu/t_exp.c
new file mode 100644
index 0000000000..fd37963b05
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/t_exp.c
@@ -0,0 +1 @@
+/* Empty. Not needed. */
diff --git a/REORG.TODO/sysdeps/i386/fpu/w_sqrt_compat.c b/REORG.TODO/sysdeps/i386/fpu/w_sqrt_compat.c
new file mode 100644
index 0000000000..ddd36d0964
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/fpu/w_sqrt_compat.c
@@ -0,0 +1,8 @@
+/* The inline __ieee754_sqrt is not correctly rounding; it's OK for
+ most internal uses in glibc, but not for sqrt itself. */
+#define __ieee754_sqrt __avoid_ieee754_sqrt
+#include <math.h>
+#include <math_private.h>
+#undef __ieee754_sqrt
+extern double __ieee754_sqrt (double);
+#include <math/w_sqrt_compat.c>
diff --git a/REORG.TODO/sysdeps/i386/gccframe.h b/REORG.TODO/sysdeps/i386/gccframe.h
new file mode 100644
index 0000000000..579da40ae9
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/gccframe.h
@@ -0,0 +1,27 @@
+/* Definition of object in frame unwind info. i386 version.
+ Copyright (C) 2001-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define DWARF_FRAME_REGISTERS 17
+
+#define CRT_GET_RFIB_DATA(BASE) \
+ { \
+ register void *__ebx __asm__("ebx");\
+ BASE = __ebx; \
+ }
+
+#include <sysdeps/generic/gccframe.h>
diff --git a/REORG.TODO/sysdeps/i386/gmp-mparam.h b/REORG.TODO/sysdeps/i386/gmp-mparam.h
new file mode 100644
index 0000000000..7ea503a403
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/gmp-mparam.h
@@ -0,0 +1,28 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+#define IEEE_DOUBLE_BIG_ENDIAN 0
diff --git a/REORG.TODO/sysdeps/i386/htonl.S b/REORG.TODO/sysdeps/i386/htonl.S
new file mode 100644
index 0000000000..63279bb6e1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/htonl.S
@@ -0,0 +1,34 @@
+/* Change byte order in word. For Intel 80x86, x >= 4.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+/*
+ INPUT PARAMETERS:
+ word (sp + 4)
+*/
+
+ .text
+ENTRY (htonl)
+ movl 4(%esp), %eax
+ bswap %eax
+ ret
+END (htonl)
+
+weak_alias (htonl, ntohl)
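
Since i386 is little-endian and network order is big-endian, a single bswap
(available from the i486 on, hence the "x >= 4" note) performs the conversion;
the operation is its own inverse, which is why ntohl can be a plain alias.
A usage sketch:

    #include <arpa/inet.h>
    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint32_t host = 0x11223344;
      uint32_t net = htonl (host);
      /* htonl and ntohl are inverses; on little-endian both byte-swap.  */
      assert (ntohl (net) == host);
      return 0;
    }
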
diff --git a/REORG.TODO/sysdeps/i386/htons.S b/REORG.TODO/sysdeps/i386/htons.S
new file mode 100644
index 0000000000..a3c53a9944
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/htons.S
@@ -0,0 +1,35 @@
+/* Change byte order in word. For Intel 80x86, x >= 3.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+/*
+ INPUT PARAMETERS:
+ word (sp + 4)
+*/
+
+ .text
+ENTRY (htons)
+ movl 4(%esp), %eax
+ andl $0xffff, %eax
+ rorw $8, %ax
+ ret
+END (htons)
+
+weak_alias (htons, ntohs)
diff --git a/REORG.TODO/sysdeps/i386/i386-mcount.S b/REORG.TODO/sysdeps/i386/i386-mcount.S
new file mode 100644
index 0000000000..733b8c78e7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i386-mcount.S
@@ -0,0 +1,79 @@
+/* i386-specific implementation of profiling support.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* We need a special version of the `mcount' function since for ix86 it
+ must not clobber any register.  There are several reasons for this:
+ - there is a bug in gcc as of version 2.7.2.2 which prohibits the
+ use of profiling together with nested functions
+ - the ELF `fixup' function uses GCC's regparm feature
+ - some (future) systems might want to pass parameters in registers. */
+
+ .globl C_SYMBOL_NAME(_mcount)
+ .type C_SYMBOL_NAME(_mcount), @function
+ .align ALIGNARG(4)
+C_LABEL(_mcount)
+ /* Save the caller-clobbered registers. */
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ movl 12(%esp), %edx
+ movl 4(%ebp), %eax
+
+ /* No need to access the PLT or GOT; __mcount_internal is an
+ internal function, so we can make a relative call. */
+ call C_SYMBOL_NAME(__mcount_internal)
+
+ /* Pop the saved registers. Please note that `mcount' has no
+ return value. */
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+ ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(_mcount))
+
+#undef mcount
+weak_alias (_mcount, mcount)
+
+ /* Same as above, but doesn't require a frame pointer.  */
+ .globl C_SYMBOL_NAME(__fentry__)
+ .type C_SYMBOL_NAME(__fentry__), @function
+ .align ALIGNARG(4)
+C_LABEL(__fentry__)
+ /* Save the caller-clobbered registers. */
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ movl 12(%esp), %edx
+ movl 16(%esp), %eax
+
+ /* No need to access the PLT or GOT; __mcount_internal is an
+ internal function, so we can make a relative call. */
+ call C_SYMBOL_NAME(__mcount_internal)
+
+ /* Pop the saved registers. Please note that `__fentry__' has no
+ return value. */
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+ ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(__fentry__))
diff --git a/REORG.TODO/sysdeps/i386/i586/add_n.S b/REORG.TODO/sysdeps/i386/i586/add_n.S
new file mode 100644
index 0000000000..f73df092f0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/add_n.S
@@ -0,0 +1,143 @@
+/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+ sum in a third limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define S2 S1+4
+#define SIZE S2+4
+
+ .text
+ENTRY (__mpn_add_n)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 12)
+ movl S1(%esp),%esi
+ cfi_rel_offset (esi, 8)
+ movl S2(%esp),%ebx
+ cfi_rel_offset (ebx, 0)
+ movl SIZE(%esp),%ecx
+ movl (%ebx),%ebp
+ cfi_rel_offset (ebp, 4)
+
+ decl %ecx
+ movl %ecx,%edx
+ shrl $3,%ecx
+ andl $7,%edx
+ testl %ecx,%ecx /* zero carry flag */
+ jz L(end)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+
+ ALIGN (3)
+L(oop): movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L(1): movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %ebp,%eax
+ movl 4(%ebx),%ebp
+ adcl %ebp,%edx
+ movl 8(%ebx),%ebp
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L(2): movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ adcl %ebp,%eax
+ movl 12(%ebx),%ebp
+ adcl %ebp,%edx
+ movl 16(%ebx),%ebp
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L(3): movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ adcl %ebp,%eax
+ movl 20(%ebx),%ebp
+ adcl %ebp,%edx
+ movl 24(%ebx),%ebp
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L(4): movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %ebp,%eax
+ movl 28(%ebx),%ebp
+ adcl %ebp,%edx
+ movl 32(%ebx),%ebp
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebx),%ebx
+ decl %ecx
+ jnz L(oop)
+
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+L(end):
+ decl %edx /* test %edx w/o clobbering carry */
+ js L(end2)
+ incl %edx
+L(oop2):
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ adcl %ebp,%eax
+ movl 4(%ebx),%ebp
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebx),%ebx
+ decl %edx
+ jnz L(oop2)
+L(end2):
+ movl (%esi),%eax
+ adcl %ebp,%eax
+ movl %eax,(%edi)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_add_n)
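
The unrolling keeps the carry live in EFLAGS across all eight adcl steps,
which is why pointer updates use leal and the loop counters use decl, neither
of which touches the carry flag. What __mpn_add_n computes, as a portable C
reference (an illustrative sketch; glibc's generic version lives in the mpn
sources):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* res[0..n) = s1[0..n) + s2[0..n); returns the carry out (0 or 1).  */
    static uint32_t
    mpn_add_n_ref (uint32_t *res, const uint32_t *s1,
                   const uint32_t *s2, size_t n)
    {
      uint32_t carry = 0;
      for (size_t i = 0; i < n; i++)
        {
          uint64_t t = (uint64_t) s1[i] + s2[i] + carry;
          res[i] = (uint32_t) t;
          carry = (uint32_t) (t >> 32);
        }
      return carry;
    }

    int
    main (void)
    {
      uint32_t a[2] = { 0xffffffff, 1 }, b[2] = { 1, 0 }, r[2];
      assert (mpn_add_n_ref (r, a, b, 2) == 0);
      assert (r[0] == 0 && r[1] == 2);   /* carry propagated into limb 1 */
      return 0;
    }
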
diff --git a/REORG.TODO/sysdeps/i386/i586/addmul_1.S b/REORG.TODO/sysdeps/i386/i586/addmul_1.S
new file mode 100644
index 0000000000..a713192982
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/addmul_1.S
@@ -0,0 +1,94 @@
+/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ the result to a second limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define SIZE S1+4
+#define S2LIMB SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebx
+
+ .text
+ENTRY (__mpn_addmul_1)
+
+ pushl %res_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %s1_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %s2_limb
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp), %res_ptr
+ cfi_rel_offset (res_ptr, 12)
+ movl S1(%esp), %s1_ptr
+ cfi_rel_offset (s1_ptr, 8)
+ movl SIZE(%esp), %size
+ movl S2LIMB(%esp), %s2_limb
+ cfi_rel_offset (s2_limb, 0)
+ leal (%res_ptr,%size,4), %res_ptr
+ leal (%s1_ptr,%size,4), %s1_ptr
+ negl %size
+ xorl %ebp, %ebp
+ cfi_rel_offset (ebp, 4)
+ ALIGN (3)
+
+L(oop): adcl $0, %ebp
+ movl (%s1_ptr,%size,4), %eax
+
+ mull %s2_limb
+
+ addl %ebp, %eax
+ movl (%res_ptr,%size,4), %ebp
+
+ adcl $0, %edx
+ addl %eax, %ebp
+
+ movl %ebp, (%res_ptr,%size,4)
+ incl %size
+
+ movl %edx, %ebp
+ jnz L(oop)
+
+ adcl $0, %ebp
+ movl %ebp, %eax
+ popl %s2_limb
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s2_limb)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %s1_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s1_ptr)
+ popl %res_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (res_ptr)
+
+ ret
+#undef size
+END (__mpn_addmul_1)
diff --git a/REORG.TODO/sysdeps/i386/i586/bzero.S b/REORG.TODO/sysdeps/i386/i586/bzero.S
new file mode 100644
index 0000000000..2a106719a4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/bzero.S
@@ -0,0 +1,4 @@
+#define USE_AS_BZERO
+#define memset __bzero
+#include <sysdeps/i386/i586/memset.S>
+weak_alias (__bzero, bzero)
diff --git a/REORG.TODO/sysdeps/i386/i586/init-arch.h b/REORG.TODO/sysdeps/i386/i586/init-arch.h
new file mode 100644
index 0000000000..4711212e6f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/init-arch.h
@@ -0,0 +1,19 @@
+/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define MINIMUM_ISA 586
+#include <sysdeps/x86/init-arch.h>
diff --git a/REORG.TODO/sysdeps/i386/i586/lshift.S b/REORG.TODO/sysdeps/i386/i586/lshift.S
new file mode 100644
index 0000000000..7941c28d9d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/lshift.S
@@ -0,0 +1,255 @@
+/* Pentium optimized __mpn_lshift --
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S RES+4
+#define SIZE S+4
+#define CNT SIZE+4
+
+ .text
+ENTRY (__mpn_lshift)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebp, 0)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 12)
+ movl S(%esp),%esi
+ cfi_rel_offset (esi, 8)
+ movl SIZE(%esp),%ebx
+ cfi_rel_offset (ebx, 0)
+ movl CNT(%esp),%ecx
+
+/* We can use faster code for shift-by-1 under certain conditions. */
+ cmp $1,%ecx
+ jne L(normal)
+ leal 4(%esi),%eax
+ cmpl %edi,%eax
+ jnc L(special) /* jump if s_ptr + 1 >= res_ptr */
+ leal (%esi,%ebx,4),%eax
+ cmpl %eax,%edi
+ jnc L(special) /* jump if res_ptr >= s_ptr + size */
+
+L(normal):
+ leal -4(%edi,%ebx,4),%edi
+ leal -4(%esi,%ebx,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+ xorl %eax,%eax
+ shldl %cl,%edx,%eax /* compute carry limb */
+ pushl %eax /* push carry limb onto stack */
+ cfi_adjust_cfa_offset (4)
+
+ decl %ebx
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ shrl $3,%ebx
+ jz L(end)
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+L(oop): movl -28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebp
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ shldl %cl,%eax,%ebp
+ shldl %cl,%edx,%eax
+ movl %ebp,(%edi)
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebp
+ movl -12(%esi),%eax
+ shldl %cl,%ebp,%edx
+ shldl %cl,%eax,%ebp
+ movl %edx,-8(%edi)
+ movl %ebp,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebp
+ shldl %cl,%edx,%eax
+ shldl %cl,%ebp,%edx
+ movl %eax,-16(%edi)
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ shldl %cl,%eax,%ebp
+ shldl %cl,%edx,%eax
+ movl %ebp,-24(%edi)
+ movl %eax,-28(%edi)
+
+ subl $32,%esi
+ subl $32,%edi
+ decl %ebx
+ jnz L(oop)
+
+L(end): popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ andl $7,%ebx
+ jz L(end2)
+L(oop2):
+ movl (%esi),%eax
+ shldl %cl,%eax,%edx
+ movl %edx,(%edi)
+ movl %eax,%edx
+ subl $4,%esi
+ subl $4,%edi
+ decl %ebx
+ jnz L(oop2)
+
+L(end2):
+ shll %cl,%edx /* compute least significant limb */
+ movl %edx,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+ cfi_adjust_cfa_offset (-4)
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+
+/* This variant loops from the least significant end of the arrays, which
+   is only permissible when the source and destination do not overlap
+   destructively; the range checks above guarantee that, since the function
+   is documented to work for overlapping source and destination.
+*/
+
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (edi, 12)
+ cfi_rel_offset (esi, 8)
+ cfi_rel_offset (ebp, 4)
+ cfi_rel_offset (ebx, 0)
+L(special):
+ movl (%esi),%edx
+ addl $4,%esi
+
+ decl %ebx
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ shrl $3,%ebx
+
+ addl %edx,%edx
+ incl %ebx
+ decl %ebx
+ jz L(Lend)
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+L(Loop):
+ movl 28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebp
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %eax,%eax
+ movl %ebp,(%edi)
+ adcl %edx,%edx
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebp
+ movl 12(%esi),%eax
+ adcl %ebp,%ebp
+ movl %edx,8(%edi)
+ adcl %eax,%eax
+ movl %ebp,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebp
+ adcl %edx,%edx
+ movl %eax,16(%edi)
+ adcl %ebp,%ebp
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %eax,%eax
+ movl %ebp,24(%edi)
+ adcl %edx,%edx
+ movl %eax,28(%edi)
+
+ leal 32(%esi),%esi /* use leal not to clobber carry */
+ leal 32(%edi),%edi
+ decl %ebx
+ jnz L(Loop)
+
+L(Lend):
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ sbbl %eax,%eax /* save carry in %eax */
+ andl $7,%ebx
+ jz L(Lend2)
+ addl %eax,%eax /* restore carry from eax */
+L(Loop2):
+ movl %edx,%ebp
+ movl (%esi),%edx
+ adcl %edx,%edx
+ movl %ebp,(%edi)
+
+ leal 4(%esi),%esi /* use leal not to clobber carry */
+ leal 4(%edi),%edi
+ decl %ebx
+ jnz L(Loop2)
+
+ jmp L(L1)
+L(Lend2):
+ addl %eax,%eax /* restore carry from eax */
+L(L1): movl %edx,(%edi) /* store last limb */
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_lshift)
diff --git a/REORG.TODO/sysdeps/i386/i586/memcopy.h b/REORG.TODO/sysdeps/i386/i586/memcopy.h
new file mode 100644
index 0000000000..39f020a746
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memcopy.h
@@ -0,0 +1,95 @@
+/* memcopy.h -- definitions for memory copy functions. Pentium version.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Get the i386 definitions. We will override some of them below. */
+#include <sysdeps/i386/memcopy.h>
+
+/* Written like this, the Pentium pipeline can execute the loop at a
+   sustained rate of 2 instructions/clock, or asymptotically 480
+   Mbytes/second at 60 MHz.  */
+
+#undef WORD_COPY_FWD
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
+ do \
+ { \
+ asm volatile ("subl $32,%2\n" \
+ "js 2f\n" \
+ "movl 0(%0),%%edx\n" /* alloc dest line */ \
+ "1:\n" \
+ "movl 28(%0),%%eax\n" /* alloc dest line */ \
+ "subl $32,%2\n" /* decr loop count */ \
+ "movl 0(%1),%%eax\n" /* U pipe */ \
+ "movl 4(%1),%%edx\n" /* V pipe */ \
+ "movl %%eax,0(%0)\n" /* U pipe */ \
+ "movl %%edx,4(%0)\n" /* V pipe */ \
+ "movl 8(%1),%%eax\n" \
+ "movl 12(%1),%%edx\n" \
+ "movl %%eax,8(%0)\n" \
+ "movl %%edx,12(%0)\n" \
+ "movl 16(%1),%%eax\n" \
+ "movl 20(%1),%%edx\n" \
+ "movl %%eax,16(%0)\n" \
+ "movl %%edx,20(%0)\n" \
+ "movl 24(%1),%%eax\n" \
+ "movl 28(%1),%%edx\n" \
+ "movl %%eax,24(%0)\n" \
+ "movl %%edx,28(%0)\n" \
+ "leal 32(%1),%1\n" /* update src ptr */ \
+ "leal 32(%0),%0\n" /* update dst ptr */ \
+ "jns 1b\n" \
+ "2: addl $32,%2" : \
+ "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left) : \
+ "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \
+ "ax", "dx"); \
+ } while (0)
+
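+/* A minimal sketch of how the generic copy routines consume this macro
+   (an assumption about the callers, shown for illustration only):
+
+     WORD_COPY_FWD (dstp, srcp, nbytes_left, nbytes);
+     nbytes = nbytes_left;
+     BYTE_COPY_FWD (dstp, srcp, nbytes);
+
+   i.e. the macro copies as many whole 32-byte blocks as possible and
+   reports the remaining byte count in its third argument.  */
+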
+#undef WORD_COPY_BWD
+#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \
+ do \
+ { \
+ asm volatile ("subl $32,%2\n" \
+ "js 2f\n" \
+ "movl -4(%0),%%edx\n" \
+ "1:\n" \
+ "movl -32(%0),%%eax\n" \
+ "subl $32,%2\n" \
+ "movl -4(%1),%%eax\n" \
+ "movl -8(%1),%%edx\n" \
+ "movl %%eax,-4(%0)\n" \
+ "movl %%edx,-8(%0)\n" \
+ "movl -12(%1),%%eax\n" \
+ "movl -16(%1),%%edx\n" \
+ "movl %%eax,-12(%0)\n" \
+ "movl %%edx,-16(%0)\n" \
+ "movl -20(%1),%%eax\n" \
+ "movl -24(%1),%%edx\n" \
+ "movl %%eax,-20(%0)\n" \
+ "movl %%edx,-24(%0)\n" \
+ "movl -28(%1),%%eax\n" \
+ "movl -32(%1),%%edx\n" \
+ "movl %%eax,-28(%0)\n" \
+ "movl %%edx,-32(%0)\n" \
+ "leal -32(%1),%1\n" \
+ "leal -32(%0),%0\n" \
+ "jns 1b\n" \
+ "2: addl $32,%2" : \
+ "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left) : \
+ "0" (dst_ep), "1" (src_ep), "2" (nbytes) : \
+ "ax", "dx"); \
+ } while (0)
diff --git a/REORG.TODO/sysdeps/i386/i586/memcpy.S b/REORG.TODO/sysdeps/i386/i586/memcpy.S
new file mode 100644
index 0000000000..6474a3f653
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memcpy.S
@@ -0,0 +1,124 @@
+/* Highly optimized version for i586.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+8 /* space for 2 saved regs */
+#define RTN PARMS
+#define DEST RTN
+#define SRC DEST+4
+#define LEN SRC+4
+
+ .text
+#if defined PIC && IS_IN (libc)
+ENTRY (__memcpy_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memcpy_chk)
+#endif
+ENTRY (memcpy)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+
+ movl DEST(%esp), %edi
+ cfi_rel_offset (edi, 4)
+ movl SRC(%esp), %esi
+ cfi_rel_offset (esi, 0)
+ movl LEN(%esp), %ecx
+ movl %edi, %eax
+
+ /* We need this in any case. */
+ cld
+
+ /* Cutoff for the big loop is a size of 32 bytes since otherwise
+ the loop will never be entered. */
+ cmpl $32, %ecx
+ jbe L(1)
+
+ negl %eax
+ andl $3, %eax
+ subl %eax, %ecx
+ xchgl %eax, %ecx
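+
+	/* Illustrative note: the negl/andl pair above computes
+	   (-dest) & 3, the number of bytes needed to align %edi to a
+	   4-byte boundary; after the xchgl, %ecx holds that count for
+	   the movsb below while %eax keeps the remaining length.  In C
+	   terms, roughly:  align = -(uintptr_t) dst & 3;  n -= align;  */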
+
+ rep; movsb
+
+ movl %eax, %ecx
+ subl $32, %ecx
+ js L(2)
+
+ /* Read ahead to make sure we write in the cache since the stupid
+ i586 designers haven't implemented read-on-write-miss. */
+ movl (%edi), %eax
+L(3): movl 28(%edi), %edx
+
+ /* Now correct the loop counter. Please note that in the following
+ code the flags are not changed anymore. */
+ subl $32, %ecx
+
+ movl (%esi), %eax
+ movl 4(%esi), %edx
+ movl %eax, (%edi)
+ movl %edx, 4(%edi)
+ movl 8(%esi), %eax
+ movl 12(%esi), %edx
+ movl %eax, 8(%edi)
+ movl %edx, 12(%edi)
+ movl 16(%esi), %eax
+ movl 20(%esi), %edx
+ movl %eax, 16(%edi)
+ movl %edx, 20(%edi)
+ movl 24(%esi), %eax
+ movl 28(%esi), %edx
+ movl %eax, 24(%edi)
+ movl %edx, 28(%edi)
+
+ leal 32(%esi), %esi
+ leal 32(%edi), %edi
+
+ jns L(3)
+
+ /* Correct extra loop counter modification. */
+L(2): addl $32, %ecx
+#ifndef USE_AS_MEMPCPY
+ movl DEST(%esp), %eax
+#endif
+
+L(1): rep; movsb
+
+#ifdef USE_AS_MEMPCPY
+ movl %edi, %eax
+#endif
+
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (memcpy)
+#ifndef USE_AS_MEMPCPY
+libc_hidden_builtin_def (memcpy)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i586/mempcpy.S b/REORG.TODO/sysdeps/i386/i586/mempcpy.S
new file mode 100644
index 0000000000..720a4c0923
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/mempcpy.S
@@ -0,0 +1,8 @@
+#define USE_AS_MEMPCPY
+#define memcpy __mempcpy
+#define __memcpy_chk __mempcpy_chk
+#include <sysdeps/i386/i586/memcpy.S>
+
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
diff --git a/REORG.TODO/sysdeps/i386/i586/memset.S b/REORG.TODO/sysdeps/i386/i586/memset.S
new file mode 100644
index 0000000000..4f8f1bcf94
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memset.S
@@ -0,0 +1,121 @@
+/* memset/bzero -- set memory area to CH/0
+ Highly optimized version for ix86, x>=5.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund, <tege@matematik.su.se>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#define RTN PARMS
+#define DEST RTN
+#ifdef USE_AS_BZERO
+# define LEN DEST+4
+#else
+# define CHR DEST+4
+# define LEN CHR+4
+#endif
+
+ .text
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
+ENTRY (__memset_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk)
+#endif
+ENTRY (memset)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+
+ movl DEST(%esp), %edi
+ cfi_rel_offset (edi, 0)
+ movl LEN(%esp), %edx
+#ifdef USE_AS_BZERO
+ xorl %eax, %eax /* we fill with 0 */
+#else
+ movb CHR(%esp), %al
+ movb %al, %ah
+ movl %eax, %ecx
+ shll $16, %eax
+ movw %cx, %ax
+#endif
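+
+	/* In the memset case, the sequence above replicates the fill
+	   byte into all four bytes of %eax; an equivalent C sketch
+	   (illustrative):  word = (uint32_t) ch * 0x01010101u;  */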
+ cld
+
+/* If less than 36 bytes to write, skip tricky code (it wouldn't work). */
+ cmpl $36, %edx
+ movl %edx, %ecx /* needed when branch is taken! */
+ jl L(2)
+
+/* First write 0-3 bytes to make the pointer 32-bit aligned. */
+ movl %edi, %ecx /* Copy ptr to ecx... */
+ negl %ecx /* ...and negate that and... */
+ andl $3, %ecx /* ...mask to get byte count. */
+ subl %ecx, %edx /* adjust global byte count */
+ rep
+ stosb
+
+ subl $32, %edx /* offset count for unrolled loop */
+ movl (%edi), %ecx /* Fetch destination cache line */
+
+ .align 2, 0x90 /* supply 0x90 for broken assemblers */
+L(1): movl 28(%edi), %ecx /* allocate cache line for destination */
+ subl $32, %edx /* decr loop count */
+ movl %eax, 0(%edi) /* store words pairwise */
+ movl %eax, 4(%edi)
+ movl %eax, 8(%edi)
+ movl %eax, 12(%edi)
+ movl %eax, 16(%edi)
+ movl %eax, 20(%edi)
+ movl %eax, 24(%edi)
+ movl %eax, 28(%edi)
+ leal 32(%edi), %edi /* update destination pointer */
+ jge L(1)
+
+ leal 32(%edx), %ecx /* reset offset count */
+
+/* Write last 0-7 full 32-bit words (up to 8 words if loop was skipped). */
+L(2): shrl $2, %ecx /* convert byte count to longword count */
+ rep
+ stosl
+
+/* Finally write the last 0-3 bytes. */
+ movl %edx, %ecx
+ andl $3, %ecx
+ rep
+ stosb
+
+#ifndef USE_AS_BZERO
+ /* Load result (only if used as memset). */
+ movl DEST(%esp), %eax /* start address of destination is result */
+#endif
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (memset)
+libc_hidden_builtin_def (memset)
+
+#if defined SHARED && IS_IN (libc) && !defined __memset_chk \
+ && !defined USE_AS_BZERO
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+ .section .gnu.warning.__memset_zero_constant_len_parameter
+ .string "memset used with constant zero length parameter; this could be due to transposed parameters"
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i586/memusage.h b/REORG.TODO/sysdeps/i386/i586/memusage.h
new file mode 100644
index 0000000000..c8170874d0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memusage.h
@@ -0,0 +1 @@
+#include "../i686/memusage.h"
diff --git a/REORG.TODO/sysdeps/i386/i586/mul_1.S b/REORG.TODO/sysdeps/i386/i586/mul_1.S
new file mode 100644
index 0000000000..bd3a07de90
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/mul_1.S
@@ -0,0 +1,90 @@
+/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
+ the result in a second limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define SIZE S1+4
+#define S2LIMB SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebx
+
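+/* Reference semantics in C (an illustrative sketch assuming 32-bit
+   limbs; not part of the build):
+
+     mp_limb_t carry = 0;
+     for (mp_size_t i = 0; i < size; i++)
+       {
+         uint64_t p = (uint64_t) s1_ptr[i] * s2_limb + carry;
+         res_ptr[i] = (mp_limb_t) p;
+         carry = (mp_limb_t) (p >> 32);
+       }
+     return carry;
+*/
+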
+ .text
+ENTRY (__mpn_mul_1)
+
+ pushl %res_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %s1_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %s2_limb
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp), %res_ptr
+ cfi_rel_offset (res_ptr, 12)
+ movl S1(%esp), %s1_ptr
+ cfi_rel_offset (s1_ptr, 8)
+ movl SIZE(%esp), %size
+ movl S2LIMB(%esp), %s2_limb
+ cfi_rel_offset (s2_limb, 0)
+ leal (%res_ptr,%size,4), %res_ptr
+ leal (%s1_ptr,%size,4), %s1_ptr
+ negl %size
+ xorl %ebp, %ebp
+ cfi_rel_offset (ebp, 4)
+ ALIGN (3)
+
+L(oop): adcl $0, %ebp
+ movl (%s1_ptr,%size,4), %eax
+
+ mull %s2_limb
+
+ addl %eax, %ebp
+
+ movl %ebp, (%res_ptr,%size,4)
+ incl %size
+
+ movl %edx, %ebp
+ jnz L(oop)
+
+ adcl $0, %ebp
+ movl %ebp, %eax
+ popl %s2_limb
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s2_limb)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %s1_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s1_ptr)
+ popl %res_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (res_ptr)
+
+ ret
+#undef size
+END (__mpn_mul_1)
diff --git a/REORG.TODO/sysdeps/i386/i586/rshift.S b/REORG.TODO/sysdeps/i386/i586/rshift.S
new file mode 100644
index 0000000000..24c76ee0bb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/rshift.S
@@ -0,0 +1,255 @@
+/* Pentium optimized __mpn_rshift --
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S RES+4
+#define SIZE S+4
+#define CNT SIZE+4
+
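+/* Reference semantics in C (an illustrative sketch assuming 32-bit
+   limbs and 0 < cnt < 32; not part of the build):
+
+     mp_limb_t retval = s[0] << (32 - cnt);   -- the bits shifted out
+     for (mp_size_t i = 0; i < size - 1; i++)
+       res[i] = (s[i] >> cnt) | (s[i + 1] << (32 - cnt));
+     res[size - 1] = s[size - 1] >> cnt;
+     return retval;
+*/
+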
+ .text
+ENTRY (__mpn_rshift)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebp, 0)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 12)
+ movl S(%esp),%esi
+ cfi_rel_offset (esi, 8)
+ movl SIZE(%esp),%ebx
+ cfi_rel_offset (ebx, 0)
+ movl CNT(%esp),%ecx
+
+/* We can use faster code for shift-by-1 under certain conditions. */
+ cmp $1,%ecx
+ jne L(normal)
+ leal 4(%edi),%eax
+ cmpl %esi,%eax
+ jnc L(special) /* jump if res_ptr + 1 >= s_ptr */
+ leal (%edi,%ebx,4),%eax
+ cmpl %eax,%esi
+ jnc L(special) /* jump if s_ptr >= res_ptr + size */
+
+L(normal):
+ movl (%esi),%edx
+ addl $4,%esi
+ xorl %eax,%eax
+ shrdl %cl,%edx,%eax /* compute carry limb */
+ pushl %eax /* push carry limb onto stack */
+ cfi_adjust_cfa_offset (4)
+
+ decl %ebx
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ shrl $3,%ebx
+ jz L(end)
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+L(oop): movl 28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebp
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ shrdl %cl,%eax,%ebp
+ shrdl %cl,%edx,%eax
+ movl %ebp,(%edi)
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebp
+ movl 12(%esi),%eax
+ shrdl %cl,%ebp,%edx
+ shrdl %cl,%eax,%ebp
+ movl %edx,8(%edi)
+ movl %ebp,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebp
+ shrdl %cl,%edx,%eax
+ shrdl %cl,%ebp,%edx
+ movl %eax,16(%edi)
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ shrdl %cl,%eax,%ebp
+ shrdl %cl,%edx,%eax
+ movl %ebp,24(%edi)
+ movl %eax,28(%edi)
+
+ addl $32,%esi
+ addl $32,%edi
+ decl %ebx
+ jnz L(oop)
+
+L(end): popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ andl $7,%ebx
+ jz L(end2)
+L(oop2):
+ movl (%esi),%eax
+ shrdl %cl,%eax,%edx /* compute result limb */
+ movl %edx,(%edi)
+ movl %eax,%edx
+ addl $4,%esi
+ addl $4,%edi
+ decl %ebx
+ jnz L(oop2)
+
+L(end2):
+ shrl %cl,%edx /* compute most significant limb */
+ movl %edx,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+ cfi_adjust_cfa_offset (-4)
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+
+/* This path loops from the most significant end of the arrays, which is
+   only permissible for the overlap cases selected by the checks above,
+   since the function is documented to work for overlapping source and
+   destination.
+*/
+
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (edi, 12)
+ cfi_rel_offset (esi, 8)
+ cfi_rel_offset (ebp, 4)
+ cfi_rel_offset (ebx, 0)
+L(special):
+ leal -4(%edi,%ebx,4),%edi
+ leal -4(%esi,%ebx,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+
+ decl %ebx
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ shrl $3,%ebx
+
+ shrl $1,%edx
+ incl %ebx
+ decl %ebx
+ jz L(Lend)
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+L(Loop):
+ movl -28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebp
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ rcrl $1,%eax
+ movl %ebp,(%edi)
+ rcrl $1,%edx
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebp
+ movl -12(%esi),%eax
+ rcrl $1,%ebp
+ movl %edx,-8(%edi)
+ rcrl $1,%eax
+ movl %ebp,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebp
+ rcrl $1,%edx
+ movl %eax,-16(%edi)
+ rcrl $1,%ebp
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ rcrl $1,%eax
+ movl %ebp,-24(%edi)
+ rcrl $1,%edx
+ movl %eax,-28(%edi)
+
+ leal -32(%esi),%esi /* use leal not to clobber carry */
+ leal -32(%edi),%edi
+ decl %ebx
+ jnz L(Loop)
+
+L(Lend):
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ sbbl %eax,%eax /* save carry in %eax */
+ andl $7,%ebx
+ jz L(Lend2)
+ addl %eax,%eax /* restore carry from eax */
+L(Loop2):
+ movl %edx,%ebp
+ movl (%esi),%edx
+ rcrl $1,%edx
+ movl %ebp,(%edi)
+
+ leal -4(%esi),%esi /* use leal not to clobber carry */
+ leal -4(%edi),%edi
+ decl %ebx
+ jnz L(Loop2)
+
+ jmp L(L1)
+L(Lend2):
+ addl %eax,%eax /* restore carry from eax */
+L(L1): movl %edx,(%edi) /* store last limb */
+
+ movl $0,%eax
+ rcrl $1,%eax
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_rshift)
diff --git a/REORG.TODO/sysdeps/i386/i586/stpcpy.S b/REORG.TODO/sysdeps/i386/i586/stpcpy.S
new file mode 100644
index 0000000000..8691efd01c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/stpcpy.S
@@ -0,0 +1,8 @@
+#define USE_AS_STPCPY
+#define STRCPY __stpcpy
+
+#include <sysdeps/i386/i586/strcpy.S>
+
+weak_alias (__stpcpy, stpcpy)
+libc_hidden_def (__stpcpy)
+libc_hidden_builtin_def (stpcpy)
diff --git a/REORG.TODO/sysdeps/i386/i586/strchr.S b/REORG.TODO/sysdeps/i386/i586/strchr.S
new file mode 100644
index 0000000000..02f66b8f72
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/strchr.S
@@ -0,0 +1,348 @@
+/* Find character CH in a NUL terminated string.
+   Highly optimized version for ix86, x>=5.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+/* This version is especially optimized for the i586 (and later?)
+   processors.  This is mainly done by using the two pipelines.  The
+   version optimized for the i486 is weak in this respect because, to
+   get as much parallelism, we have to execute some *more* instructions.
+
+   The code below is structured to reflect the pairing of the
+   instructions as *I think* it is.  I have no processor data book to
+   verify this.  If you find something you think is incorrect, let me
+   know.  */
+
+
+/* The magic value used throughout this code.  */
+#define magic 0xfefefeff
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RTN PARMS
+#define STR RTN
+#define CHR STR+4
+
+ .text
+ENTRY (strchr)
+
+	pushl %edi		/* Save callee-safe registers.  */
+	cfi_adjust_cfa_offset (4)
+	pushl %esi
+	cfi_adjust_cfa_offset (4)
+
+	pushl %ebx
+	cfi_adjust_cfa_offset (4)
+	pushl %ebp
+	cfi_adjust_cfa_offset (4)
+
+ movl STR(%esp), %eax
+ movl CHR(%esp), %edx
+
+ movl %eax, %edi /* duplicate string pointer for later */
+ cfi_rel_offset (edi, 12)
+ xorl %ecx, %ecx /* clear %ecx */
+
+	/* At the moment %edx contains C.  What we need for the
+	   algorithm is C in all bytes of the dword.  Avoid
+	   operations on 16-bit words because these require a
+	   prefix byte (and one more cycle).  */
+ movb %dl, %dh /* now it is 0|0|c|c */
+ movb %dl, %cl /* we construct the lower half in %ecx */
+
+ shll $16, %edx /* now %edx is c|c|0|0 */
+ movb %cl, %ch /* now %ecx is 0|0|c|c */
+
+ orl %ecx, %edx /* and finally c|c|c|c */
+ andl $3, %edi /* mask alignment bits */
+
+ jz L(11) /* alignment is 0 => start loop */
+
+ movb %dl, %cl /* 0 is needed below */
+ jp L(0) /* exactly two bits set */
+
+ xorb (%eax), %cl /* is byte the one we are looking for? */
+ jz L(out) /* yes => return pointer */
+
+ xorb %dl, %cl /* load single byte and test for NUL */
+ je L(3) /* yes => return NULL */
+
+ movb 1(%eax), %cl /* load single byte */
+ incl %eax
+
+ cmpb %cl, %dl /* is byte == C? */
+ je L(out) /* aligned => return pointer */
+
+ cmpb $0, %cl /* is byte NUL? */
+ je L(3) /* yes => return NULL */
+
+ incl %eax
+ decl %edi
+
+ jne L(11)
+
+L(0): movb (%eax), %cl /* load single byte */
+
+ cmpb %cl, %dl /* is byte == C? */
+ je L(out) /* aligned => return pointer */
+
+ cmpb $0, %cl /* is byte NUL? */
+ je L(3) /* yes => return NULL */
+
+ incl %eax /* increment pointer */
+
+ cfi_rel_offset (esi, 8)
+ cfi_rel_offset (ebx, 4)
+ cfi_rel_offset (ebp, 0)
+
+ /* The following code is the preparation for the loop. The
+ four instruction up to `L1' will not be executed in the loop
+ because the same code is found at the end of the loop, but
+ there it is executed in parallel with other instructions. */
+L(11): movl (%eax), %ecx
+ movl $magic, %ebp
+
+ movl $magic, %edi
+ addl %ecx, %ebp
+
+	/* The main loop: it looks complex, and indeed it is.  I would
+	   love to say `it was hard to write, so it should be hard to
+	   read', but I will give some more hints.  To fully understand
+	   this code you should first take a look at the i486 version.
+	   The basic algorithm is the same, but here the code is organized
+	   in a way that permits using both pipelines all the time.
+
+	   I tried to make it a bit more understandable by indenting
+	   the code according to the stage in the algorithm.  It goes as
+	   follows:
+ check for 0 in 1st word
+ check for C in 1st word
+ check for 0 in 2nd word
+ check for C in 2nd word
+ check for 0 in 3rd word
+ check for C in 3rd word
+ check for 0 in 4th word
+ check for C in 4th word
+
+ Please note that doing the test for NUL before the test for
+ C allows us to overlap the test for 0 in the next word with
+ the test for C. */
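+
+	/* Illustrative C sketch of the two tests (an interpretation of
+	   the code, not part of the build): with magic = 0xfefefeff, a
+	   word w contains no NUL byte iff the addition w + magic carries
+	   out of bit 31 and flips every "hole" bit, i.e.
+
+	     (((w + magic) ^ w) | magic) == 0xffffffff
+
+	   and bytes equal to C are found by first XORing w with c|c|c|c,
+	   which turns them into NUL bytes, then applying the same test.  */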
+
+L(1): xorl %ecx, %ebp /* (word^magic) */
+ addl %ecx, %edi /* add magic word */
+
+ leal 4(%eax), %eax /* increment pointer */
+ jnc L(4) /* previous addl caused overflow? */
+
+ movl %ecx, %ebx /* duplicate original word */
+ orl $magic, %ebp /* (word^magic)|magic */
+
+ addl $1, %ebp /* (word^magic)|magic == 0xffffffff? */
+ jne L(4) /* yes => we found word with NUL */
+
+ movl $magic, %esi /* load magic value */
+ xorl %edx, %ebx /* clear words which are C */
+
+ movl (%eax), %ecx
+ addl %ebx, %esi /* (word+magic) */
+
+ movl $magic, %edi
+ jnc L(5) /* previous addl caused overflow? */
+
+ movl %edi, %ebp
+ xorl %ebx, %esi /* (word+magic)^word */
+
+ addl %ecx, %ebp
+ orl $magic, %esi /* ((word+magic)^word)|magic */
+
+ addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/
+ jne L(5) /* yes => we found word with C */
+
+ xorl %ecx, %ebp
+ addl %ecx, %edi
+
+ leal 4(%eax), %eax
+ jnc L(4)
+
+ movl %ecx, %ebx
+ orl $magic, %ebp
+
+ addl $1, %ebp
+ jne L(4)
+
+ movl $magic, %esi
+ xorl %edx, %ebx
+
+ movl (%eax), %ecx
+ addl %ebx, %esi
+
+ movl $magic, %edi
+ jnc L(5)
+
+ movl %edi, %ebp
+ xorl %ebx, %esi
+
+ addl %ecx, %ebp
+ orl $magic, %esi
+
+ addl $1, %esi
+ jne L(5)
+
+ xorl %ecx, %ebp
+ addl %ecx, %edi
+
+ leal 4(%eax), %eax
+ jnc L(4)
+
+ movl %ecx, %ebx
+ orl $magic, %ebp
+
+ addl $1, %ebp
+ jne L(4)
+
+ movl $magic, %esi
+ xorl %edx, %ebx
+
+ movl (%eax), %ecx
+ addl %ebx, %esi
+
+ movl $magic, %edi
+ jnc L(5)
+
+ movl %edi, %ebp
+ xorl %ebx, %esi
+
+ addl %ecx, %ebp
+ orl $magic, %esi
+
+ addl $1, %esi
+ jne L(5)
+
+ xorl %ecx, %ebp
+ addl %ecx, %edi
+
+ leal 4(%eax), %eax
+ jnc L(4)
+
+ movl %ecx, %ebx
+ orl $magic, %ebp
+
+ addl $1, %ebp
+ jne L(4)
+
+ movl $magic, %esi
+ xorl %edx, %ebx
+
+ movl (%eax), %ecx
+ addl %ebx, %esi
+
+ movl $magic, %edi
+ jnc L(5)
+
+ movl %edi, %ebp
+ xorl %ebx, %esi
+
+ addl %ecx, %ebp
+ orl $magic, %esi
+
+ addl $1, %esi
+
+ je L(1)
+
+	/* We know the word contains a C byte but no NUL byte before it.
+	   In %ebx, the byte holding C has been turned into NUL.  */
+L(5): subl $4, %eax /* adjust pointer */
+ testb %bl, %bl /* first byte == C? */
+
+ jz L(out) /* yes => return pointer */
+
+ incl %eax /* increment pointer */
+ testb %bh, %bh /* second byte == C? */
+
+ jz L(out) /* yes => return pointer */
+
+ shrl $16, %ebx /* make upper bytes accessible */
+ incl %eax /* increment pointer */
+
+ cmp $0, %bl /* third byte == C */
+ je L(out) /* yes => return pointer */
+
+ incl %eax /* increment pointer */
+
+L(out): popl %ebp /* restore saved registers */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (edi, 12)
+ cfi_rel_offset (esi, 8)
+ cfi_rel_offset (ebx, 4)
+ cfi_rel_offset (ebp, 0)
+	/* We know there is a NUL byte in the word.  But we have to test
+	   whether there is a C byte before it in the word.  */
+L(4): subl $4, %eax /* adjust pointer */
+ cmpb %dl, %cl /* first byte == C? */
+
+ je L(out) /* yes => return pointer */
+
+ cmpb $0, %cl /* first byte == NUL? */
+ je L(3) /* yes => return NULL */
+
+ incl %eax /* increment pointer */
+
+ cmpb %dl, %ch /* second byte == C? */
+ je L(out) /* yes => return pointer */
+
+ cmpb $0, %ch /* second byte == NUL? */
+ je L(3) /* yes => return NULL */
+
+ shrl $16, %ecx /* make upper bytes accessible */
+ incl %eax /* increment pointer */
+
+ cmpb %dl, %cl /* third byte == C? */
+ je L(out) /* yes => return pointer */
+
+ cmpb $0, %cl /* third byte == NUL? */
+ je L(3) /* yes => return NULL */
+
+ incl %eax /* increment pointer */
+
+	/* The test for the fourth byte is necessary!  */
+ cmpb %dl, %ch /* fourth byte == C? */
+ je L(out) /* yes => return pointer */
+
+L(3): xorl %eax, %eax
+ jmp L(out)
+END (strchr)
+
+#undef index
+weak_alias (strchr, index)
+libc_hidden_builtin_def (strchr)
diff --git a/REORG.TODO/sysdeps/i386/i586/strcpy.S b/REORG.TODO/sysdeps/i386/i586/strcpy.S
new file mode 100644
index 0000000000..a444604f4f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/strcpy.S
@@ -0,0 +1,169 @@
+/* strcpy/stpcpy implementation for i586.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+12 /* space for 3 saved regs */
+#define RTN PARMS
+#define DEST RTN
+#define SRC DEST+4
+
+#ifndef USE_AS_STPCPY
+# define STRCPY strcpy
+#endif
+
+#define magic 0xfefefeff
+
+ .text
+ENTRY (STRCPY)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+
+ movl DEST(%esp), %edi
+ cfi_rel_offset (edi, 8)
+ movl SRC(%esp), %esi
+ cfi_rel_offset (esi, 4)
+
+ xorl %eax, %eax
+ leal -1(%esi), %ecx
+
+ movl $magic, %ebx
+ cfi_rel_offset (ebx, 0)
+ andl $3, %ecx
+
+#ifdef PIC
+ call 2f
+ cfi_adjust_cfa_offset (4)
+2: popl %edx
+ cfi_adjust_cfa_offset (-4)
+ /* 0xb is the distance between 2: and 1: but we avoid writing
+ 1f-2b because the assembler generates worse code. */
+ leal 0xb(%edx,%ecx,8), %ecx
+#else
+ leal 1f(,%ecx,8), %ecx
+#endif
+
+ jmp *%ecx
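+
+	/* Illustrative note: %ecx was derived from (src - 1) & 3, so the
+	   jump enters the byte-copy stubs below at 1: + 8 * ((src - 1) & 3),
+	   executing 3, 2, 1 or 0 single-byte copies (each stub assembles
+	   to 8 bytes) until %esi reaches a word boundary.  */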
+
+ .align 8
+1:
+ orb (%esi), %al
+ jz L(end)
+ stosb
+ xorl %eax, %eax
+ incl %esi
+
+ orb (%esi), %al
+ jz L(end)
+ stosb
+ xorl %eax, %eax
+ incl %esi
+
+ orb (%esi), %al
+ jz L(end)
+ stosb
+ xorl %eax, %eax
+ incl %esi
+
+L(1): movl (%esi), %ecx
+ leal 4(%esi),%esi
+
+ subl %ecx, %eax
+ addl %ebx, %ecx
+
+ decl %eax
+ jnc L(3)
+
+ movl %ecx, %edx
+ xorl %ecx, %eax
+
+ subl %ebx, %edx
+ andl $~magic, %eax
+
+ jne L(4)
+
+ movl %edx, (%edi)
+ leal 4(%edi),%edi
+
+ jmp L(1)
+
+L(3): movl %ecx, %edx
+
+ subl %ebx, %edx
+
+L(4): movb %dl, (%edi)
+ testb %dl, %dl
+
+ movl %edx, %eax
+ jz L(end2)
+
+ shrl $16, %eax
+ movb %dh, 1(%edi)
+#ifdef USE_AS_STPCPY
+ addl $1, %edi
+#endif
+
+ cmpb $0, %dh
+ jz L(end2)
+
+#ifdef USE_AS_STPCPY
+ movb %al, 1(%edi)
+ addl $1, %edi
+
+ cmpb $0, %al
+ jz L(end2)
+
+ addl $1, %edi
+#else
+ movb %al, 2(%edi)
+ testb %al, %al
+
+ leal 3(%edi), %edi
+ jz L(end2)
+#endif
+
+L(end): movb %ah, (%edi)
+
+L(end2):
+#ifdef USE_AS_STPCPY
+ movl %edi, %eax
+#else
+ movl DEST(%esp), %eax
+#endif
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (STRCPY)
+#ifndef USE_AS_STPCPY
+libc_hidden_builtin_def (strcpy)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i586/strlen.S b/REORG.TODO/sysdeps/i386/i586/strlen.S
new file mode 100644
index 0000000000..cfea2e020f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/strlen.S
@@ -0,0 +1,182 @@
+/* strlen -- Compute length of NUL terminated string.
+ Highly optimized version for ix86, x>=5.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+/* This version is especially optimized for the i586 (and later?)
+   processors.  This is mainly done by using the two pipelines.  The
+   version optimized for the i486 is weak in this respect because, to
+   get as much parallelism, we have to execute some *more* instructions.
+
+   The code below is structured to reflect the pairing of the
+   instructions as *I think* it is.  I have no processor data book to
+   verify this.  If you find something you think is incorrect, let me
+   know.  */
+
+
+/* The magic value used throughout this code.  */
+#define magic 0xfefefeff
+
+#define PARMS 4 /* no space for saved regs */
+#define STR PARMS
+
+ .text
+ENTRY (strlen)
+
+ movl STR(%esp), %eax
+ movl $3, %edx /* load mask (= 3) */
+
+ andl %eax, %edx /* separate last two bits of address */
+
+ jz L(1) /* aligned => start loop */
+ jp L(0) /* exactly two bits set */
+
+ cmpb %dh, (%eax) /* is byte NUL? */
+ je L(2) /* yes => return */
+
+ incl %eax /* increment pointer */
+ cmpb %dh, (%eax) /* is byte NUL? */
+
+ je L(2) /* yes => return */
+
+ incl %eax /* increment pointer */
+ xorl $2, %edx
+
+ jz L(1)
+
+L(0): cmpb %dh, (%eax) /* is byte NUL? */
+ je L(2) /* yes => return */
+
+ incl %eax /* increment pointer */
+ xorl %edx, %edx /* We need %edx == 0 for later */
+
+ /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+ change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-31 is set, there will be a carry
+ into bit 32 (=carry flag), so all of the hole bits will
+ be changed.
+
+ Note: %edx == 0 in any case here. */
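+
+	/* Illustrative C sketch of the test below (an interpretation,
+	   not part of the build): with magic = 0xfefefeff the loop
+	   computes
+
+	     ((word + magic) ^ ~word) & ~magic
+
+	   where ~magic == 0x01010100 selects the hole bits 8, 16 and 24;
+	   a nonzero result -- or a missing carry out of bit 31, caught
+	   by the jnc -- means some byte of the word is NUL.  */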
+
+L(1):
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ addl $4, %eax /* adjust pointer for *next* word */
+
+ subl %ecx, %edx /* first step to negate word */
+ addl $magic, %ecx /* add magic word */
+
+ decl %edx /* complete negation of word */
+ jnc L(3) /* previous addl caused overflow? */
+
+ xorl %ecx, %edx /* (word+magic)^word */
+
+ andl $~magic, %edx /* any of the carry flags set? */
+
+ jne L(3) /* yes => determine byte */
+
+
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ addl $4, %eax /* adjust pointer for *next* word */
+
+ subl %ecx, %edx /* first step to negate word */
+ addl $magic, %ecx /* add magic word */
+
+ decl %edx /* complete negation of word */
+ jnc L(3) /* previous addl caused overflow? */
+
+ xorl %ecx, %edx /* (word+magic)^word */
+
+ andl $~magic, %edx /* any of the carry flags set? */
+
+ jne L(3) /* yes => determine byte */
+
+
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ addl $4, %eax /* adjust pointer for *next* word */
+
+ subl %ecx, %edx /* first step to negate word */
+ addl $magic, %ecx /* add magic word */
+
+ decl %edx /* complete negation of word */
+ jnc L(3) /* previous addl caused overflow? */
+
+ xorl %ecx, %edx /* (word+magic)^word */
+
+ andl $~magic, %edx /* any of the carry flags set? */
+
+ jne L(3) /* yes => determine byte */
+
+
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ addl $4, %eax /* adjust pointer for *next* word */
+
+ subl %ecx, %edx /* first step to negate word */
+ addl $magic, %ecx /* add magic word */
+
+ decl %edx /* complete negation of word */
+ jnc L(3) /* previous addl caused overflow? */
+
+ xorl %ecx, %edx /* (word+magic)^word */
+
+ andl $~magic, %edx /* any of the carry flags set? */
+
+ je L(1) /* no => start loop again */
+
+
+L(3): subl $4, %eax /* correct too early pointer increment */
+ subl $magic, %ecx
+
+ cmpb $0, %cl /* lowest byte NUL? */
+ jz L(2) /* yes => return */
+
+ inc %eax /* increment pointer */
+ testb %ch, %ch /* second byte NUL? */
+
+ jz L(2) /* yes => return */
+
+ shrl $16, %ecx /* make upper bytes accessible */
+ incl %eax /* increment pointer */
+
+ cmpb $0, %cl /* is third byte NUL? */
+ jz L(2) /* yes => return */
+
+ incl %eax /* increment pointer */
+
+L(2): subl STR(%esp), %eax /* now compute the length as difference
+ between start and terminating NUL
+ character */
+ ret
+END (strlen)
+libc_hidden_builtin_def (strlen)
diff --git a/REORG.TODO/sysdeps/i386/i586/sub_n.S b/REORG.TODO/sysdeps/i386/i586/sub_n.S
new file mode 100644
index 0000000000..21b5a2742c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/sub_n.S
@@ -0,0 +1,143 @@
+/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+ and store difference in a third limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define S2 S1+4
+#define SIZE S2+4
+
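+/* Reference semantics in C (an illustrative sketch assuming 32-bit
+   limbs; not part of the build):
+
+     mp_limb_t borrow = 0;
+     for (mp_size_t i = 0; i < size; i++)
+       {
+         uint64_t d = (uint64_t) s1[i] - s2[i] - borrow;
+         res[i] = (mp_limb_t) d;
+         borrow = (mp_limb_t) ((d >> 32) & 1);
+       }
+     return borrow;
+*/
+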
+ .text
+ENTRY (__mpn_sub_n)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 12)
+ movl S1(%esp),%esi
+ cfi_rel_offset (esi, 8)
+ movl S2(%esp),%ebx
+ cfi_rel_offset (ebx, 0)
+ movl SIZE(%esp),%ecx
+ movl (%ebx),%ebp
+ cfi_rel_offset (ebp, 4)
+
+ decl %ecx
+ movl %ecx,%edx
+ shrl $3,%ecx
+ andl $7,%edx
+ testl %ecx,%ecx /* zero carry flag */
+ jz L(end)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+
+ ALIGN (3)
+L(oop): movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L(1): movl (%esi),%eax
+ movl 4(%esi),%edx
+ sbbl %ebp,%eax
+ movl 4(%ebx),%ebp
+ sbbl %ebp,%edx
+ movl 8(%ebx),%ebp
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L(2): movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ sbbl %ebp,%eax
+ movl 12(%ebx),%ebp
+ sbbl %ebp,%edx
+ movl 16(%ebx),%ebp
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L(3): movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ sbbl %ebp,%eax
+ movl 20(%ebx),%ebp
+ sbbl %ebp,%edx
+ movl 24(%ebx),%ebp
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L(4): movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ sbbl %ebp,%eax
+ movl 28(%ebx),%ebp
+ sbbl %ebp,%edx
+ movl 32(%ebx),%ebp
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebx),%ebx
+ decl %ecx
+ jnz L(oop)
+
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+L(end):
+ decl %edx /* test %edx w/o clobbering carry */
+ js L(end2)
+ incl %edx
+L(oop2):
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ sbbl %ebp,%eax
+ movl 4(%ebx),%ebp
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebx),%ebx
+ decl %edx
+ jnz L(oop2)
+L(end2):
+ movl (%esi),%eax
+ sbbl %ebp,%eax
+ movl %eax,(%edi)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_sub_n)
diff --git a/REORG.TODO/sysdeps/i386/i586/submul_1.S b/REORG.TODO/sysdeps/i386/i586/submul_1.S
new file mode 100644
index 0000000000..5e5e121ca2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/submul_1.S
@@ -0,0 +1,94 @@
+/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+ the result from a second limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define SIZE S1+4
+#define S2LIMB SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebx
+
+ .text
+ENTRY (__mpn_submul_1)
+
+ pushl %res_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %s1_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %s2_limb
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp), %res_ptr
+ cfi_rel_offset (res_ptr, 12)
+ movl S1(%esp), %s1_ptr
+ cfi_rel_offset (s1_ptr, 8)
+ movl SIZE(%esp), %size
+ movl S2LIMB(%esp), %s2_limb
+ cfi_rel_offset (s2_limb, 0)
+ leal (%res_ptr,%size,4), %res_ptr
+ leal (%s1_ptr,%size,4), %s1_ptr
+ negl %size
+ xorl %ebp, %ebp
+ cfi_rel_offset (ebp, 4)
+ ALIGN (3)
+
+L(oop): adcl $0, %ebp
+ movl (%s1_ptr,%size,4), %eax
+
+ mull %s2_limb
+
+ addl %ebp, %eax
+ movl (%res_ptr,%size,4), %ebp
+
+ adcl $0, %edx
+ subl %eax, %ebp
+
+ movl %ebp, (%res_ptr,%size,4)
+ incl %size
+
+ movl %edx, %ebp
+ jnz L(oop)
+
+ adcl $0, %ebp
+ movl %ebp, %eax
+ popl %s2_limb
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s2_limb)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %s1_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s1_ptr)
+ popl %res_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (res_ptr)
+
+ ret
+#undef size
+END (__mpn_submul_1)
diff --git a/REORG.TODO/sysdeps/i386/i686/Makefile b/REORG.TODO/sysdeps/i386/i686/Makefile
new file mode 100644
index 0000000000..311042787b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/Makefile
@@ -0,0 +1,12 @@
+# So that we can test __m128's alignment
+stack-align-test-flags += -msse
+
+CFLAGS-.o += -Wa,-mtune=i686
+CFLAGS-.os += -Wa,-mtune=i686
+CFLAGS-.op += -Wa,-mtune=i686
+CFLAGS-.oS += -Wa,-mtune=i686
+
+ASFLAGS-.o += -Wa,-mtune=i686
+ASFLAGS-.os += -Wa,-mtune=i686
+ASFLAGS-.op += -Wa,-mtune=i686
+ASFLAGS-.oS += -Wa,-mtune=i686
diff --git a/REORG.TODO/sysdeps/i386/i686/add_n.S b/REORG.TODO/sysdeps/i386/i686/add_n.S
new file mode 100644
index 0000000000..4afa648ceb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/add_n.S
@@ -0,0 +1,110 @@
+/* Add two limb vectors of the same length > 0 and store sum in a third
+ limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+8 /* space for 2 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define S2 S1+4
+#define SIZE S2+4
+
+ .text
+#ifdef PIC
+L(1): addl (%esp), %eax
+ ret
+#endif
+ENTRY (__mpn_add_n)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 4)
+ movl S1(%esp),%esi
+ cfi_rel_offset (esi, 0)
+ movl S2(%esp),%edx
+ movl SIZE(%esp),%ecx
+ movl %ecx,%eax
+ shrl $3,%ecx /* compute count for unrolled loop */
+ negl %eax
+ andl $7,%eax /* get index where to start loop */
+ jz L(oop) /* necessary special case for 0 */
+ incl %ecx /* adjust loop count */
+ shll $2,%eax /* adjustment for pointers... */
+ subl %eax,%edi /* ... since they are offset ... */
+ subl %eax,%esi /* ... by a constant when we ... */
+ subl %eax,%edx /* ... enter the loop */
+ shrl $2,%eax /* restore previous value */
+#ifdef PIC
+/* Calculate start address in loop for PIC. */
+ leal (L(oop)-L(0)-3)(%eax,%eax,8),%eax
+ call L(1)
+L(0):
+#else
+/* Calculate start address in loop for non-PIC. */
+ leal (L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+ jmp *%eax /* jump into loop */
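+	/* Illustrative note: the (%eax,%eax,8) addressing above encodes
+	   index * 9 -- each unrolled step below assembles to 9 bytes,
+	   except the first, which is 3 bytes shorter (hence the -3 in
+	   both address computations).  */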
+ ALIGN (3)
+L(oop): movl (%esi),%eax
+ adcl (%edx),%eax
+ movl %eax,(%edi)
+ movl 4(%esi),%eax
+ adcl 4(%edx),%eax
+ movl %eax,4(%edi)
+ movl 8(%esi),%eax
+ adcl 8(%edx),%eax
+ movl %eax,8(%edi)
+ movl 12(%esi),%eax
+ adcl 12(%edx),%eax
+ movl %eax,12(%edi)
+ movl 16(%esi),%eax
+ adcl 16(%edx),%eax
+ movl %eax,16(%edi)
+ movl 20(%esi),%eax
+ adcl 20(%edx),%eax
+ movl %eax,20(%edi)
+ movl 24(%esi),%eax
+ adcl 24(%edx),%eax
+ movl %eax,24(%edi)
+ movl 28(%esi),%eax
+ adcl 28(%edx),%eax
+ movl %eax,28(%edi)
+ leal 32(%edi),%edi
+ leal 32(%esi),%esi
+ leal 32(%edx),%edx
+ decl %ecx
+ jnz L(oop)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_add_n)
diff --git a/REORG.TODO/sysdeps/i386/i686/bcopy.S b/REORG.TODO/sysdeps/i386/i686/bcopy.S
new file mode 100644
index 0000000000..15ef9419a4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/bcopy.S
@@ -0,0 +1,3 @@
+#define USE_AS_BCOPY
+#define memmove bcopy
+#include <sysdeps/i386/i686/memmove.S>
diff --git a/REORG.TODO/sysdeps/i386/i686/bzero.S b/REORG.TODO/sysdeps/i386/i686/bzero.S
new file mode 100644
index 0000000000..c7898f18e0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/bzero.S
@@ -0,0 +1,4 @@
+#define USE_AS_BZERO
+#define memset __bzero
+#include <sysdeps/i386/i686/memset.S>
+weak_alias (__bzero, bzero)
diff --git a/REORG.TODO/sysdeps/i386/i686/dl-hash.h b/REORG.TODO/sysdeps/i386/i686/dl-hash.h
new file mode 100644
index 0000000000..ceda785b32
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/dl-hash.h
@@ -0,0 +1,79 @@
+/* Compute the hash value for a given string according to the ELF standard.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _DL_HASH_H
+#define _DL_HASH_H 1
+
+
+/* This is the hashing function specified by the ELF ABI.  It is highly
+   optimized for the PII processors.  Though it will run on an i586, it
+   would be much slower there than the generic C implementation, so
+   don't use it on such machines.  */
+static unsigned int
+__attribute__ ((unused))
+_dl_elf_hash (const char *name)
+{
+ unsigned int result;
+ unsigned int temp0;
+ unsigned int temp1;
+
+ __asm__ __volatile__
+ ("movzbl (%1),%2\n\t"
+ "testl %2, %2\n\t"
+ "jz 1f\n\t"
+ "movl %2, %0\n\t"
+ "movzbl 1(%1), %2\n\t"
+ "jecxz 1f\n\t"
+ "shll $4, %0\n\t"
+ "addl %2, %0\n\t"
+ "movzbl 2(%1), %2\n\t"
+ "jecxz 1f\n\t"
+ "shll $4, %0\n\t"
+ "addl %2, %0\n\t"
+ "movzbl 3(%1), %2\n\t"
+ "jecxz 1f\n\t"
+ "shll $4, %0\n\t"
+ "addl %2, %0\n\t"
+ "movzbl 4(%1), %2\n\t"
+ "jecxz 1f\n\t"
+ "shll $4, %0\n\t"
+ "addl $5, %1\n\t"
+ "addl %2, %0\n\t"
+ "movzbl (%1), %2\n\t"
+ "jecxz 1f\n"
+ "2:\t"
+ "shll $4, %0\n\t"
+ "movl $0xf0000000, %3\n\t"
+ "incl %1\n\t"
+ "addl %2, %0\n\t"
+ "andl %0, %3\n\t"
+ "andl $0x0fffffff, %0\n\t"
+ "shrl $24, %3\n\t"
+ "movzbl (%1), %2\n\t"
+ "xorl %3, %0\n\t"
+ "testl %2, %2\n\t"
+ "jnz 2b\n"
+ "1:\t"
+ : "=&r" (result), "=r" (name), "=&c" (temp0), "=&r" (temp1)
+ : "0" (0), "1" ((const unsigned char *) name));
+
+ return result;
+}
+
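+/* For reference, the portable C formulation of the same ELF hash, as
+   given in the System V ABI (shown for illustration, not compiled
+   here):
+
+     unsigned long h = 0, g;
+     while (*name)
+       {
+         h = (h << 4) + *name++;
+         if ((g = h & 0xf0000000) != 0)
+           h ^= g >> 24;
+         h &= ~g;
+       }
+     return h;
+*/
+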
+#endif /* dl-hash.h */
diff --git a/REORG.TODO/sysdeps/i386/i686/ffs.c b/REORG.TODO/sysdeps/i386/i686/ffs.c
new file mode 100644
index 0000000000..cbe36ff873
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/ffs.c
@@ -0,0 +1,48 @@
+/* ffs -- find first set bit in a word, counted from least significant end.
+ For Intel 80x86, x>=6.
+ This file is part of the GNU C Library.
+ Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define ffsl __something_else
+#include <string.h>
+
+#undef ffs
+
+#ifdef __GNUC__
+
+int
+__ffs (int x)
+{
+ int cnt;
+ int tmp;
+
+  asm ("bsfl %2,%0\n"		/* Count low bits in X and store in %0.  */
+       "cmovel %1,%0\n"		/* If X was zero, use -1 (%1) as result.  */
+ : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1));
+
+ return cnt + 1;
+}
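+
+/* Examples (illustrative): __ffs (0) == 0 and __ffs (0x18) == 4, since
+   the lowest set bit of 0x18 is bit 3 and ffs counts bits starting
+   at 1.  */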
+weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
+libc_hidden_builtin_def (ffs)
+#undef ffsl
+weak_alias (__ffs, ffsl)
+
+#else
+#include <string/ffs.c>
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/e_log.S b/REORG.TODO/sysdeps/i386/i686/fpu/e_log.S
new file mode 100644
index 0000000000..73060b088c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/e_log.S
@@ -0,0 +1,29 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+
+ .text
+ENTRY(__ieee754_log)
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+ fucomi %st
+ jp 3f
+ fyl2x // log(x)
+ ret
+
+3: fstp %st(1)
+ ret
+END (__ieee754_log)
+
+ENTRY(__log_finite)
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+ fyl2x // log(x)
+ ret
+END(__log_finite)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/e_logf.S b/REORG.TODO/sysdeps/i386/i686/fpu/e_logf.S
new file mode 100644
index 0000000000..6fd39d50d3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/e_logf.S
@@ -0,0 +1,30 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+
+ .text
+ENTRY(__ieee754_logf)
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+ fucomi %st
+ jp 3f
+ fyl2x // log(x)
+ ret
+
+3: fstp %st(1)
+ ret
+END (__ieee754_logf)
+
+ENTRY(__logf_finite)
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+ fyl2x // log(x)
+ ret
+END(__logf_finite)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/e_logl.S b/REORG.TODO/sysdeps/i386/i686/fpu/e_logl.S
new file mode 100644
index 0000000000..7e3bc8d817
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/e_logl.S
@@ -0,0 +1,94 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+
+ .p2align 3
+ .type one,@object
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ .type limit,@object
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
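+	/* Illustrative note: fyl2xp1 is specified to be accurate for
+	   |x| < 1 - sqrt(2)/2 (about 0.2929), so 0.29 is safely inside
+	   that range.  */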
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_logl)
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+ fucomi %st
+ jp 3f
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ movzwl 4+8(%esp), %eax
+ cmpl $0xc000, %eax
+ jae 5f // x <= -2, avoid overflow from -LDBL_MAX - 1.
+ fsubl MO(one) // x-1 : x : log(2)
+5: fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2f
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 4f
+ fabs // log(1) is +0 in all rounding modes.
+4: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : log(2)
+ fyl2x // log(x)
+ ret
+
+3: fstp %st(1)
+ fadd %st(0)
+ ret
+END (__ieee754_logl)
+
+ENTRY(__logl_finite)
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2b
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 6f
+ fabs // log(1) is +0 in all rounding modes.
+6: fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__logl_finite)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/Makefile b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/Makefile
new file mode 100644
index 0000000000..7d9089232f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/Makefile
@@ -0,0 +1,4 @@
+ifeq ($(subdir),math)
+libm-sysdep_routines += e_expf-sse2 e_expf-ia32 s_sinf-sse2 s_cosf-sse2 \
+ s_sincosf-sse2
+endif
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S
new file mode 100644
index 0000000000..b486b4d1ca
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S
@@ -0,0 +1,22 @@
+/*
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define __ieee754_expf __ieee754_expf_ia32
+#define __expf_finite __expf_finite_ia32
+
+#include <sysdeps/i386/fpu/e_expf.S>
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
new file mode 100644
index 0000000000..e6bb6fa289
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
@@ -0,0 +1,325 @@
+/* SSE2 version of __ieee754_expf and __expf_finite
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#include <sysdep.h>
+
+/* Short algorithm description:
+ *
+ * Let K = 64 (table size).
+ * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
+ * where
+ * x = m*log(2)/K + y, y in [0.0..log(2)/K]
+ * m = n*K + j, m,n,j - signed integer, j in [0..K-1]
+ * values of 2^(j/K) are tabulated as T[j].
+ *
+ * P(y) is a minimax polynomial approximation of expf(x)-1
+ * on small interval [0.0..log(2)/K].
+ *
+ * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as
+ * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y
+ *
+ * Special cases:
+ * __ieee754_expf_sse2(NaN) = NaN
+ * __ieee754_expf_sse2(+INF) = +INF
+ * __ieee754_expf_sse2(-INF) = 0
+ * __ieee754_expf_sse2(x) = 1 for subnormals
+ * for finite argument, only __ieee754_expf_sse2(0)=1 is exact
+ * __ieee754_expf_sse2(x) overflows if x>700
+ * __ieee754_expf_sse2(x) underflows if x<-700
+ *
+ * Note:
+ * For |x|<700, __ieee754_expf_sse2 computes result in double precision,
+ * with accuracy a bit more than needed for expf, and does not round it
+ * to single precision.
+ */
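+
+/* For illustration only, a minimal C model of the reduction and
+   reconstruction described above.  K, T[] and P0..P3 are stand-ins for
+   the constants tabulated below (here T is generated at run time and the
+   coefficients are the plain Taylor values rather than the minimax ones):
+
+     #include <math.h>
+
+     #define K 64
+     static double T[K];                  // T[j] = 2^(j/K)
+     static const double P0 = 1.0, P1 = 0.5,
+                         P2 = 1.0 / 6.0, P3 = 1.0 / 24.0;
+
+     static double expf_model (double x)
+     {
+       long m = lround (x * K / M_LN2);   // m = n*K + j
+       int j = (int) (m & (K - 1));       // table index
+       long n = (m - j) / K;              // binary exponent
+       double y = x - m * (M_LN2 / K);    // reduced argument
+       double z = y * y;
+       double p = (P3 * z + P1) * z + (P2 * z + P0) * y;
+       return ldexp (T[j] * (1.0 + p), (int) n);
+     }
+
+   with T filled once via: for (int j = 0; j < K; j++) T[j] = exp2 (j / 64.0);
+ */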
+
+
+#ifdef PIC
+# define MO1(symbol) L(symbol)##@GOTOFF(%edx)
+# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%edx,reg2,_scale)
+#else
+# define MO1(symbol) L(symbol)
+# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
+#endif
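+
+/* MO1/MO2 hide the PIC addressing difference: under PIC the constants
+   below are reached GOT-relative through %edx (loaded by LOAD_PIC_REG),
+   otherwise through absolute addresses.  */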
+
+ .text
+ENTRY(__ieee754_expf_sse2)
+ /* Input: single precision x on stack at address 4(%esp) */
+
+#ifdef PIC
+ LOAD_PIC_REG(dx)
+#endif
+
+ cvtss2sd 4(%esp), %xmm1 /* Convert x to double precision */
+ mov 4(%esp), %ecx /* Copy x */
+ movsd MO1(DP_KLN2), %xmm2 /* DP K/log(2) */
+ movsd MO1(DP_P2), %xmm3 /* DP P2 */
+ movl %ecx, %eax /* x */
+ mulsd %xmm1, %xmm2 /* DP x*K/log(2) */
+ andl $0x7fffffff, %ecx /* |x| */
+ cmpl $0x442f0000, %ecx /* |x|<700 ? */
+ movsd MO1(DP_P3), %xmm4 /* DP P3 */
+ addsd MO1(DP_RS), %xmm2 /* DP x*K/log(2)+RS */
+ jae L(special_paths)
+
+ /* Here if |x|<700 */
+ cmpl $0x31800000, %ecx /* |x|<2^(-28) ? */
+ jb L(small_arg)
+
+ /* Main path: here if 2^(-28)<=|x|<700 */
+ cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */
+ movd %xmm2, %eax /* bits of n*K+j with trash */
+ subss MO1(SP_RS), %xmm2 /* SP t=round(x*K/log(2)) */
+ movl %eax, %ecx /* n*K+j with trash */
+ cvtss2sd %xmm2, %xmm2 /* DP t */
+ andl $0x3f, %eax /* bits of j */
+ mulsd MO1(DP_NLN2K), %xmm2 /* DP -t*log(2)/K */
+ andl $0xffffffc0, %ecx /* bits of n */
+#ifdef __AVX__
+ vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */
+ vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */
+#else
+ addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */
+ movaps %xmm2, %xmm0 /* DP y */
+ mulsd %xmm2, %xmm2 /* DP z=y*y */
+#endif
+ mulsd %xmm2, %xmm4 /* DP P3*z */
+ addl $0xffc0, %ecx /* bits of n + DP exponent bias */
+ mulsd %xmm2, %xmm3 /* DP P2*z */
+ shrl $2, %ecx /* High 2 bytes of DP 2^n */
+ pxor %xmm1, %xmm1 /* clear %xmm1 */
+ addsd MO1(DP_P1), %xmm4 /* DP P3*z+P1 */
+ addsd MO1(DP_P0), %xmm3 /* DP P2*z+P0 */
+ pinsrw $3, %ecx, %xmm1 /* DP 2^n */
+ mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */
+ mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */
+ addsd %xmm4, %xmm0 /* DP P(y) */
+ mulsd MO2(DP_T,%eax,8), %xmm0 /* DP P(y)*T[j] */
+ addsd MO2(DP_T,%eax,8), %xmm0 /* DP T[j]*(P(y)+1) */
+ mulsd %xmm1, %xmm0 /* DP result=2^n*(T[j]*(P(y)+1)) */
+ cvtsd2ss %xmm0, %xmm1
+
+ lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
+ movss %xmm1, 0(%esp) /* Move result from sse... */
+ flds 0(%esp) /* ...to FPU. */
+ lea 4(%esp), %esp /* Return back 4 bytes of stack frame */
+ ret
+
+ .p2align 4
+L(small_arg):
+ /* Here if 0<=|x|<2^(-28) */
+ movss 4(%esp), %xmm0 /* load x */
+ addss MO1(SP_ONE), %xmm0 /* 1.0 + x */
+ /* Return 1.0 with inexact raised, except for x==0 */
+ jmp L(epilogue)
+
+ .p2align 4
+L(special_paths):
+ /* Here if x is NaN, or Inf, or finite |x|>=700 */
+ movss 4(%esp), %xmm0 /* load x */
+
+ cmpl $0x7f800000, %ecx /* |x| is finite ? */
+ jae L(arg_inf_or_nan)
+
+ /* Here if finite |x|>=700 */
+ testl $0x80000000, %eax /* sign of x nonzero ? */
+ je L(res_overflow)
+
+ /* Here if finite x<=-700 */
+ movss MO1(SP_SMALL), %xmm0 /* load small value 2^(-100) */
+ mulss %xmm0, %xmm0 /* Return underflowed result (zero or subnormal) */
+ jmp L(epilogue)
+
+ .p2align 4
+L(res_overflow):
+ /* Here if finite x>=700 */
+ movss MO1(SP_LARGE), %xmm0 /* load large value 2^100 */
+ mulss %xmm0, %xmm0 /* Return overflowed result (Inf or max normal) */
+ jmp L(epilogue)
+
+ .p2align 4
+L(arg_inf_or_nan):
+ /* Here if |x| is Inf or NAN */
+ jne L(arg_nan) /* |x| is Inf ? */
+
+ /* Here if |x| is Inf */
+ shrl $31, %eax /* Get sign bit of x */
+ movss MO2(SP_INF_0,%eax,4), %xmm0/* return zero or Inf, depending on sign of x */
+ jmp L(epilogue)
+
+ .p2align 4
+L(arg_nan):
+ /* Here if |x| is NaN */
+ addss %xmm0, %xmm0 /* Return x+x (raise invalid) */
+
+ .p2align 4
+L(epilogue):
+ lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
+ movss %xmm0, 0(%esp) /* Move result from sse... */
+ flds 0(%esp) /* ...to FPU. */
+ lea 4(%esp), %esp /* Return back 4 bytes of stack frame */
+ ret
+END(__ieee754_expf_sse2)
+
+ .section .rodata, "a"
+ .p2align 3
+L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */
+ .long 0x00000000, 0x3ff00000
+ .long 0x3e778061, 0x3ff02c9a
+ .long 0xd3158574, 0x3ff059b0
+ .long 0x18759bc8, 0x3ff08745
+ .long 0x6cf9890f, 0x3ff0b558
+ .long 0x32d3d1a2, 0x3ff0e3ec
+ .long 0xd0125b51, 0x3ff11301
+ .long 0xaea92de0, 0x3ff1429a
+ .long 0x3c7d517b, 0x3ff172b8
+ .long 0xeb6fcb75, 0x3ff1a35b
+ .long 0x3168b9aa, 0x3ff1d487
+ .long 0x88628cd6, 0x3ff2063b
+ .long 0x6e756238, 0x3ff2387a
+ .long 0x65e27cdd, 0x3ff26b45
+ .long 0xf51fdee1, 0x3ff29e9d
+ .long 0xa6e4030b, 0x3ff2d285
+ .long 0x0a31b715, 0x3ff306fe
+ .long 0xb26416ff, 0x3ff33c08
+ .long 0x373aa9cb, 0x3ff371a7
+ .long 0x34e59ff7, 0x3ff3a7db
+ .long 0x4c123422, 0x3ff3dea6
+ .long 0x21f72e2a, 0x3ff4160a
+ .long 0x6061892d, 0x3ff44e08
+ .long 0xb5c13cd0, 0x3ff486a2
+ .long 0xd5362a27, 0x3ff4bfda
+ .long 0x769d2ca7, 0x3ff4f9b2
+ .long 0x569d4f82, 0x3ff5342b
+ .long 0x36b527da, 0x3ff56f47
+ .long 0xdd485429, 0x3ff5ab07
+ .long 0x15ad2148, 0x3ff5e76f
+ .long 0xb03a5585, 0x3ff6247e
+ .long 0x82552225, 0x3ff66238
+ .long 0x667f3bcd, 0x3ff6a09e
+ .long 0x3c651a2f, 0x3ff6dfb2
+ .long 0xe8ec5f74, 0x3ff71f75
+ .long 0x564267c9, 0x3ff75feb
+ .long 0x73eb0187, 0x3ff7a114
+ .long 0x36cf4e62, 0x3ff7e2f3
+ .long 0x994cce13, 0x3ff82589
+ .long 0x9b4492ed, 0x3ff868d9
+ .long 0x422aa0db, 0x3ff8ace5
+ .long 0x99157736, 0x3ff8f1ae
+ .long 0xb0cdc5e5, 0x3ff93737
+ .long 0x9fde4e50, 0x3ff97d82
+ .long 0x82a3f090, 0x3ff9c491
+ .long 0x7b5de565, 0x3ffa0c66
+ .long 0xb23e255d, 0x3ffa5503
+ .long 0x5579fdbf, 0x3ffa9e6b
+ .long 0x995ad3ad, 0x3ffae89f
+ .long 0xb84f15fb, 0x3ffb33a2
+ .long 0xf2fb5e47, 0x3ffb7f76
+ .long 0x904bc1d2, 0x3ffbcc1e
+ .long 0xdd85529c, 0x3ffc199b
+ .long 0x2e57d14b, 0x3ffc67f1
+ .long 0xdcef9069, 0x3ffcb720
+ .long 0x4a07897c, 0x3ffd072d
+ .long 0xdcfba487, 0x3ffd5818
+ .long 0x03db3285, 0x3ffda9e6
+ .long 0x337b9b5f, 0x3ffdfc97
+ .long 0xe78b3ff6, 0x3ffe502e
+ .long 0xa2a490da, 0x3ffea4af
+ .long 0xee615a27, 0x3ffefa1b
+ .long 0x5b6e4540, 0x3fff5076
+ .long 0x819e90d8, 0x3fffa7c1
+ .type L(DP_T), @object
+ ASM_SIZE_DIRECTIVE(L(DP_T))
+
+ .section .rodata.cst8,"aM",@progbits,8
+ .p2align 3
+L(DP_KLN2): /* double precision K/log(2) */
+ .long 0x652b82fe, 0x40571547
+ .type L(DP_KLN2), @object
+ ASM_SIZE_DIRECTIVE(L(DP_KLN2))
+
+ .p2align 3
+L(DP_NLN2K): /* double precision -log(2)/K */
+ .long 0xfefa39ef, 0xbf862e42
+ .type L(DP_NLN2K), @object
+ ASM_SIZE_DIRECTIVE(L(DP_NLN2K))
+
+ .p2align 3
+L(DP_RS): /* double precision 2^23+2^22 */
+ .long 0x00000000, 0x41680000
+ .type L(DP_RS), @object
+ ASM_SIZE_DIRECTIVE(L(DP_RS))
+
+ .p2align 3
+L(DP_P3): /* double precision polynomial coefficient P3 */
+ .long 0xeb78fa85, 0x3fa56420
+ .type L(DP_P3), @object
+ ASM_SIZE_DIRECTIVE(L(DP_P3))
+
+ .p2align 3
+L(DP_P1): /* double precision polynomial coefficient P1 */
+ .long 0x008d6118, 0x3fe00000
+ .type L(DP_P1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_P1))
+
+ .p2align 3
+L(DP_P2): /* double precision polynomial coefficient P2 */
+ .long 0xda752d4f, 0x3fc55550
+ .type L(DP_P2), @object
+ ASM_SIZE_DIRECTIVE(L(DP_P2))
+
+ .p2align 3
+L(DP_P0): /* double precision polynomial coefficient P0 */
+ .long 0xffffe7c6, 0x3fefffff
+ .type L(DP_P0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_P0))
+
+ .p2align 2
+L(SP_INF_0):
+ .long 0x7f800000 /* single precision Inf */
+ .long 0 /* single precision zero */
+ .type L(SP_INF_0), @object
+ ASM_SIZE_DIRECTIVE(L(SP_INF_0))
+
+ .section .rodata.cst4,"aM",@progbits,4
+ .p2align 2
+L(SP_RS): /* single precision 2^23+2^22 */
+ .long 0x4b400000
+ .type L(SP_RS), @object
+ ASM_SIZE_DIRECTIVE(L(SP_RS))
+
+ .p2align 2
+L(SP_SMALL): /* single precision small value 2^(-100) */
+ .long 0x0d800000
+ .type L(SP_SMALL), @object
+ ASM_SIZE_DIRECTIVE(L(SP_SMALL))
+
+ .p2align 2
+L(SP_LARGE): /* single precision large value 2^100 */
+ .long 0x71800000
+ .type L(SP_LARGE), @object
+ ASM_SIZE_DIRECTIVE(L(SP_LARGE))
+
+ .p2align 2
+L(SP_ONE): /* single precision 1.0 */
+ .long 0x3f800000
+ .type L(SP_ONE), @object
+ ASM_SIZE_DIRECTIVE(L(SP_ONE))
+
+strong_alias (__ieee754_expf_sse2, __expf_finite_sse2)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf.c b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf.c
new file mode 100644
index 0000000000..388cf98a39
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/e_expf.c
@@ -0,0 +1,37 @@
+/* Multiple versions of expf
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
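+/* Each libm_ifunc resolver below runs when the symbol is first bound and
+   picks the SSE2 implementation on SSE2-capable CPUs, falling back to the
+   i387 version built from e_expf-ia32.S.  */
+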
+extern double __ieee754_expf_sse2 (double);
+extern double __ieee754_expf_ia32 (double);
+
+double __ieee754_expf (double);
+libm_ifunc (__ieee754_expf,
+ HAS_CPU_FEATURE (SSE2)
+ ? __ieee754_expf_sse2
+ : __ieee754_expf_ia32);
+
+extern double __expf_finite_sse2 (double);
+extern double __expf_finite_ia32 (double);
+
+double __expf_finite (double);
+libm_ifunc (__expf_finite,
+ HAS_CPU_FEATURE (SSE2)
+ ? __expf_finite_sse2
+ : __expf_finite_ia32);
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
new file mode 100644
index 0000000000..04bc23b37b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@@ -0,0 +1,2188 @@
+# Begin of automatic generation
+
+# Maximal error of functions:
+Function: "acos":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_downward":
+ildouble: 2
+ldouble: 2
+
+Function: "acos_towardzero":
+ildouble: 2
+ldouble: 2
+
+Function: "acos_upward":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "acosh":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 2
+
+Function: "acosh_downward":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 4
+
+Function: "acosh_towardzero":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 4
+
+Function: "acosh_upward":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 3
+
+Function: "asin":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_downward":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "asin_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asinh":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "asinh_downward":
+double: 1
+float: 1
+idouble: 1
+ildouble: 5
+ldouble: 5
+
+Function: "asinh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "asinh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: "atan":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atanh":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "atanh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 4
+
+Function: "atanh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 3
+
+Function: "atanh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: "cabs":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cabs_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cabs_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cabs_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cacos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "cacos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacos_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 6
+ldouble: 6
+
+Function: Real part of "cacos_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "cacos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_upward":
+double: 7
+float: 7
+idouble: 7
+ifloat: 7
+ildouble: 7
+ldouble: 7
+
+Function: Real part of "cacosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cacosh_downward":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "cacosh_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh_upward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "carg":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "casin_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "casin_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 6
+ldouble: 6
+
+Function: Real part of "casin_towardzero":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "casin_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "casin_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casin_upward":
+double: 7
+float: 7
+idouble: 7
+ifloat: 7
+ildouble: 7
+ldouble: 7
+
+Function: Real part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casinh_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "casinh_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "casinh_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "casinh_towardzero":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "casinh_upward":
+double: 7
+float: 7
+idouble: 7
+ifloat: 7
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "casinh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "catan":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catan_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "catan_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "catan_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "catanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catanh":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "catanh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "catanh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "catanh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "cbrt_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "cbrt_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "cbrt_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccos_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccos_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccosh_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccosh_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccosh_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccosh_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ccosh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccosh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "cexp":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cexp_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cexp_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "cexp_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cexp_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "cexp_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cexp_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "clog":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "clog10":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "clog10":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog10_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 8
+ldouble: 8
+
+Function: Imaginary part of "clog10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog10_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 8
+ldouble: 8
+
+Function: Imaginary part of "clog10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog10_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "clog10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "clog_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "clog_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "clog_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "clog_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "clog_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos":
+ildouble: 1
+ldouble: 1
+
+Function: "cos_downward":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "cos_towardzero":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_upward":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cosh":
+double: 1
+float: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cosh_downward":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 3
+
+Function: "cosh_towardzero":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cosh_upward":
+double: 4
+float: 2
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 3
+
+Function: Real part of "cpow":
+double: 2
+float: 5
+idouble: 2
+ifloat: 5
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cpow":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "cpow_downward":
+double: 5
+float: 8
+idouble: 5
+ifloat: 8
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "cpow_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cpow_towardzero":
+double: 5
+float: 8
+idouble: 5
+ifloat: 8
+ildouble: 7
+ldouble: 7
+
+Function: Imaginary part of "cpow_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cpow_upward":
+double: 4
+float: 1
+idouble: 4
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cpow_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+
+Function: Real part of "csin_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csin_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csin_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csin_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csinh_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csinh_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csinh_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csinh_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csinh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csinh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csqrt":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "csqrt_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "csqrt_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "csqrt_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "csqrt_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "csqrt_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctan_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctan_towardzero":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctan_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctan_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctan_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctanh_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctanh_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctanh_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctanh_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctanh_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "erf":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erfc":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "erfc_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "erfc_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "erfc_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "exp":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp2":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_downward":
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_upward":
+ildouble: 1
+ldouble: 1
+
+Function: "exp_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_towardzero":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "expm1":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "expm1_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "expm1_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "expm1_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "gamma":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "gamma_downward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "gamma_towardzero":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "gamma_upward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: "hypot":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j0":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "j0_downward":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "j0_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: "j0_upward":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "j1":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j1_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "j1_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "j1_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "jn":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "jn_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "jn_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "jn_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "lgamma":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "lgamma_downward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma_towardzero":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma_upward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: "log":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log1p":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log1p_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "log1p_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: "log1p_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "log2":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log2_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log2_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log2_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log_downward":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log_towardzero":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "log_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "pow10_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "pow10_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "pow_downward":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "pow_towardzero":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "pow_upward":
+double: 1
+idouble: 1
+ildouble: 4
+ldouble: 4
+
+Function: "sin":
+ildouble: 1
+ldouble: 1
+
+Function: "sin_downward":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sin_towardzero":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sin_upward":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sincos":
+ildouble: 1
+ldouble: 1
+
+Function: "sincos_downward":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sincos_towardzero":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sincos_upward":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sinh":
+double: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_downward":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 5
+
+Function: "sinh_towardzero":
+double: 2
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 4
+
+Function: "sinh_upward":
+double: 4
+float: 2
+idouble: 1
+ifloat: 1
+ildouble: 4
+ldouble: 5
+
+Function: "tan":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "tan_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "tan_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "tanh":
+double: 1
+idouble: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tanh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 7
+ldouble: 4
+
+Function: "tanh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tanh_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 5
+ldouble: 4
+
+Function: "tgamma":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_downward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_towardzero":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_upward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: "y0":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "y0_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "y0_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "y0_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "y1":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "y1_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+Function: "y1_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "y1_upward":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 7
+ldouble: 7
+
+Function: "yn":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "yn_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "yn_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "yn_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+# end of automatic generation
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name
new file mode 100644
index 0000000000..193dd704b3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name
@@ -0,0 +1 @@
+i686
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
new file mode 100644
index 0000000000..f37850d0b3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
@@ -0,0 +1,553 @@
+/* Optimized with sse2 version of cosf
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#define __need_Emath
+#include <bits/errno.h>
+
+/* Short algorithm description:
+ *
+ * 1) if |x| == 0: return 1.0-|x|.
+ * 2) if |x| < 2^-27: return 1.0-|x|.
+ * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
+ * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
+ * 5) if |x| < 9*Pi/4:
+ * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
+ * t=|x|-j*Pi/4.
+ * 5.2) Reconstruction:
+ * s = (-1.0)^((n>>2)&1)
+ * if(n&2 != 0) {
+ * using cos(t) polynomial for |t|<Pi/4, result is
+ * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
+ * } else {
+ * using sin(t) polynomial for |t|<Pi/4, result is
+ * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
+ * }
+ * 6) if |x| < 2^23, large args:
+ * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
+ * t=|x|-j*Pi/4.
+ * 6.2) Reconstruction same as (5.2).
+ * 7) if |x| >= 2^23, very large args:
+ * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
+ * t=|x|-j*Pi/4.
+ * 7.2) Reconstruction same as (5.2).
+ * 8) if x is Inf, return x-x, and set errno=EDOM.
+ * 9) if x is NaN, return x-x.
+ *
+ * Special cases:
+ * cos(+-0) = 1 not raising inexact,
+ * cos(subnormal) raises inexact,
+ * cos(min_normalized) raises inexact,
+ * cos(normalized) raises inexact,
+ * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ * cos(NaN) = NaN.
+ */
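+
+/* For illustration only, the 5.1/5.2 reduction above modeled in C, with
+   libm cos/sin standing in for the C0..C4 and S0..S4 polynomials
+   tabulated at the end of this file:
+
+     #include <math.h>
+
+     static double cosf_model (double x)   // valid for |x| < 9*Pi/4
+     {
+       double ax = fabs (x);
+       int k = (int) (ax / M_PI_4);        // k = trunc(|x|/(Pi/4))
+       int j = (k + 1) & 0x0e;             // even multiple of Pi/4 near x
+       int n = k + 3;                      // quadrant selector
+       double t = ax - j * M_PI_4;         // reduced argument, |t| < Pi/4
+       double s = ((n >> 2) & 1) ? -1.0 : 1.0;
+       return (n & 2) ? s * cos (t) : s * sin (t);
+     }
+ */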
+
+#ifdef PIC
+# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
+# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
+# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG) pushl REG; CFI_PUSH(REG)
+# define POP(REG) popl REG; CFI_POP(REG)
+# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
+# define ARG_X 8(%esp)
+#else
+# define MO1(symbol) L(symbol)
+# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
+# define ENTRANCE
+# define RETURN ret
+# define ARG_X 4(%esp)
+#endif
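+
+/* Here the PIC base lives in %ebx, the ABI-mandated GOT register, which
+   is call-saved; ENTRANCE/RETURN therefore push and pop it, and ARG_X
+   accounts for the extra 4 bytes on the stack.  */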
+
+ .text
+ENTRY(__cosf_sse2)
+ /* Input: single precision x on stack at address ARG_X */
+
+ ENTRANCE
+ movl ARG_X, %eax /* Bits of x */
+ cvtss2sd ARG_X, %xmm0 /* DP x */
+ andl $0x7fffffff, %eax /* |x| */
+
+ cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
+ jb L(arg_less_pio4)
+
+ /* Here if |x|>=Pi/4 */
+ movd %eax, %xmm3 /* SP |x| */
+ andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
+ movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
+
+ cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
+ jae L(large_args)
+
+ /* Here if Pi/4<=|x|<9*Pi/4 */
+ mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
+ cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
+ addl $1, %eax /* k+1 */
+ movl $0x0e, %edx
+ andl %eax, %edx /* j = (k+1)&0x0e */
+ addl $2, %eax /* n */
+ subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
+
+L(reconstruction):
+ /* Input: %eax=n, %xmm0=t */
+ testl $2, %eax /* n&2 != 0? */
+ jz L(sin_poly)
+
+/*L(cos_poly):*/
+ /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
+ * y = t*t; z = y*y;
+ * s = sign(x) * (-1.0)^((n>>2)&1)
+ * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
+ */
+ shrl $2, %eax /* n>>2 */
+ mulsd %xmm0, %xmm0 /* y=t^2 */
+ andl $1, %eax /* (n>>2)&1 */
+ movaps %xmm0, %xmm1 /* y */
+ mulsd %xmm0, %xmm0 /* z=t^4 */
+
+ movsd MO1(DP_C4), %xmm4 /* C4 */
+ mulsd %xmm0, %xmm4 /* z*C4 */
+ movsd MO1(DP_C3), %xmm3 /* C3 */
+ mulsd %xmm0, %xmm3 /* z*C3 */
+ addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
+ mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
+ lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
+ addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
+ mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
+ addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
+ mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
+
+ addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
+ /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
+ addsd MO1(DP_ONES), %xmm3
+
+ mulsd MO2(DP_ONES,%eax,8), %xmm3 /* DP result */
+ movsd %xmm3, 0(%esp) /* Move result from sse... */
+ fldl 0(%esp) /* ...to FPU. */
+ /* Return back 8 bytes of stack frame */
+ lea 8(%esp), %esp
+ RETURN
+
+ .p2align 4
+L(sin_poly):
+ /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
+ * y = t*t; z = y*y;
+ * s = sign(x) * (-1.0)^((n>>2)&1)
+ * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
+ */
+
+ movaps %xmm0, %xmm4 /* t */
+ shrl $2, %eax /* n>>2 */
+ mulsd %xmm0, %xmm0 /* y=t^2 */
+ andl $1, %eax /* (n>>2)&1 */
+ movaps %xmm0, %xmm1 /* y */
+ mulsd %xmm0, %xmm0 /* z=t^4 */
+
+ movsd MO1(DP_S4), %xmm2 /* S4 */
+ mulsd %xmm0, %xmm2 /* z*S4 */
+ movsd MO1(DP_S3), %xmm3 /* S3 */
+ mulsd %xmm0, %xmm3 /* z*S3 */
+ lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
+ addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
+ mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
+ addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
+ mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
+ addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
+ mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
+ /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
+ mulsd MO2(DP_ONES,%eax,8), %xmm4
+ addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
+ /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
+ mulsd %xmm4, %xmm3
+ /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))) */
+ addsd %xmm4, %xmm3
+ movsd %xmm3, 0(%esp) /* Move result from sse... */
+ fldl 0(%esp) /* ...to FPU. */
+ /* Return back 8 bytes of stack frame */
+ lea 8(%esp), %esp
+ RETURN
+
+ .p2align 4
+L(large_args):
+ /* Here if |x|>=9*Pi/4 */
+ cmpl $0x7f800000, %eax /* x is Inf or NaN? */
+ jae L(arg_inf_or_nan)
+
+ /* Here if finite |x|>=9*Pi/4 */
+ cmpl $0x4b000000, %eax /* |x|<2^23? */
+ jae L(very_large_args)
+
+ /* Here if 9*Pi/4<=|x|<2^23 */
+ movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
+ mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
+ cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
+ addl $1, %eax /* k+1 */
+ movl %eax, %edx
+ andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
+ cvtsi2sdl %edx, %xmm4 /* DP j */
+ movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
+ mulsd %xmm4, %xmm2 /* -j*PIO4HI */
+ movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
+ addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
+ addl $2, %eax /* n */
+ mulsd %xmm3, %xmm4 /* j*PIO4LO */
+ addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
+ jmp L(reconstruction)
+
+ .p2align 4
+L(very_large_args):
+ /* Here if finite |x|>=2^23 */
+
+ /* bitpos = (ix>>23) - BIAS_32 + 59; */
+ shrl $23, %eax /* eb = biased exponent of x */
+ /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
+ subl $68, %eax
+ movl $28, %ecx /* %cl=28 */
+ movl %eax, %edx /* bitpos copy */
+
+ /* j = bitpos/28; */
+ div %cl /* j in register %al=%ax/%cl */
+ movapd %xmm0, %xmm3 /* |x| */
+ /* clear unneeded remainder from %ah */
+ andl $0xff, %eax
+
+ imull $28, %eax, %ecx /* j*28 */
+ movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
+ movapd %xmm0, %xmm5 /* |x| */
+ mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
+ movapd %xmm0, %xmm1 /* |x| */
+ mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
+ mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
+ addl $19, %ecx /* j*28+19 */
+ mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
+ cmpl %ecx, %edx /* bitpos>=j*28+19? */
+ jl L(very_large_skip1)
+
+ /* Here if bitpos>=j*28+19 */
+ andpd %xmm3, %xmm4 /* HI(tmp3) */
+ subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
+L(very_large_skip1):
+
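+ /* Rounding to the nearest integer below uses the usual trick of adding
+    2^52 and reading the low word, valid for a DP value of magnitude
+    below 2^31; tmp4 then recovers the rounded value for adjustment.  */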
+ movsd MO1(DP_2POW52), %xmm6
+ movapd %xmm5, %xmm2 /* tmp2 copy */
+ addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
+ movl $1, %edx
+ addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
+ movsd 8+MO1(DP_2POW52), %xmm4
+ movd %xmm6, %eax /* k = I64_LO(tmp6); */
+ addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
+ comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
+ jbe L(very_large_skip2)
+
+ /* Here if tmp4 > tmp5 */
+ subl $1, %eax /* k-- */
+ addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
+L(very_large_skip2):
+
+ andl %eax, %edx /* k&1 */
+ subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
+ addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
+ addsd %xmm2, %xmm3 /* t += tmp2 */
+ addsd %xmm3, %xmm0 /* t += tmp0 */
+ addl $3, %eax /* n=k+3 */
+ addsd %xmm1, %xmm0 /* t += tmp1 */
+ mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */
+
+ jmp L(reconstruction) /* end of very_large_args path */
+
+ .p2align 4
+L(arg_less_pio4):
+ /* Here if |x|<Pi/4 */
+ cmpl $0x3d000000, %eax /* |x|<2^-5? */
+ jl L(arg_less_2pn5)
+
+ /* Here if 2^-5<=|x|<Pi/4 */
+ mulsd %xmm0, %xmm0 /* y=x^2 */
+ movaps %xmm0, %xmm1 /* y */
+ mulsd %xmm0, %xmm0 /* z=x^4 */
+ movsd MO1(DP_C4), %xmm3 /* C4 */
+ mulsd %xmm0, %xmm3 /* z*C4 */
+ movsd MO1(DP_C3), %xmm5 /* C3 */
+ mulsd %xmm0, %xmm5 /* z*C3 */
+ addsd MO1(DP_C2), %xmm3 /* C2+z*C4 */
+ mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */
+ addsd MO1(DP_C1), %xmm5 /* C1+z*C3 */
+ mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */
+ addsd MO1(DP_C0), %xmm3 /* C0+z*(C2+z*C4) */
+ mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */
+ addsd %xmm5, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
+ /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
+ addsd MO1(DP_ONES), %xmm3
+ cvtsd2ss %xmm3, %xmm3 /* SP result */
+
+L(epilogue):
+ lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
+ movss %xmm3, 0(%esp) /* Move result from sse... */
+ flds 0(%esp) /* ...to FPU. */
+ /* Return back 4 bytes of stack frame */
+ lea 4(%esp), %esp
+ RETURN
+
+ .p2align 4
+L(arg_less_2pn5):
+ /* Here if |x|<2^-5 */
+ cmpl $0x32000000, %eax /* |x|<2^-27? */
+ jl L(arg_less_2pn27)
+
+ /* Here if 2^-27<=|x|<2^-5 */
+ mulsd %xmm0, %xmm0 /* DP x^2 */
+ movsd MO1(DP_COS2_1), %xmm3 /* DP DP_COS2_1 */
+ mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */
+ addsd MO1(DP_COS2_0), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
+ mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
+ /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
+ addsd MO1(DP_ONES), %xmm3
+ cvtsd2ss %xmm3, %xmm3 /* SP result */
+ jmp L(epilogue)
+
+ .p2align 4
+L(arg_less_2pn27):
+ /* Here if |x|<2^-27 */
+ movss ARG_X, %xmm0 /* x */
+ andps MO1(SP_ABS_MASK),%xmm0 /* |x| */
+ movss MO1(SP_ONE), %xmm3 /* 1.0 */
+ subss %xmm0, %xmm3 /* result is 1.0-|x| */
+ jmp L(epilogue)
+
+ .p2align 4
+L(arg_inf_or_nan):
+ /* Here if |x| is Inf or NAN */
+ jne L(skip_errno_setting) /* in case of x is NaN */
+
+ /* Here if x is Inf. Set errno to EDOM. */
+ call JUMPTARGET(__errno_location)
+ movl $EDOM, (%eax)
+
+ .p2align 4
+L(skip_errno_setting):
+ /* Here if |x| is Inf or NAN. Continued. */
+ movss ARG_X, %xmm3 /* load x */
+ subss %xmm3, %xmm3 /* Result is NaN */
+ jmp L(epilogue)
+END(__cosf_sse2)
+
+ .section .rodata, "a"
+ .p2align 3
+L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
+ .long 0x00000000,0x00000000
+ .long 0x54442d18,0x3fe921fb
+ .long 0x54442d18,0x3ff921fb
+ .long 0x7f3321d2,0x4002d97c
+ .long 0x54442d18,0x400921fb
+ .long 0x2955385e,0x400f6a7a
+ .long 0x7f3321d2,0x4012d97c
+ .long 0xe9bba775,0x4015fdbb
+ .long 0x54442d18,0x401921fb
+ .long 0xbeccb2bb,0x401c463a
+ .long 0x2955385e,0x401f6a7a
+ .type L(PIO4J), @object
+ ASM_SIZE_DIRECTIVE(L(PIO4J))
+
+ .p2align 3
+L(_FPI): /* 4/Pi broken into sum of positive DP values */
+ .long 0x00000000,0x00000000
+ .long 0x6c000000,0x3ff45f30
+ .long 0x2a000000,0x3e3c9c88
+ .long 0xa8000000,0x3c54fe13
+ .long 0xd0000000,0x3aaf47d4
+ .long 0x6c000000,0x38fbb81b
+ .long 0xe0000000,0x3714acc9
+ .long 0x7c000000,0x3560e410
+ .long 0x56000000,0x33bca2c7
+ .long 0xac000000,0x31fbd778
+ .long 0xe0000000,0x300b7246
+ .long 0xe8000000,0x2e5d2126
+ .long 0x48000000,0x2c970032
+ .long 0xe8000000,0x2ad77504
+ .long 0xe0000000,0x290921cf
+ .long 0xb0000000,0x274deb1c
+ .long 0xe0000000,0x25829a73
+ .long 0xbe000000,0x23fd1046
+ .long 0x10000000,0x2224baed
+ .long 0x8e000000,0x20709d33
+ .long 0x80000000,0x1e535a2f
+ .long 0x64000000,0x1cef904e
+ .long 0x30000000,0x1b0d6398
+ .long 0x24000000,0x1964ce7d
+ .long 0x16000000,0x17b908bf
+ .type L(_FPI), @object
+ ASM_SIZE_DIRECTIVE(L(_FPI))
+
+/* Coefficients of polynomial
+ for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5. */
+ .p2align 3
+L(DP_COS2_0):
+ .long 0xff5cc6fd,0xbfdfffff
+ .type L(DP_COS2_0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
+
+ .p2align 3
+L(DP_COS2_1):
+ .long 0xb178dac5,0x3fa55514
+ .type L(DP_COS2_1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
+
+ .p2align 3
+L(DP_ZERONE):
+ .long 0x00000000,0x00000000 /* 0.0 */
+ .long 0x00000000,0xbff00000 /* -1.0 */
+ .type L(DP_ZERONE),@object
+ ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
+
+ .p2align 3
+L(DP_ONES):
+ .long 0x00000000,0x3ff00000 /* +1.0 */
+ .long 0x00000000,0xbff00000 /* -1.0 */
+ .type L(DP_ONES), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ONES))
+
+/* Coefficients of polynomial
+ for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */
+ .p2align 3
+L(DP_S3):
+ .long 0x64e6b5b4,0x3ec71d72
+ .type L(DP_S3), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S3))
+
+ .p2align 3
+L(DP_S1):
+ .long 0x10c2688b,0x3f811111
+ .type L(DP_S1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S1))
+
+ .p2align 3
+L(DP_S4):
+ .long 0x1674b58a,0xbe5a947e
+ .type L(DP_S4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S4))
+
+ .p2align 3
+L(DP_S2):
+ .long 0x8b4bd1f9,0xbf2a019f
+ .type L(DP_S2), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S2))
+
+ .p2align 3
+L(DP_S0):
+ .long 0x55551cd9,0xbfc55555
+ .type L(DP_S0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S0))
+
+/* Coefficients of polynomial
+ for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */
+ .p2align 3
+L(DP_C3):
+ .long 0x9ac43cc0,0x3efa00eb
+ .type L(DP_C3), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C3))
+
+ .p2align 3
+L(DP_C1):
+ .long 0x545c50c7,0x3fa55555
+ .type L(DP_C1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C1))
+
+ .p2align 3
+L(DP_C4):
+ .long 0xdd8844d7,0xbe923c97
+ .type L(DP_C4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C4))
+
+ .p2align 3
+L(DP_C2):
+ .long 0x348b6874,0xbf56c16b
+ .type L(DP_C2), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C2))
+
+ .p2align 3
+L(DP_C0):
+ .long 0xfffe98ae,0xbfdfffff
+ .type L(DP_C0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C0))
+
+ .p2align 3
+L(DP_PIO4):
+ .long 0x54442d18,0x3fe921fb /* Pi/4 */
+ .type L(DP_PIO4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4))
+
+ .p2align 3
+L(DP_2POW52):
+ .long 0x00000000,0x43300000 /* +2^52 */
+ .long 0x00000000,0xc3300000 /* -2^52 */
+ .type L(DP_2POW52), @object
+ ASM_SIZE_DIRECTIVE(L(DP_2POW52))
+
+ .p2align 3
+L(DP_INVPIO4):
+ .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
+ .type L(DP_INVPIO4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
+
+ .p2align 3
+L(DP_PIO4HI):
+ .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
+ .type L(DP_PIO4HI), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
+
+ .p2align 3
+L(DP_PIO4LO):
+ .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
+ .type L(DP_PIO4LO), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
+
+ .p2align 2
+L(SP_INVPIO4):
+ .long 0x3fa2f983 /* 4/Pi */
+ .type L(SP_INVPIO4), @object
+ ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
+
+ .p2align 4
+L(DP_ABS_MASK): /* Mask for getting DP absolute value */
+ .long 0xffffffff,0x7fffffff
+ .long 0xffffffff,0x7fffffff
+ .type L(DP_ABS_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+
+ .p2align 3
+L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
+ .long 0x00000000,0xffffffff
+ .type L(DP_HI_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
+
+ .p2align 4
+L(SP_ABS_MASK): /* Mask for getting SP absolute value */
+ .long 0x7fffffff,0x7fffffff
+ .long 0x7fffffff,0x7fffffff
+ .type L(SP_ABS_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
+
+ .p2align 2
+L(SP_ONE):
+ .long 0x3f800000 /* 1.0 */
+ .type L(SP_ONE), @object
+ ASM_SIZE_DIRECTIVE(L(SP_ONE))
+
+weak_alias (__cosf, cosf)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf.c b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
new file mode 100644
index 0000000000..af588de9dc
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
@@ -0,0 +1,29 @@
+/* Multiple versions of cosf
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern float __cosf_sse2 (float);
+extern float __cosf_ia32 (float);
+float __cosf (float);
+
+libm_ifunc (__cosf, HAS_CPU_FEATURE (SSE2) ? __cosf_sse2 : __cosf_ia32);
+weak_alias (__cosf, cosf);
+
+#define COSF __cosf_ia32
+#include <sysdeps/ieee754/flt-32/s_cosf.c>
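+
+/* For illustration: libm_ifunc arranges a GNU indirect function, in the
+   spirit of this standalone sketch (all names here are hypothetical, not
+   the macro's literal expansion):
+
+     float cosf_sse2_impl (float);
+     float cosf_ia32_impl (float);
+
+     static void *
+     cosf_resolver (void)
+     {
+       return cpu_has_sse2 () ? (void *) cosf_sse2_impl
+			      : (void *) cosf_ia32_impl;
+     }
+
+     float fast_cosf (float) __attribute__ ((ifunc ("cosf_resolver")));
+
+   The dynamic loader runs the resolver once, at relocation time, and
+   binds the symbol to whichever implementation it returns.  */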
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S
new file mode 100644
index 0000000000..f31a925522
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S
@@ -0,0 +1,586 @@
+/* Optimized with sse2 version of sincosf
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#define __need_Emath
+#include <bits/errno.h>
+
+/* Short algorithm description:
+ *
+ * 1) if |x|==0: sin(x)=x,
+ * cos(x)=1.
+ * 2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
+ * cos(x)=1-|x|.
+ * 3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
+ *     cos(x)=1+1*x^2*DP_COS2_0+x^4*DP_COS2_1
+ * 4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
+ * cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
+ * 5) if |x| < 9*Pi/4:
+ * 5.1) Range reduction:
+ * k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
+ * 5.2) Reconstruction:
+ * sign_sin = sign(x) * (-1.0)^(( n >>2)&1)
+ * sign_cos = (-1.0)^(((n+2)>>2)&1)
+ * poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
+ *     poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s, where s=1.0
+ * if(n&2 != 0) {
+ * using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
+ * cos(x) = poly_sin * sign_cos
+ * sin(x) = poly_cos * sign_sin
+ * } else {
+ * sin(x) = poly_sin * sign_sin
+ * cos(x) = poly_cos * sign_cos
+ * }
+ * 6) if |x| < 2^23, large args:
+ * 6.1) Range reduction:
+ * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
+ * 6.2) Reconstruction same as (5.2).
+ * 7) if |x| >= 2^23, very large args:
+ * 7.1) Range reduction:
+ * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
+ * 7.2) Reconstruction same as (5.2).
+ * 8) if x is Inf, return x-x, and set errno=EDOM.
+ * 9) if x is NaN, return x-x.
+ *
+ * Special cases:
+ * sin/cos(+-0) = +-0/1 not raising inexact/underflow,
+ * sin/cos(subnormal) raises inexact/underflow,
+ * sin/cos(min_normalized) raises inexact/underflow,
+ * sin/cos(normalized) raises inexact,
+ * sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ * sin/cos(NaN) = NaN.
+ */
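+
+/* For illustration, the medium-argument path (case 5) above in rough C
+   (a sketch only; these names are not symbols defined in this file):
+
+     double ax = fabs (x), tmp;
+     int k = (int) (ax * (4.0 / M_PI));
+     int n = k + 1, j = n & 0x0e;
+     double t = ax - j * (M_PI / 4.0);
+     double y = t * t;
+     double p_sin = ((((S4*y + S3)*y + S2)*y + S1)*y + S0)*y*t + t;
+     double p_cos = ((((C4*y + C3)*y + C2)*y + C1)*y + C0)*y + 1.0;
+     if (n & 2)
+       tmp = p_sin, p_sin = p_cos, p_cos = tmp;
+     *sinp = sign_sin * p_sin;
+     *cosp = sign_cos * p_cos;
+
+   with sign_sin and sign_cos as defined in (5.2).  */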
+
+#ifdef PIC
+# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
+# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
+# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG) pushl REG; CFI_PUSH(REG)
+# define POP(REG) popl REG; CFI_POP(REG)
+# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
+# define ARG_X 8(%esp)
+# define ARG_SIN_PTR 12(%esp)
+# define ARG_COS_PTR 16(%esp)
+#else
+# define MO1(symbol) L(symbol)
+# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
+# define ENTRANCE
+# define RETURN ret
+# define ARG_X 4(%esp)
+# define ARG_SIN_PTR 8(%esp)
+# define ARG_COS_PTR 12(%esp)
+#endif
+
+ .text
+ENTRY(__sincosf_sse2)
+ /* Input: single precision x on stack at address ARG_X */
+ /* pointer to sin result on stack at address ARG_SIN_PTR */
+ /* pointer to cos result on stack at address ARG_COS_PTR */
+
+ ENTRANCE
+ movl ARG_X, %eax /* Bits of x */
+ cvtss2sd ARG_X, %xmm0 /* DP x */
+ andl $0x7fffffff, %eax /* |x| */
+
+ cmpl $0x3f490fdb, %eax /* |x|<Pi/4 ? */
+ jb L(arg_less_pio4)
+
+ /* Here if |x|>=Pi/4 */
+ movd %eax, %xmm3 /* SP |x| */
+ andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
+ movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
+
+ cmpl $0x40e231d6, %eax /* |x|<9*Pi/4 ? */
+ jae L(large_args)
+
+ /* Here if Pi/4<=|x|<9*Pi/4 */
+ mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
+ movl ARG_X, %ecx /* Load x */
+ cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
+ shrl $29, %ecx /* (sign of x) << 2 */
+ addl $1, %eax /* k+1 */
+ movl $0x0e, %edx
+ andl %eax, %edx /* j = (k+1)&0x0e */
+ subsd MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */
+
+L(reconstruction):
+ /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
+
+ movaps %xmm0, %xmm4 /* t */
+ movhpd MO1(DP_ONES), %xmm4 /* 1|t */
+ mulsd %xmm0, %xmm0 /* y=t^2 */
+ movl $2, %edx
+ unpcklpd %xmm0, %xmm0 /* y|y */
+ addl %eax, %edx /* n+2 */
+ movaps %xmm0, %xmm1 /* y|y */
+ mulpd %xmm0, %xmm0 /* z=t^4|z=t^4 */
+
+ movaps MO1(DP_SC4), %xmm2 /* S4 */
+ mulpd %xmm0, %xmm2 /* z*S4 */
+ movaps MO1(DP_SC3), %xmm3 /* S3 */
+ mulpd %xmm0, %xmm3 /* z*S3 */
+ xorl %eax, %ecx /* (sign_x ^ (n>>2))<<2 */
+ addpd MO1(DP_SC2), %xmm2 /* S2+z*S4 */
+ mulpd %xmm0, %xmm2 /* z*(S2+z*S4) */
+ shrl $2, %edx /* (n+2)>>2 */
+ addpd MO1(DP_SC1), %xmm3 /* S1+z*S3 */
+ mulpd %xmm0, %xmm3 /* z*(S1+z*S3) */
+ shrl $2, %ecx /* sign_x ^ n>>2 */
+ addpd MO1(DP_SC0), %xmm2 /* S0+z*(S2+z*S4) */
+ andl $1, %edx /* sign_cos = ((n+2)>>2)&1 */
+ mulpd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
+ andl $1, %ecx /* sign_sin = sign_x ^ ((n>>2)&1) */
+ addpd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
+ mulpd %xmm4, %xmm3 /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
+ testl $2, %eax /* n&2 != 0 ? */
+ addpd %xmm4, %xmm3 /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
+ jnz L(sin_result_sin_poly)
+
+/*L(sin_result_cos_poly):*/
+ /*
+ * Here if
+ * cos(x) = poly_sin * sign_cos
+ * sin(x) = poly_cos * sign_sin
+ */
+ movsd MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */
+ movhpd MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */
+ mulpd %xmm4, %xmm3 /* result_cos|result_sin */
+ movl ARG_SIN_PTR, %eax
+ cvtpd2ps %xmm3, %xmm0 /* SP results */
+ movl ARG_COS_PTR, %ecx
+ movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
+ shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
+ movss %xmm0, (%ecx) /* store cos(x) */
+ RETURN
+
+ .p2align 4
+L(sin_result_sin_poly):
+ /*
+ * Here if
+ * sin(x) = poly_sin * sign_sin
+ * cos(x) = poly_cos * sign_cos
+ */
+ movsd MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */
+ movhpd MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */
+ mulpd %xmm4, %xmm3 /* result_sin|result_cos */
+ movl ARG_SIN_PTR, %eax
+ cvtpd2ps %xmm3, %xmm0 /* SP results */
+ movl ARG_COS_PTR, %ecx
+ movss %xmm0, (%ecx) /* store cos(x) from xmm0[0] */
+ shufps $1, %xmm0, %xmm0 /* move sin(x) to xmm0[0] */
+ movss %xmm0, (%eax) /* store sin(x) */
+ RETURN
+
+ .p2align 4
+L(large_args):
+ /* Here if |x|>=9*Pi/4 */
+ cmpl $0x7f800000, %eax /* x is Inf or NaN ? */
+ jae L(arg_inf_or_nan)
+
+ /* Here if finite |x|>=9*Pi/4 */
+ cmpl $0x4b000000, %eax /* |x|<2^23 ? */
+ jae L(very_large_args)
+
+ /* Here if 9*Pi/4<=|x|<2^23 */
+ movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
+ mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
+ cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
+ addl $1, %eax /* k+1 */
+ movl %eax, %edx
+ andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
+ cvtsi2sdl %edx, %xmm4 /* DP j */
+ movl ARG_X, %ecx /* Load x */
+ movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
+ shrl $29, %ecx /* (sign of x) << 2 */
+ mulsd %xmm4, %xmm2 /* -j*PIO4HI */
+ movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
+ addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
+ mulsd %xmm3, %xmm4 /* j*PIO4LO */
+ addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
+ jmp L(reconstruction)
+
+ .p2align 4
+L(very_large_args):
+ /* Here if finite |x|>=2^23 */
+
+ /* bitpos = (ix>>23) - BIAS_32 + 59; */
+ shrl $23, %eax /* eb = biased exponent of x */
+ subl $68, %eax /* bitpos=eb-0x7f+59, where 0x7f */
+ /* is exponent bias */
+ movl $28, %ecx /* %cl=28 */
+ movl %eax, %edx /* bitpos copy */
+
+ /* j = bitpos/28; */
+ div %cl /* j in register %al=%ax/%cl */
+ movapd %xmm0, %xmm3 /* |x| */
+ andl $0xff, %eax /* clear unneeded remainder from %ah*/
+
+ imull $28, %eax, %ecx /* j*28 */
+ movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
+ movapd %xmm0, %xmm5 /* |x| */
+ mulsd -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */
+ movapd %xmm0, %xmm1 /* |x| */
+ mulsd -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */
+ mulsd 0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */
+ addl $19, %ecx /* j*28+19 */
+ mulsd 1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */
+ cmpl %ecx, %edx /* bitpos>=j*28+19 ? */
+ jl L(very_large_skip1)
+
+ /* Here if bitpos>=j*28+19 */
+ andpd %xmm3, %xmm4 /* HI(tmp3) */
+ subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
+L(very_large_skip1):
+
+ movsd MO1(DP_2POW52), %xmm6
+ movapd %xmm5, %xmm2 /* tmp2 copy */
+ addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
+ movl $1, %edx
+ addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
+ movsd 8+MO1(DP_2POW52), %xmm4
+ movd %xmm6, %eax /* k = I64_LO(tmp6); */
+ addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
+ movl ARG_X, %ecx /* Load x */
+ comisd %xmm5, %xmm4 /* tmp4 > tmp5 ? */
+ jbe L(very_large_skip2)
+
+ /* Here if tmp4 > tmp5 */
+ subl $1, %eax /* k-- */
+ addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
+L(very_large_skip2):
+
+ andl %eax, %edx /* k&1 */
+ subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
+ addsd MO2(DP_ZERONE,%edx,8), %xmm3/* t = DP_ZERONE[k&1] + tmp3 */
+ addsd %xmm2, %xmm3 /* t += tmp2 */
+ shrl $29, %ecx /* (sign of x) << 2 */
+ addsd %xmm3, %xmm0 /* t += tmp0 */
+ addl $1, %eax /* n=k+1 */
+ addsd %xmm1, %xmm0 /* t += tmp1 */
+ mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */
+
+ jmp L(reconstruction) /* end of very_large_args path */
+
+ .p2align 4
+L(arg_less_pio4):
+ /* Here if |x|<Pi/4 */
+ cmpl $0x3d000000, %eax /* |x|<2^-5 ? */
+ jl L(arg_less_2pn5)
+
+ /* Here if 2^-5<=|x|<Pi/4 */
+ movaps %xmm0, %xmm3 /* DP x */
+ movhpd MO1(DP_ONES), %xmm3 /* DP 1|x */
+ mulsd %xmm0, %xmm0 /* DP y=x^2 */
+ unpcklpd %xmm0, %xmm0 /* DP y|y */
+ movaps %xmm0, %xmm1 /* y|y */
+ mulpd %xmm0, %xmm0 /* z=x^4|z=x^4 */
+
+ movapd MO1(DP_SC4), %xmm4 /* S4 */
+ mulpd %xmm0, %xmm4 /* z*S4 */
+ movapd MO1(DP_SC3), %xmm5 /* S3 */
+ mulpd %xmm0, %xmm5 /* z*S3 */
+ addpd MO1(DP_SC2), %xmm4 /* S2+z*S4 */
+ mulpd %xmm0, %xmm4 /* z*(S2+z*S4) */
+ addpd MO1(DP_SC1), %xmm5 /* S1+z*S3 */
+ mulpd %xmm0, %xmm5 /* z*(S1+z*S3) */
+ addpd MO1(DP_SC0), %xmm4 /* S0+z*(S2+z*S4) */
+ mulpd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
+ mulpd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
+ mulpd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
+ addpd %xmm5, %xmm4 /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
+ movl ARG_SIN_PTR, %eax
+ addpd %xmm4, %xmm3 /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
+ movl ARG_COS_PTR, %ecx
+ cvtpd2ps %xmm3, %xmm0 /* SP results */
+ movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
+ shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
+ movss %xmm0, (%ecx) /* store cos(x) */
+ RETURN
+
+ .p2align 4
+L(arg_less_2pn5):
+ /* Here if |x|<2^-5 */
+ cmpl $0x32000000, %eax /* |x|<2^-27 ? */
+ jl L(arg_less_2pn27)
+
+ /* Here if 2^-27<=|x|<2^-5 */
+ movaps %xmm0, %xmm1 /* DP x */
+ movhpd MO1(DP_ONES), %xmm1 /* DP 1|x */
+ mulsd %xmm0, %xmm0 /* DP x^2 */
+ unpcklpd %xmm0, %xmm0 /* DP x^2|x^2 */
+
+ movaps MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */
+ mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
+ addpd MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
+ mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
+ mulpd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
+ addpd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
+ movl ARG_SIN_PTR, %eax
+ cvtpd2ps %xmm3, %xmm0 /* SP results */
+ movl ARG_COS_PTR, %ecx
+ movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
+ shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
+ movss %xmm0, (%ecx) /* store cos(x) */
+ RETURN
+
+ .p2align 4
+L(arg_less_2pn27):
+ movss ARG_X, %xmm7 /* SP x */
+ cmpl $0, %eax /* x=0 ? */
+ je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */
+ /* Here if |x|<2^-27 */
+ /*
+ * Special cases here:
+ * sin(subnormal) raises inexact/underflow
+ * sin(min_normalized) raises inexact/underflow
+ * sin(normalized) raises inexact
+ * cos(here)=1-|x| (raising inexact)
+ */
+ movaps %xmm0, %xmm3 /* DP x */
+ mulsd MO1(DP_SMALL), %xmm0 /* DP x*DP_SMALL */
+ subsd %xmm0, %xmm3 /* DP sin result is x-x*DP_SMALL */
+ andps MO1(SP_ABS_MASK), %xmm7 /* SP |x| */
+ cvtsd2ss %xmm3, %xmm0 /* sin(x) */
+ movl ARG_SIN_PTR, %eax
+ movss MO1(SP_ONE), %xmm1 /* SP 1.0 */
+ movss %xmm0, (%eax) /* sin(x) store */
+ movl ARG_COS_PTR, %ecx
+ subss %xmm7, %xmm1 /* cos(x) */
+ movss %xmm1, (%ecx) /* cos(x) store */
+ RETURN
+
+ .p2align 4
+L(arg_zero):
+ movss MO1(SP_ONE), %xmm0 /* 1.0 */
+ movl ARG_SIN_PTR, %eax
+ movl ARG_COS_PTR, %ecx
+ movss %xmm7, (%eax) /* sin(+-0)==x */
+ movss %xmm0, (%ecx) /* cos(+-0)==1 */
+ RETURN
+
+ .p2align 4
+L(arg_inf_or_nan):
+ movss ARG_X, %xmm7 /* SP x */
+ /* Here if |x| is Inf or NAN */
+ jne L(skip_errno_setting) /* in case x is NaN */
+
+ /* Here if x is Inf. Set errno to EDOM. */
+ call JUMPTARGET(__errno_location)
+ movl $EDOM, (%eax)
+
+ .p2align 4
+L(skip_errno_setting):
+ /* Here if |x| is Inf or NAN. Continued. */
+ subss %xmm7, %xmm7 /* x-x, result is NaN */
+ movl ARG_SIN_PTR, %eax
+ movl ARG_COS_PTR, %ecx
+ movss %xmm7, (%eax)
+ movss %xmm7, (%ecx)
+ RETURN
+END(__sincosf_sse2)
+
+ .section .rodata, "a"
+ .p2align 3
+L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
+ .long 0x00000000,0x00000000
+ .long 0x54442d18,0x3fe921fb
+ .long 0x54442d18,0x3ff921fb
+ .long 0x7f3321d2,0x4002d97c
+ .long 0x54442d18,0x400921fb
+ .long 0x2955385e,0x400f6a7a
+ .long 0x7f3321d2,0x4012d97c
+ .long 0xe9bba775,0x4015fdbb
+ .long 0x54442d18,0x401921fb
+ .long 0xbeccb2bb,0x401c463a
+ .long 0x2955385e,0x401f6a7a
+ .type L(PIO4J), @object
+ ASM_SIZE_DIRECTIVE(L(PIO4J))
+
+ .p2align 3
+L(_FPI): /* 4/Pi broken into sum of positive DP values */
+ .long 0x00000000,0x00000000
+ .long 0x6c000000,0x3ff45f30
+ .long 0x2a000000,0x3e3c9c88
+ .long 0xa8000000,0x3c54fe13
+ .long 0xd0000000,0x3aaf47d4
+ .long 0x6c000000,0x38fbb81b
+ .long 0xe0000000,0x3714acc9
+ .long 0x7c000000,0x3560e410
+ .long 0x56000000,0x33bca2c7
+ .long 0xac000000,0x31fbd778
+ .long 0xe0000000,0x300b7246
+ .long 0xe8000000,0x2e5d2126
+ .long 0x48000000,0x2c970032
+ .long 0xe8000000,0x2ad77504
+ .long 0xe0000000,0x290921cf
+ .long 0xb0000000,0x274deb1c
+ .long 0xe0000000,0x25829a73
+ .long 0xbe000000,0x23fd1046
+ .long 0x10000000,0x2224baed
+ .long 0x8e000000,0x20709d33
+ .long 0x80000000,0x1e535a2f
+ .long 0x64000000,0x1cef904e
+ .long 0x30000000,0x1b0d6398
+ .long 0x24000000,0x1964ce7d
+ .long 0x16000000,0x17b908bf
+ .type L(_FPI), @object
+ ASM_SIZE_DIRECTIVE(L(_FPI))
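+
+/* Sketch of how this table is used in L(very_large_args): j = bitpos/28
+   selects a window, and
+     |x|*(4/Pi) ~= FPI[j-2]*|x| + FPI[j-1]*|x| + FPI[j]*|x| + FPI[j+1]*|x|
+   up to an integer multiple of 8 (a multiple of 2*Pi before scaling by
+   4/Pi), which the reduction is free to drop; each entry supplies about
+   28 further bits of 4/Pi, so the window covers the bits that matter. */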
+
+/* Coefficients of polynomials for */
+/* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low DP part, */
+/* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
+/* for |x|<2^-5. */
+ .p2align 4
+L(DP_SINCOS2_0):
+ .long 0x5543d49d,0xbfc55555
+ .long 0xff5cc6fd,0xbfdfffff
+ .type L(DP_SINCOS2_0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
+
+ .p2align 4
+L(DP_SINCOS2_1):
+ .long 0x75cec8c5,0x3f8110f4
+ .long 0xb178dac5,0x3fa55514
+ .type L(DP_SINCOS2_1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
+
+ .p2align 3
+L(DP_ZERONE):
+ .long 0x00000000,0x00000000 /* 0.0 */
+ .long 0x00000000,0xbff00000 /* -1.0 */
+ .type L(DP_ZERONE), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
+
+ .p2align 3
+L(DP_ONES):
+ .long 0x00000000,0x3ff00000 /* +1.0 */
+ .long 0x00000000,0xbff00000 /* -1.0 */
+ .type L(DP_ONES), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ONES))
+
+/* Coefficients of polynomials for */
+/* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low DP part, */
+/* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
+/* for |t|<Pi/4. */
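+/* Layout note: each DP_SCi below packs the sin coefficient Si in the low
+   qword and the cos coefficient Ci in the high qword (DP_SC4 = C4|S4 in
+   the high|low notation used above), so a single mulpd/addpd step in the
+   code advances both Horner recurrences at once. */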
+ .p2align 4
+L(DP_SC4):
+ .long 0x1674b58a,0xbe5a947e
+ .long 0xdd8844d7,0xbe923c97
+ .type L(DP_SC4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SC4))
+
+ .p2align 4
+L(DP_SC3):
+ .long 0x64e6b5b4,0x3ec71d72
+ .long 0x9ac43cc0,0x3efa00eb
+ .type L(DP_SC3), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SC3))
+
+ .p2align 4
+L(DP_SC2):
+ .long 0x8b4bd1f9,0xbf2a019f
+ .long 0x348b6874,0xbf56c16b
+ .type L(DP_SC2), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SC2))
+
+ .p2align 4
+L(DP_SC1):
+ .long 0x10c2688b,0x3f811111
+ .long 0x545c50c7,0x3fa55555
+ .type L(DP_SC1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SC1))
+
+ .p2align 4
+L(DP_SC0):
+ .long 0x55551cd9,0xbfc55555
+ .long 0xfffe98ae,0xbfdfffff
+ .type L(DP_SC0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SC0))
+
+ .p2align 3
+L(DP_SMALL):
+ .long 0x00000000,0x3cd00000 /* 2^(-50) */
+ .type L(DP_SMALL), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SMALL))
+
+ .p2align 3
+L(DP_PIO4):
+ .long 0x54442d18,0x3fe921fb /* Pi/4 */
+ .type L(DP_PIO4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4))
+
+ .p2align 3
+L(DP_2POW52):
+ .long 0x00000000,0x43300000 /* +2^52 */
+ .long 0x00000000,0xc3300000 /* -2^52 */
+ .type L(DP_2POW52), @object
+ ASM_SIZE_DIRECTIVE(L(DP_2POW52))
+
+ .p2align 3
+L(DP_INVPIO4):
+ .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
+ .type L(DP_INVPIO4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
+
+ .p2align 3
+L(DP_PIO4HI):
+ .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
+ .type L(DP_PIO4HI), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
+
+ .p2align 3
+L(DP_PIO4LO):
+ .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
+ .type L(DP_PIO4LO), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
+
+ .p2align 2
+L(SP_INVPIO4):
+ .long 0x3fa2f983 /* 4/Pi */
+ .type L(SP_INVPIO4), @object
+ ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
+
+ .p2align 4
+L(DP_ABS_MASK): /* Mask for getting DP absolute value */
+ .long 0xffffffff,0x7fffffff
+ .long 0xffffffff,0x7fffffff
+ .type L(DP_ABS_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+
+ .p2align 3
+L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
+ .long 0x00000000,0xffffffff
+ .type L(DP_HI_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
+
+ .p2align 4
+L(SP_ABS_MASK): /* Mask for getting SP absolute value */
+ .long 0x7fffffff,0x7fffffff
+ .long 0x7fffffff,0x7fffffff
+ .type L(SP_ABS_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
+
+ .p2align 2
+L(SP_ONE):
+ .long 0x3f800000 /* 1.0 */
+ .type L(SP_ONE), @object
+ ASM_SIZE_DIRECTIVE(L(SP_ONE))
+
+weak_alias (__sincosf, sincosf)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
new file mode 100644
index 0000000000..9428f9b4ea
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
@@ -0,0 +1,30 @@
+/* Multiple versions of sincosf
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern void __sincosf_sse2 (float, float *, float *);
+extern void __sincosf_ia32 (float, float *, float *);
+void __sincosf (float, float *, float *);
+
+libm_ifunc (__sincosf,
+ HAS_CPU_FEATURE (SSE2) ? __sincosf_sse2 : __sincosf_ia32);
+weak_alias (__sincosf, sincosf);
+
+#define SINCOSF __sincosf_ia32
+#include <sysdeps/ieee754/flt-32/s_sincosf.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
new file mode 100644
index 0000000000..ee96018061
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
@@ -0,0 +1,566 @@
+/* Optimized with sse2 version of sinf
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#define __need_Emath
+#include <bits/errno.h>
+
+/* Short algorithm description:
+ *
+ * 1) if |x| == 0: return x.
+ * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed.
+ * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
+ * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
+ * 5) if |x| < 9*Pi/4:
+ * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
+ * t=|x|-j*Pi/4.
+ * 5.2) Reconstruction:
+ * s = sign(x) * (-1.0)^((n>>2)&1)
+ * if(n&2 != 0) {
+ * using cos(t) polynomial for |t|<Pi/4, result is
+ * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
+ * } else {
+ * using sin(t) polynomial for |t|<Pi/4, result is
+ * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
+ * }
+ * 6) if |x| < 2^23, large args:
+ * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
+ * t=|x|-j*Pi/4.
+ * 6.2) Reconstruction same as (5.2).
+ * 7) if |x| >= 2^23, very large args:
+ * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
+ * t=|x|-j*Pi/4.
+ * 7.2) Reconstruction same as (5.2).
+ * 8) if x is Inf, return x-x, and set errno=EDOM.
+ * 9) if x is NaN, return x-x.
+ *
+ * Special cases:
+ * sin(+-0) = +-0 not raising inexact/underflow,
+ * sin(subnormal) raises inexact/underflow,
+ * sin(min_normalized) raises inexact/underflow,
+ * sin(normalized) raises inexact,
+ * sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ * sin(NaN) = NaN.
+ */
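+
+/* For illustration, the reconstruction step (5.2) in rough C (a sketch
+   only; these names are not symbols defined in this file):
+
+     double s = (((n >> 2) & 1) ^ sign_x) ? -1.0 : 1.0;
+     double y = t * t;
+     if (n & 2)
+       result = s * (1.0 + y*(C0 + y*(C1 + y*(C2 + y*(C3 + y*C4)))));
+     else
+       result = s * t * (1.0 + y*(S0 + y*(S1 + y*(S2 + y*(S3 + y*S4)))));
+*/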
+
+#ifdef PIC
+# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
+# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
+# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG) pushl REG; CFI_PUSH(REG)
+# define POP(REG) popl REG; CFI_POP(REG)
+# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
+# define ARG_X 8(%esp)
+#else
+# define MO1(symbol) L(symbol)
+# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
+# define ENTRANCE
+# define RETURN ret
+# define ARG_X 4(%esp)
+#endif
+
+ .text
+ENTRY(__sinf_sse2)
+ /* Input: single precision x on stack at address ARG_X */
+
+ ENTRANCE
+ movl ARG_X, %eax /* Bits of x */
+ cvtss2sd ARG_X, %xmm0 /* DP x */
+ andl $0x7fffffff, %eax /* |x| */
+
+ cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
+ jb L(arg_less_pio4)
+
+ /* Here if |x|>=Pi/4 */
+ movd %eax, %xmm3 /* SP |x| */
+ andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
+ movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
+
+ cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
+ jae L(large_args)
+
+ /* Here if Pi/4<=|x|<9*Pi/4 */
+ mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
+ movl ARG_X, %ecx /* Load x */
+ cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
+ shrl $31, %ecx /* sign of x */
+ addl $1, %eax /* k+1 */
+ movl $0x0e, %edx
+ andl %eax, %edx /* j = (k+1)&0x0e */
+ subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
+
+L(reconstruction):
+ /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
+ testl $2, %eax /* n&2 != 0? */
+ jz L(sin_poly)
+
+/*L(cos_poly):*/
+ /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
+ * y = t*t; z = y*y;
+ * s = sign(x) * (-1.0)^((n>>2)&1)
+ * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
+ */
+ shrl $2, %eax /* n>>2 */
+ mulsd %xmm0, %xmm0 /* y=t^2 */
+ andl $1, %eax /* (n>>2)&1 */
+ movaps %xmm0, %xmm1 /* y */
+ mulsd %xmm0, %xmm0 /* z=t^4 */
+
+ movsd MO1(DP_C4), %xmm4 /* C4 */
+ mulsd %xmm0, %xmm4 /* z*C4 */
+ xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */
+ movsd MO1(DP_C3), %xmm3 /* C3 */
+ mulsd %xmm0, %xmm3 /* z*C3 */
+ addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
+ mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
+ lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */
+ addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
+ mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
+ addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
+ mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
+
+ addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
+ /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
+ addsd MO1(DP_ONES), %xmm3
+
+ mulsd MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */
+ movsd %xmm3, 0(%esp) /* Move result from sse... */
+ fldl 0(%esp) /* ...to FPU. */
+ /* Return back 4 bytes of stack frame */
+ lea 8(%esp), %esp
+ RETURN
+
+ .p2align 4
+L(sin_poly):
+ /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
+ * y = t*t; z = y*y;
+ * s = sign(x) * (-1.0)^((n>>2)&1)
+ * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
+ */
+
+ movaps %xmm0, %xmm4 /* t */
+ shrl $2, %eax /* n>>2 */
+ mulsd %xmm0, %xmm0 /* y=t^2 */
+ andl $1, %eax /* (n>>2)&1 */
+ movaps %xmm0, %xmm1 /* y */
+ xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */
+ mulsd %xmm0, %xmm0 /* z=t^4 */
+
+ movsd MO1(DP_S4), %xmm2 /* S4 */
+ mulsd %xmm0, %xmm2 /* z*S4 */
+ movsd MO1(DP_S3), %xmm3 /* S3 */
+ mulsd %xmm0, %xmm3 /* z*S3 */
+ lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */
+ addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
+ mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
+ addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
+ mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
+ addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
+ mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
+ /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
+ mulsd MO2(DP_ONES,%ecx,8), %xmm4
+ addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
+ /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
+ mulsd %xmm4, %xmm3
+ /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))) */
+ addsd %xmm4, %xmm3
+ movsd %xmm3, 0(%esp) /* Move result from sse... */
+ fldl 0(%esp) /* ...to FPU. */
+ /* Return back 4 bytes of stack frame */
+ lea 8(%esp), %esp
+ RETURN
+
+ .p2align 4
+L(large_args):
+ /* Here if |x|>=9*Pi/4 */
+ cmpl $0x7f800000, %eax /* x is Inf or NaN? */
+ jae L(arg_inf_or_nan)
+
+ /* Here if finite |x|>=9*Pi/4 */
+ cmpl $0x4b000000, %eax /* |x|<2^23? */
+ jae L(very_large_args)
+
+ /* Here if 9*Pi/4<=|x|<2^23 */
+ movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
+ mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
+ cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
+ addl $1, %eax /* k+1 */
+ movl %eax, %edx
+ andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
+ cvtsi2sdl %edx, %xmm4 /* DP j */
+ movl ARG_X, %ecx /* Load x */
+ movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
+ shrl $31, %ecx /* sign bit of x */
+ mulsd %xmm4, %xmm2 /* -j*PIO4HI */
+ movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
+ addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
+ mulsd %xmm3, %xmm4 /* j*PIO4LO */
+ addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
+ jmp L(reconstruction)
+
+ .p2align 4
+L(very_large_args):
+ /* Here if finite |x|>=2^23 */
+
+ /* bitpos = (ix>>23) - BIAS_32 + 59; */
+ shrl $23, %eax /* eb = biased exponent of x */
+ /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
+ subl $68, %eax
+ movl $28, %ecx /* %cl=28 */
+ movl %eax, %edx /* bitpos copy */
+
+ /* j = bitpos/28; */
+ div %cl /* j in register %al=%ax/%cl */
+ movapd %xmm0, %xmm3 /* |x| */
+ /* clear unneeded remainder from %ah */
+ andl $0xff, %eax
+
+ imull $28, %eax, %ecx /* j*28 */
+ movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
+ movapd %xmm0, %xmm5 /* |x| */
+ mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
+ movapd %xmm0, %xmm1 /* |x| */
+ mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
+ mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
+ addl $19, %ecx /* j*28+19 */
+ mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
+ cmpl %ecx, %edx /* bitpos>=j*28+19? */
+ jl L(very_large_skip1)
+
+ /* Here if bitpos>=j*28+19 */
+ andpd %xmm3, %xmm4 /* HI(tmp3) */
+ subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
+L(very_large_skip1):
+
+ movsd MO1(DP_2POW52), %xmm6
+ movapd %xmm5, %xmm2 /* tmp2 copy */
+ addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
+ movl $1, %edx
+ addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
+ movsd 8+MO1(DP_2POW52), %xmm4
+ movd %xmm6, %eax /* k = I64_LO(tmp6); */
+ addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
+ movl ARG_X, %ecx /* Load x */
+ comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
+ jbe L(very_large_skip2)
+
+ /* Here if tmp4 > tmp5 */
+ subl $1, %eax /* k-- */
+ addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
+L(very_large_skip2):
+
+ andl %eax, %edx /* k&1 */
+ subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
+ addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
+ addsd %xmm2, %xmm3 /* t += tmp2 */
+ shrl $31, %ecx /* sign of x */
+ addsd %xmm3, %xmm0 /* t += tmp0 */
+ addl $1, %eax /* n=k+1 */
+ addsd %xmm1, %xmm0 /* t += tmp1 */
+ mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */
+
+ jmp L(reconstruction) /* end of very_large_args path */
+
+ .p2align 4
+L(arg_less_pio4):
+ /* Here if |x|<Pi/4 */
+ cmpl $0x3d000000, %eax /* |x|<2^-5? */
+ jl L(arg_less_2pn5)
+
+ /* Here if 2^-5<=|x|<Pi/4 */
+ movaps %xmm0, %xmm3 /* x */
+ mulsd %xmm0, %xmm0 /* y=x^2 */
+ movaps %xmm0, %xmm1 /* y */
+ mulsd %xmm0, %xmm0 /* z=x^4 */
+ movsd MO1(DP_S4), %xmm4 /* S4 */
+ mulsd %xmm0, %xmm4 /* z*S4 */
+ movsd MO1(DP_S3), %xmm5 /* S3 */
+ mulsd %xmm0, %xmm5 /* z*S3 */
+ addsd MO1(DP_S2), %xmm4 /* S2+z*S4 */
+ mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */
+ addsd MO1(DP_S1), %xmm5 /* S1+z*S3 */
+ mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */
+ addsd MO1(DP_S0), %xmm4 /* S0+z*(S2+z*S4) */
+ mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
+ mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
+ mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
+ /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
+ addsd %xmm5, %xmm4
+ /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
+ addsd %xmm4, %xmm3
+ cvtsd2ss %xmm3, %xmm3 /* SP result */
+
+L(epilogue):
+ lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
+ movss %xmm3, 0(%esp) /* Move result from sse... */
+ flds 0(%esp) /* ...to FPU. */
+ /* Return back 4 bytes of stack frame */
+ lea 4(%esp), %esp
+ RETURN
+
+ .p2align 4
+L(arg_less_2pn5):
+ /* Here if |x|<2^-5 */
+ cmpl $0x32000000, %eax /* |x|<2^-27? */
+ jl L(arg_less_2pn27)
+
+ /* Here if 2^-27<=|x|<2^-5 */
+ movaps %xmm0, %xmm1 /* DP x */
+ mulsd %xmm0, %xmm0 /* DP x^2 */
+ movsd MO1(DP_SIN2_1), %xmm3 /* DP DP_SIN2_1 */
+ mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
+ addsd MO1(DP_SIN2_0), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
+ mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
+ mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
+ addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
+ cvtsd2ss %xmm3, %xmm3 /* SP result */
+ jmp L(epilogue)
+
+ .p2align 4
+L(arg_less_2pn27):
+ movss ARG_X, %xmm3 /* SP x */
+ cmpl $0, %eax /* x=0? */
+ je L(epilogue) /* in case x=0 return sin(+-0)==+-0 */
+ /* Here if |x|<2^-27 */
+ /*
+ * Special cases here:
+ * sin(subnormal) raises inexact/underflow
+ * sin(min_normalized) raises inexact/underflow
+ * sin(normalized) raises inexact
+ */
+ movaps %xmm0, %xmm3 /* Copy of DP x */
+ mulsd MO1(DP_SMALL), %xmm0 /* x*DP_SMALL */
+ subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */
+ cvtsd2ss %xmm3, %xmm3 /* Result converted to SP */
+ jmp L(epilogue)
+
+ .p2align 4
+L(arg_inf_or_nan):
+ /* Here if |x| is Inf or NAN */
+ jne L(skip_errno_setting) /* in case x is NaN */
+
+ /* Here if x is Inf. Set errno to EDOM. */
+ call JUMPTARGET(__errno_location)
+ movl $EDOM, (%eax)
+
+ .p2align 4
+L(skip_errno_setting):
+ /* Here if |x| is Inf or NAN. Continued. */
+ movss ARG_X, %xmm3 /* load x */
+ subss %xmm3, %xmm3 /* Result is NaN */
+ jmp L(epilogue)
+END(__sinf_sse2)
+
+ .section .rodata, "a"
+ .p2align 3
+L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
+ .long 0x00000000,0x00000000
+ .long 0x54442d18,0x3fe921fb
+ .long 0x54442d18,0x3ff921fb
+ .long 0x7f3321d2,0x4002d97c
+ .long 0x54442d18,0x400921fb
+ .long 0x2955385e,0x400f6a7a
+ .long 0x7f3321d2,0x4012d97c
+ .long 0xe9bba775,0x4015fdbb
+ .long 0x54442d18,0x401921fb
+ .long 0xbeccb2bb,0x401c463a
+ .long 0x2955385e,0x401f6a7a
+ .type L(PIO4J), @object
+ ASM_SIZE_DIRECTIVE(L(PIO4J))
+
+ .p2align 3
+L(_FPI): /* 4/Pi broken into sum of positive DP values */
+ .long 0x00000000,0x00000000
+ .long 0x6c000000,0x3ff45f30
+ .long 0x2a000000,0x3e3c9c88
+ .long 0xa8000000,0x3c54fe13
+ .long 0xd0000000,0x3aaf47d4
+ .long 0x6c000000,0x38fbb81b
+ .long 0xe0000000,0x3714acc9
+ .long 0x7c000000,0x3560e410
+ .long 0x56000000,0x33bca2c7
+ .long 0xac000000,0x31fbd778
+ .long 0xe0000000,0x300b7246
+ .long 0xe8000000,0x2e5d2126
+ .long 0x48000000,0x2c970032
+ .long 0xe8000000,0x2ad77504
+ .long 0xe0000000,0x290921cf
+ .long 0xb0000000,0x274deb1c
+ .long 0xe0000000,0x25829a73
+ .long 0xbe000000,0x23fd1046
+ .long 0x10000000,0x2224baed
+ .long 0x8e000000,0x20709d33
+ .long 0x80000000,0x1e535a2f
+ .long 0x64000000,0x1cef904e
+ .long 0x30000000,0x1b0d6398
+ .long 0x24000000,0x1964ce7d
+ .long 0x16000000,0x17b908bf
+ .type L(_FPI), @object
+ ASM_SIZE_DIRECTIVE(L(_FPI))
+
+/* Coefficients of polynomial
+ for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */
+ .p2align 3
+L(DP_SIN2_0):
+ .long 0x5543d49d,0xbfc55555
+ .type L(DP_SIN2_0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
+
+ .p2align 3
+L(DP_SIN2_1):
+ .long 0x75cec8c5,0x3f8110f4
+ .type L(DP_SIN2_1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
+
+ .p2align 3
+L(DP_ZERONE):
+ .long 0x00000000,0x00000000 /* 0.0 */
+ .long 0x00000000,0xbff00000 /* -1.0 */
+ .type L(DP_ZERONE), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
+
+ .p2align 3
+L(DP_ONES):
+ .long 0x00000000,0x3ff00000 /* +1.0 */
+ .long 0x00000000,0xbff00000 /* -1.0 */
+ .type L(DP_ONES), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ONES))
+
+/* Coefficients of polynomial
+ for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */
+ .p2align 3
+L(DP_S3):
+ .long 0x64e6b5b4,0x3ec71d72
+ .type L(DP_S3), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S3))
+
+ .p2align 3
+L(DP_S1):
+ .long 0x10c2688b,0x3f811111
+ .type L(DP_S1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S1))
+
+ .p2align 3
+L(DP_S4):
+ .long 0x1674b58a,0xbe5a947e
+ .type L(DP_S4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S4))
+
+ .p2align 3
+L(DP_S2):
+ .long 0x8b4bd1f9,0xbf2a019f
+ .type L(DP_S2), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S2))
+
+ .p2align 3
+L(DP_S0):
+ .long 0x55551cd9,0xbfc55555
+ .type L(DP_S0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_S0))
+
+ .p2align 3
+L(DP_SMALL):
+ .long 0x00000000,0x3cd00000 /* 2^(-50) */
+ .type L(DP_SMALL), @object
+ ASM_SIZE_DIRECTIVE(L(DP_SMALL))
+
+/* Coefficients of polynomial
+ for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */
+ .p2align 3
+L(DP_C3):
+ .long 0x9ac43cc0,0x3efa00eb
+ .type L(DP_C3), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C3))
+
+ .p2align 3
+L(DP_C1):
+ .long 0x545c50c7,0x3fa55555
+ .type L(DP_C1), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C1))
+
+ .p2align 3
+L(DP_C4):
+ .long 0xdd8844d7,0xbe923c97
+ .type L(DP_C4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C4))
+
+ .p2align 3
+L(DP_C2):
+ .long 0x348b6874,0xbf56c16b
+ .type L(DP_C2), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C2))
+
+ .p2align 3
+L(DP_C0):
+ .long 0xfffe98ae,0xbfdfffff
+ .type L(DP_C0), @object
+ ASM_SIZE_DIRECTIVE(L(DP_C0))
+
+ .p2align 3
+L(DP_PIO4):
+ .long 0x54442d18,0x3fe921fb /* Pi/4 */
+ .type L(DP_PIO4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4))
+
+ .p2align 3
+L(DP_2POW52):
+ .long 0x00000000,0x43300000 /* +2^52 */
+ .long 0x00000000,0xc3300000 /* -2^52 */
+ .type L(DP_2POW52), @object
+ ASM_SIZE_DIRECTIVE(L(DP_2POW52))
+
+ .p2align 3
+L(DP_INVPIO4):
+ .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
+ .type L(DP_INVPIO4), @object
+ ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
+
+ .p2align 3
+L(DP_PIO4HI):
+ .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
+ .type L(DP_PIO4HI), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
+
+ .p2align 3
+L(DP_PIO4LO):
+ .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
+ .type L(DP_PIO4LO), @object
+ ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
+
+ .p2align 2
+L(SP_INVPIO4):
+ .long 0x3fa2f983 /* 4/Pi */
+ .type L(SP_INVPIO4), @object
+ ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
+
+ .p2align 4
+L(DP_ABS_MASK): /* Mask for getting DP absolute value */
+ .long 0xffffffff,0x7fffffff
+ .long 0xffffffff,0x7fffffff
+ .type L(DP_ABS_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+
+ .p2align 3
+L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
+ .long 0x00000000,0xffffffff
+ .type L(DP_HI_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
+
+weak_alias (__sinf, sinf)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf.c b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
new file mode 100644
index 0000000000..8ccdd2f34d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
@@ -0,0 +1,28 @@
+/* Multiple versions of sinf
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern float __sinf_sse2 (float);
+extern float __sinf_ia32 (float);
+float __sinf (float);
+
+libm_ifunc (__sinf, HAS_CPU_FEATURE (SSE2) ? __sinf_sse2 : __sinf_ia32);
+weak_alias (__sinf, sinf);
+#define SINF __sinf_ia32
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/s_fmax.S b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmax.S
new file mode 100644
index 0000000000..ace8db9410
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmax.S
@@ -0,0 +1,39 @@
+/* Compute maximum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmax)
+ fldl 4(%esp) // x
+ fldl 12(%esp) // x : y
+
+ fucomi %st(0), %st
+ fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
+
+ fxch
+
+ fucomi %st(1), %st
+ fcmovb %st(1), %st
+
+ fstp %st(1)
+
+ ret
+END(__fmax)
+weak_alias (__fmax, fmax)
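+
+// Reference semantics in rough C (a sketch, not part of the build):
+//
+//   double fmax_ref (double x, double y)
+//   {
+//     if (isnan (y)) return x;    // NaN regarded as missing argument
+//     if (isnan (x)) return y;
+//     return x >= y ? x : y;
+//   }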
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxf.S b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxf.S
new file mode 100644
index 0000000000..3a25951a09
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxf.S
@@ -0,0 +1,39 @@
+/* Compute maximum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmaxf)
+ flds 4(%esp) // x
+ flds 8(%esp) // x : y
+
+ fucomi %st(0), %st
+ fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
+
+ fxch
+
+ fucomi %st(1), %st
+ fcmovb %st(1), %st
+
+ fstp %st(1)
+
+ ret
+END(__fmaxf)
+weak_alias (__fmaxf, fmaxf)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxl.S b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxl.S
new file mode 100644
index 0000000000..3f6c21c63d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmaxl.S
@@ -0,0 +1,58 @@
+/* Compute maximum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmaxl)
+ fldt 4(%esp) // x
+ fldt 16(%esp) // x : y
+
+ fucomi %st(1), %st
+ jp 2f
+ fcmovb %st(1), %st
+
+ fstp %st(1)
+
+ ret
+
+2: // Unordered.
+ fucomi %st(0), %st
+ jp 3f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 11(%esp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+3: // st(0) is a NaN; st(1) may or may not be.
+ fxch
+ fucomi %st(0), %st
+ jp 4f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 23(%esp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ faddp
+ ret
+END(__fmaxl)
+weak_alias (__fmaxl, fmaxl)
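+
+// Rough C model of the NaN handling above (a sketch; issignaling is the
+// GNU extension testing the quiet bit that the testb $0x40 checks):
+//
+//   long double fmaxl_ref (long double x, long double y)
+//   {
+//     if (isnan (x) && isnan (y)) return x + y;  // raises invalid
+//     if (isnan (y)) return issignaling (y) ? x + y : x;
+//     if (isnan (x)) return issignaling (x) ? x + y : y;
+//     return x >= y ? x : y;
+//   }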
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/s_fmin.S b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmin.S
new file mode 100644
index 0000000000..72d306fd79
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/s_fmin.S
@@ -0,0 +1,37 @@
+/* Compute minimum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fmin)
+ fldl 4(%esp) // x
+ fldl 12(%esp) // x : y
+
+ fucomi %st(0), %st
+ fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
+
+ fucomi %st(1), %st
+ fcmovnb %st(1), %st
+
+ fstp %st(1)
+
+ ret
+END(__fmin)
+weak_alias (__fmin, fmin)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/s_fminf.S b/REORG.TODO/sysdeps/i386/i686/fpu/s_fminf.S
new file mode 100644
index 0000000000..52ea892bad
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/s_fminf.S
@@ -0,0 +1,37 @@
+/* Compute minimum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fminf)
+ flds 4(%esp) // x
+ flds 8(%esp) // x : y
+
+ fucomi %st(0), %st
+ fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
+
+ fucomi %st(1), %st
+ fcmovnb %st(1), %st
+
+ fstp %st(1)
+
+ ret
+END(__fminf)
+weak_alias (__fminf, fminf)
diff --git a/REORG.TODO/sysdeps/i386/i686/fpu/s_fminl.S b/REORG.TODO/sysdeps/i386/i686/fpu/s_fminl.S
new file mode 100644
index 0000000000..e1cb83fed7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/fpu/s_fminl.S
@@ -0,0 +1,58 @@
+/* Compute minimum of two numbers, regarding NaN as missing argument.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__fminl)
+ fldt 4(%esp) // x
+ fldt 16(%esp) // x : y
+
+ fucomi %st(1), %st
+ jp 2f
+ fcmovnb %st(1), %st
+
+ fstp %st(1)
+
+ ret
+
+2: // Unordered.
+ fucomi %st(0), %st
+ jp 3f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 11(%esp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+3: // st(0) is a NaN; st(1) may or may not be.
+ fxch
+ fucomi %st(0), %st
+ jp 4f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 23(%esp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ faddp
+ ret
+END(__fminl)
+weak_alias (__fminl, fminl)
diff --git a/REORG.TODO/sysdeps/i386/i686/hp-timing.h b/REORG.TODO/sysdeps/i386/i686/hp-timing.h
new file mode 100644
index 0000000000..1b11410feb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/hp-timing.h
@@ -0,0 +1,42 @@
+/* High precision, low overhead timing functions. i686 version.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _HP_TIMING_H
+#define _HP_TIMING_H 1
+
+/* We always assume having the timestamp register. */
+#define HP_TIMING_AVAIL (1)
+#define HP_SMALL_TIMING_AVAIL (1)
+
+/* We indeed have inlined functions. */
+#define HP_TIMING_INLINE (1)
+
+/* We use 64bit values for the times. */
+typedef unsigned long long int hp_timing_t;
+
+/* That's quite simple. Use the `rdtsc' instruction. Note that the value
+ might not be 100% accurate since there might be some other instructions
+ running at this moment. This could be changed by using a barrier like
+ 'cpuid' right before the `rdtsc' instruction. But we are not interested
+ in accurate clock cycles here so we don't do this. */
+#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
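+
+/* Example use (a sketch; HP_TIMING_DIFF is one of the helpers provided
+   by <hp-timing-common.h>):
+
+     hp_timing_t start, stop, elapsed;
+     HP_TIMING_NOW (start);
+     ... code being measured ...
+     HP_TIMING_NOW (stop);
+     HP_TIMING_DIFF (elapsed, start, stop);  */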
+
+#include <hp-timing-common.h>
+
+#endif /* hp-timing.h */
diff --git a/REORG.TODO/sysdeps/i386/i686/init-arch.h b/REORG.TODO/sysdeps/i386/i686/init-arch.h
new file mode 100644
index 0000000000..f55f80efa0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/init-arch.h
@@ -0,0 +1,19 @@
+/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define MINIMUM_ISA 686
+#include <sysdeps/x86/init-arch.h>
diff --git a/REORG.TODO/sysdeps/i386/i686/memcmp.S b/REORG.TODO/sysdeps/i386/i686/memcmp.S
new file mode 100644
index 0000000000..5140ee2145
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/memcmp.S
@@ -0,0 +1,408 @@
+/* Compare two memory blocks for differences in the first COUNT bytes.
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* Preserve EBX. */
+#define BLK1 PARMS
+#define BLK2 BLK1+4
+#define LEN BLK2+4
+#define ENTRANCE pushl %ebx; cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (ebx, 0)
+#define RETURN popl %ebx; cfi_adjust_cfa_offset (-4); \
+ cfi_restore (ebx); ret
+
+/* Load an entry in a jump table into EBX. TABLE is a jump table
+ with relative offsets. INDEX is a register that contains the index
+ into the jump table. */
+#define LOAD_JUMP_TABLE_ENTRY(TABLE, INDEX) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,4), %ebx
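+
+/* Schematically (a computed-goto sketch, not literal C):
+     ebx = &table;            table address formed PC-relatively
+     ebx += table[index];     entries are offsets relative to the table
+     goto *ebx;
+   Storing relative offsets keeps the table free of absolute addresses
+   that would need load-time relocation. */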
+
+ .text
+ ALIGN (4)
+ENTRY (memcmp)
+ ENTRANCE
+
+ movl BLK1(%esp), %eax
+ movl BLK2(%esp), %edx
+ movl LEN(%esp), %ecx
+
+ cmpl $1, %ecx
+ jne L(not_1)
+ movzbl (%eax), %ecx /* LEN == 1 */
+ cmpb (%edx), %cl
+ jne L(neq)
+L(bye):
+ xorl %eax, %eax
+ RETURN
+
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+L(neq):
+ sbbl %eax, %eax
+ sbbl $-1, %eax
+ RETURN
+
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+L(not_1):
+ jl L(bye) /* LEN == 0 */
+
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ movl %eax, %esi
+ cfi_rel_offset (esi, 0)
+ cmpl $32, %ecx
+ jge L(32bytesormore) /* LEN >= 32 */
+
+ LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
+ addl %ecx, %edx
+ addl %ecx, %esi
+ jmp *%ebx
+
+ ALIGN (4)
+L(28bytes):
+ movl -28(%esi), %eax
+ movl -28(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(24bytes):
+ movl -24(%esi), %eax
+ movl -24(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(20bytes):
+ movl -20(%esi), %eax
+ movl -20(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(16bytes):
+ movl -16(%esi), %eax
+ movl -16(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(12bytes):
+ movl -12(%esi), %eax
+ movl -12(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(8bytes):
+ movl -8(%esi), %eax
+ movl -8(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(4bytes):
+ movl -4(%esi), %eax
+ movl -4(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(0bytes):
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ xorl %eax, %eax
+ RETURN
+
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (esi, 0)
+ cfi_rel_offset (ebx, 4)
+L(29bytes):
+ movl -29(%esi), %eax
+ movl -29(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(25bytes):
+ movl -25(%esi), %eax
+ movl -25(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(21bytes):
+ movl -21(%esi), %eax
+ movl -21(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(17bytes):
+ movl -17(%esi), %eax
+ movl -17(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(13bytes):
+ movl -13(%esi), %eax
+ movl -13(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(9bytes):
+ movl -9(%esi), %eax
+ movl -9(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(5bytes):
+ movl -5(%esi), %eax
+ movl -5(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(1bytes):
+ movzbl -1(%esi), %eax
+ cmpb -1(%edx), %al
+ jne L(set)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ xorl %eax, %eax
+ RETURN
+
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (esi, 0)
+ cfi_rel_offset (ebx, 4)
+L(30bytes):
+ movl -30(%esi), %eax
+ movl -30(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(26bytes):
+ movl -26(%esi), %eax
+ movl -26(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(22bytes):
+ movl -22(%esi), %eax
+ movl -22(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(18bytes):
+ movl -18(%esi), %eax
+ movl -18(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(14bytes):
+ movl -14(%esi), %eax
+ movl -14(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(10bytes):
+ movl -10(%esi), %eax
+ movl -10(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(6bytes):
+ movl -6(%esi), %eax
+ movl -6(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(2bytes):
+ movzwl -2(%esi), %eax
+ movzwl -2(%edx), %ecx
+ cmpb %cl, %al
+ jne L(set)
+ cmpl %ecx, %eax
+ jne L(set)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ xorl %eax, %eax
+ RETURN
+
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (esi, 0)
+ cfi_rel_offset (ebx, 4)
+L(31bytes):
+ movl -31(%esi), %eax
+ movl -31(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(27bytes):
+ movl -27(%esi), %eax
+ movl -27(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(23bytes):
+ movl -23(%esi), %eax
+ movl -23(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(19bytes):
+ movl -19(%esi), %eax
+ movl -19(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(15bytes):
+ movl -15(%esi), %eax
+ movl -15(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(11bytes):
+ movl -11(%esi), %eax
+ movl -11(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(7bytes):
+ movl -7(%esi), %eax
+ movl -7(%edx), %ecx
+ cmpl %ecx, %eax
+ jne L(find_diff)
+L(3bytes):
+ movzwl -3(%esi), %eax
+ movzwl -3(%edx), %ecx
+ cmpb %cl, %al
+ jne L(set)
+ cmpl %ecx, %eax
+ jne L(set)
+ movzbl -1(%esi), %eax
+ cmpb -1(%edx), %al
+ jne L(set)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ xorl %eax, %eax
+ RETURN
+
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (esi, 0)
+ cfi_rel_offset (ebx, 4)
+ ALIGN (4)
+/* ECX >= 32. */
+L(32bytesormore):
+ subl $32, %ecx
+
+ movl (%esi), %eax
+ cmpl (%edx), %eax
+ jne L(load_ecx)
+
+ movl 4(%esi), %eax
+ cmpl 4(%edx), %eax
+ jne L(load_ecx_4)
+
+ movl 8(%esi), %eax
+ cmpl 8(%edx), %eax
+ jne L(load_ecx_8)
+
+ movl 12(%esi), %eax
+ cmpl 12(%edx), %eax
+ jne L(load_ecx_12)
+
+ movl 16(%esi), %eax
+ cmpl 16(%edx), %eax
+ jne L(load_ecx_16)
+
+ movl 20(%esi), %eax
+ cmpl 20(%edx), %eax
+ jne L(load_ecx_20)
+
+ movl 24(%esi), %eax
+ cmpl 24(%edx), %eax
+ jne L(load_ecx_24)
+
+ movl 28(%esi), %eax
+ cmpl 28(%edx), %eax
+ jne L(load_ecx_28)
+
+ addl $32, %esi
+ addl $32, %edx
+ cmpl $32, %ecx
+ jge L(32bytesormore)
+
+ LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
+ addl %ecx, %edx
+ addl %ecx, %esi
+ jmp *%ebx
+
+L(load_ecx_28):
+ addl $0x4, %edx
+L(load_ecx_24):
+ addl $0x4, %edx
+L(load_ecx_20):
+ addl $0x4, %edx
+L(load_ecx_16):
+ addl $0x4, %edx
+L(load_ecx_12):
+ addl $0x4, %edx
+L(load_ecx_8):
+ addl $0x4, %edx
+L(load_ecx_4):
+ addl $0x4, %edx
+L(load_ecx):
+ movl (%edx), %ecx
+
+L(find_diff):
+ cmpb %cl, %al
+ jne L(set)
+ cmpb %ch, %ah
+ jne L(set)
+ shrl $16,%eax
+ shrl $16,%ecx
+ cmpb %cl, %al
+ jne L(set)
+ /* We get here only if we already know there is a
+ difference. */
+ cmpl %ecx, %eax
+L(set):
+ sbbl %eax, %eax
+ sbbl $-1, %eax
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ RETURN
+END (memcmp)
+
+ .section .rodata
+ ALIGN (2)
+L(table_32bytes):
+ .long L(0bytes) - L(table_32bytes)
+ .long L(1bytes) - L(table_32bytes)
+ .long L(2bytes) - L(table_32bytes)
+ .long L(3bytes) - L(table_32bytes)
+ .long L(4bytes) - L(table_32bytes)
+ .long L(5bytes) - L(table_32bytes)
+ .long L(6bytes) - L(table_32bytes)
+ .long L(7bytes) - L(table_32bytes)
+ .long L(8bytes) - L(table_32bytes)
+ .long L(9bytes) - L(table_32bytes)
+ .long L(10bytes) - L(table_32bytes)
+ .long L(11bytes) - L(table_32bytes)
+ .long L(12bytes) - L(table_32bytes)
+ .long L(13bytes) - L(table_32bytes)
+ .long L(14bytes) - L(table_32bytes)
+ .long L(15bytes) - L(table_32bytes)
+ .long L(16bytes) - L(table_32bytes)
+ .long L(17bytes) - L(table_32bytes)
+ .long L(18bytes) - L(table_32bytes)
+ .long L(19bytes) - L(table_32bytes)
+ .long L(20bytes) - L(table_32bytes)
+ .long L(21bytes) - L(table_32bytes)
+ .long L(22bytes) - L(table_32bytes)
+ .long L(23bytes) - L(table_32bytes)
+ .long L(24bytes) - L(table_32bytes)
+ .long L(25bytes) - L(table_32bytes)
+ .long L(26bytes) - L(table_32bytes)
+ .long L(27bytes) - L(table_32bytes)
+ .long L(28bytes) - L(table_32bytes)
+ .long L(29bytes) - L(table_32bytes)
+ .long L(30bytes) - L(table_32bytes)
+ .long L(31bytes) - L(table_32bytes)
+
+
+#undef bcmp
+weak_alias (memcmp, bcmp)
+libc_hidden_builtin_def (memcmp)
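Worth noting in the file above is the branch-free tail at L(neq)/L(set): once the bytes are known to differ, the compare leaves the carry flag set exactly when the first operand is below the second, and two sbbl instructions turn that flag into -1 or 1 without a branch. A hedged C rendering of the same arithmetic (an illustration, not the glibc source):

    /* Equivalent of "cmpl %ecx, %eax; sbbl %eax, %eax; sbbl $-1, %eax"
       for two values known to be unequal: -1 if a < b, 1 if a > b.  */
    static int
    cmp_sign (unsigned int a, unsigned int b)
    {
      int borrow = a < b;     /* CF after the compare */
      int r = -borrow;        /* sbbl %eax, %eax: 0 or -1, CF preserved */
      return r + 1 - borrow;  /* sbbl $-1, %eax: r - (-1) - CF */
    }

    /* cmp_sign (3, 7) == -1, cmp_sign (7, 3) == 1.  */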
diff --git a/REORG.TODO/sysdeps/i386/i686/memcpy.S b/REORG.TODO/sysdeps/i386/i686/memcpy.S
new file mode 100644
index 0000000000..1d61447430
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/memcpy.S
@@ -0,0 +1,98 @@
+/* Copy memory block and return pointer to beginning of destination block.
+ For Intel 80x86, x>=6.
+ This file is part of the GNU C Library.
+ Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define RTN PARMS
+#define DEST RTN
+#define SRC DEST+4
+#define LEN SRC+4
+
+ .text
+#if defined PIC && IS_IN (libc)
+ENTRY_CHK (__memcpy_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (__memcpy_chk)
+#endif
+ENTRY (memcpy)
+
+ movl %edi, %eax
+ movl DEST(%esp), %edi
+ movl %esi, %edx
+ movl SRC(%esp), %esi
+
+ movl %edi, %ecx
+ xorl %esi, %ecx
+ andl $3, %ecx /* ZF set iff DEST and SRC share alignment mod 4 */
+ movl LEN(%esp), %ecx /* mov and cld leave the flags untouched */
+ cld
+ jne .Lunaligned /* still tests the andl result above */
+
+ cmpl $3, %ecx
+ jbe .Lunaligned
+
+ testl $3, %esi
+ je 1f
+ movsb
+ decl %ecx
+ testl $3, %esi
+ je 1f
+ movsb
+ decl %ecx
+ testl $3, %esi
+ je 1f
+ movsb
+ decl %ecx
+1: pushl %eax
+ movl %ecx, %eax
+ shrl $2, %ecx
+ andl $3, %eax
+ rep
+ movsl
+ movl %eax, %ecx
+ rep
+ movsb
+ popl %eax
+
+.Lend: movl %eax, %edi
+ movl %edx, %esi
+ movl DEST(%esp), %eax
+
+ ret
+
+ /* We reach this point when the pointers do not share the same
+ alignment or the length is too short; it is not worth
+ optimizing for aligned memory accesses. */
+.Lunaligned:
+ shrl $1, %ecx
+ jnc 1f
+ movsb
+1: shrl $1, %ecx
+ jnc 2f
+ movsw
+2: rep
+ movsl
+ jmp .Lend
+END (memcpy)
+libc_hidden_builtin_def (memcpy)
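The copy strategy above is: byte-copy until SRC is 4-byte aligned (at most three movsb), move whole words with rep movsl, and finish the remainder with rep movsb. A simplified C rendering of that shape, assuming x86-style tolerance for unaligned 32-bit stores; this is an illustration under those assumptions, not the library routine:

    #include <stddef.h>
    #include <stdint.h>

    static void *
    word_copy (void *dst, const void *src, size_t len)
    {
      unsigned char *d = dst;
      const unsigned char *s = src;

      /* Up to three byte copies until SRC is word aligned.  */
      while (len > 0 && ((uintptr_t) s & 3) != 0)
        {
          *d++ = *s++;
          len--;
        }

      /* Whole words, like "rep movsl"; the store may be unaligned.  */
      for (; len >= 4; len -= 4, s += 4, d += 4)
        *(uint32_t *) d = *(const uint32_t *) s;

      /* 0-3 trailing bytes, like "rep movsb".  */
      while (len > 0)
        {
          *d++ = *s++;
          len--;
        }
      return dst;
    }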
diff --git a/REORG.TODO/sysdeps/i386/i686/memmove.S b/REORG.TODO/sysdeps/i386/i686/memmove.S
new file mode 100644
index 0000000000..f60c3d501b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/memmove.S
@@ -0,0 +1,120 @@
+/* Copy memory block and return pointer to beginning of destination block.
+ For Intel 80x86, x>=6.
+ This file is part of the GNU C Library.
+ Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 2003.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* one spilled register */
+#define RTN PARMS
+
+ .text
+
+#ifdef USE_AS_BCOPY
+# define SRC RTN
+# define DEST SRC+4
+# define LEN DEST+4
+#else
+# define DEST RTN
+# define SRC DEST+4
+# define LEN SRC+4
+
+# if defined PIC && IS_IN (libc)
+ENTRY_CHK (__memmove_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (__memmove_chk)
+# endif
+#endif
+
+ENTRY (memmove)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+
+ movl LEN(%esp), %ecx
+ movl DEST(%esp), %edi
+ cfi_rel_offset (edi, 0)
+ movl %esi, %edx
+ movl SRC(%esp), %esi
+ cfi_register (esi, edx)
+
+ movl %edi, %eax
+ subl %esi, %eax
+ cmpl %eax, %ecx
+ ja 3f
+
+ cld
+ shrl $1, %ecx
+ jnc 1f
+ movsb
+1: shrl $1, %ecx
+ jnc 2f
+ movsw
+2: rep
+ movsl
+ movl %edx, %esi
+ cfi_restore (esi)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+#endif
+
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+ cfi_register (esi, edx)
+
+ /* Backward copying. */
+3: std
+ leal -1(%edi, %ecx), %edi
+ leal -1(%esi, %ecx), %esi
+ shrl $1, %ecx
+ jnc 1f
+ movsb
+1: subl $1, %edi
+ subl $1, %esi
+ shrl $1, %ecx
+ jnc 2f
+ movsw
+2: subl $2, %edi
+ subl $2, %esi
+ rep
+ movsl
+ movl %edx, %esi
+ cfi_restore (esi)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+#endif
+
+ cld
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (memmove)
+#ifndef USE_AS_BCOPY
+libc_hidden_builtin_def (memmove)
+#endif
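The direction choice above hinges on one unsigned comparison: "subl %esi, %eax; cmpl %eax, %ecx; ja 3f" takes the backward path only when DEST - SRC, as an unsigned value, is smaller than LEN, i.e. when DEST lies inside the source range. When DEST is below SRC the subtraction wraps to a huge value, so the cheap forward copy is correctly kept. In C terms:

    #include <stddef.h>
    #include <stdint.h>

    /* True exactly when a forward copy would overwrite not-yet-read
       source bytes, matching the assembly's single unsigned compare.  */
    static int
    needs_backward_copy (const void *dest, const void *src, size_t len)
    {
      return (uintptr_t) dest - (uintptr_t) src < len;
    }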
diff --git a/REORG.TODO/sysdeps/i386/i686/mempcpy.S b/REORG.TODO/sysdeps/i386/i686/mempcpy.S
new file mode 100644
index 0000000000..31cb4efdb2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/mempcpy.S
@@ -0,0 +1,65 @@
+/* Copy memory block and return pointer to following byte.
+ For Intel 80x86, x>=6.
+ This file is part of the GNU C Library.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define RTN PARMS
+#define DEST RTN
+#define SRC DEST+4
+#define LEN SRC+4
+
+ .text
+#if defined PIC && IS_IN (libc)
+ENTRY_CHK (__mempcpy_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (__mempcpy_chk)
+#endif
+ENTRY (__mempcpy)
+
+ movl LEN(%esp), %ecx
+ movl %edi, %eax
+ cfi_register (edi, eax)
+ movl DEST(%esp), %edi
+ movl %esi, %edx
+ cfi_register (esi, edx)
+ movl SRC(%esp), %esi
+ cld
+ shrl $1, %ecx
+ jnc 1f
+ movsb
+1: shrl $1, %ecx
+ jnc 2f
+ movsw
+2: rep
+ movsl
+ xchgl %edi, %eax
+ cfi_restore (edi)
+ movl %edx, %esi
+ cfi_restore (esi)
+
+ ret
+END (__mempcpy)
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
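Unlike memcpy, the routine above returns the address one past the last byte written; the "xchgl %edi, %eax" works because after the string instructions EDI already points there. That return convention is what makes chained copies cheap, e.g. (a usage sketch; mempcpy is a GNU extension):

    #define _GNU_SOURCE
    #include <string.h>

    /* Concatenate two buffers; each call resumes where the last ended.  */
    static char *
    join (char *out, const char *a, size_t na, const char *b, size_t nb)
    {
      out = mempcpy (out, a, na);
      out = mempcpy (out, b, nb);
      return out;   /* one past the last byte written */
    }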
diff --git a/REORG.TODO/sysdeps/i386/i686/memset.S b/REORG.TODO/sysdeps/i386/i686/memset.S
new file mode 100644
index 0000000000..24d06178d2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/memset.S
@@ -0,0 +1,100 @@
+/* memset/bzero -- set memory area to CH/0
+ Highly optimized version for ix86, x>=6.
+ Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#ifdef USE_AS_BZERO
+# define DEST PARMS
+# define LEN DEST+4
+#else
+# define RTN PARMS
+# define DEST RTN
+# define CHR DEST+4
+# define LEN CHR+4
+#endif
+
+ .text
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
+ENTRY_CHK (__memset_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (__memset_chk)
+#endif
+ENTRY (memset)
+
+ cld
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ movl DEST(%esp), %edx
+ movl LEN(%esp), %ecx
+#ifdef USE_AS_BZERO
+ xorl %eax, %eax /* fill with 0 */
+#else
+ movzbl CHR(%esp), %eax
+#endif
+ jecxz 1f
+ movl %edx, %edi
+ cfi_rel_offset (edi, 0)
+ andl $3, %edx
+ jz 2f /* aligned */
+ jp 3f /* misaligned at 3, store just one byte below */
+ stosb /* misaligned at 1 or 2, store two bytes */
+ decl %ecx
+ jz 1f
+3: stosb
+ decl %ecx
+ jz 1f
+ xorl $1, %edx
+ jnz 2f /* was misaligned at 2 or 3, now aligned */
+ stosb /* was misaligned at 1, store third byte */
+ decl %ecx
+2: movl %ecx, %edx
+ shrl $2, %ecx
+ andl $3, %edx
+#ifndef USE_AS_BZERO
+ imul $0x01010101, %eax
+#endif
+ rep
+ stosl
+ movl %edx, %ecx
+ rep
+ stosb
+
+1:
+#ifndef USE_AS_BZERO
+ movl DEST(%esp), %eax /* start address of destination is result */
+#endif
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (memset)
+libc_hidden_builtin_def (memset)
+
+#if defined SHARED && IS_IN (libc) && !defined __memset_chk \
+ && !defined USE_AS_BZERO
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+ .section .gnu.warning.__memset_zero_constant_len_parameter
+ .string "memset used with constant zero length parameter; this could be due to transposed parameters"
+#endif
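Two details in the routine above deserve a note. The alignment prologue stores at most three bytes, steered by the parity trick "jp 3f" on DEST & 3; and "imul $0x01010101, %eax" replicates the fill byte into all four byte lanes so rep stosl can store a word at a time. The replication in C:

    #include <stdint.h>

    /* "imul $0x01010101, %eax": spread one byte across a 32-bit word.  */
    static uint32_t
    replicate_byte (unsigned char c)
    {
      return (uint32_t) c * 0x01010101u;   /* 0xAB -> 0xABABABAB */
    }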
diff --git a/REORG.TODO/sysdeps/i386/i686/memusage.h b/REORG.TODO/sysdeps/i386/i686/memusage.h
new file mode 100644
index 0000000000..77a020d7c0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/memusage.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define GETSP() ({ register uintptr_t stack_ptr asm ("esp"); stack_ptr; })
+#define GETTIME(low,high) asm ("rdtsc" : "=a" (low), "=d" (high))
+
+#include <sysdeps/generic/memusage.h>
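GETTIME above reads the processor timestamp counter; RDTSC returns the low half in EAX and the high half in EDX, which is why the macro takes two output operands. A sketch of combining them into one 64-bit value (the volatile qualifier is an addition here, to keep the compiler from caching the read):

    #include <stdint.h>

    static uint64_t
    rdtsc64 (void)
    {
      uint32_t low, high;
      asm volatile ("rdtsc" : "=a" (low), "=d" (high));
      return ((uint64_t) high << 32) | low;
    }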
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/Makefile b/REORG.TODO/sysdeps/i386/i686/multiarch/Makefile
new file mode 100644
index 0000000000..4a0c20c051
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/Makefile
@@ -0,0 +1,44 @@
+ifeq ($(subdir),csu)
+tests += test-multiarch
+endif
+
+ifeq ($(subdir),string)
+gen-as-const-headers += locale-defines.sym
+sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
+ memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
+ memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
+ memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
+ strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
+ memcmp-ssse3 memcmp-sse4 varshift \
+ strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
+ strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
+ strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
+ strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
+ strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
+ memchr-sse2 memchr-sse2-bsf \
+ memrchr-sse2 memrchr-sse2-bsf memrchr-c \
+ rawmemchr-sse2 rawmemchr-sse2-bsf \
+ strnlen-sse2 strnlen-c \
+ strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
+ strncase_l-c strncase-c strncase_l-ssse3 \
+ strcasecmp_l-sse4 strncase_l-sse4 \
+ bcopy-sse2-unaligned memcpy-sse2-unaligned \
+ mempcpy-sse2-unaligned memmove-sse2-unaligned \
+ strcspn-c strpbrk-c strspn-c
+CFLAGS-varshift.c += -msse4
+CFLAGS-strcspn-c.c += -msse4
+CFLAGS-strpbrk-c.c += -msse4
+CFLAGS-strspn-c.c += -msse4
+endif
+
+ifeq ($(subdir),wcsmbs)
+sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \
+ wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcschr-sse2 \
+ wcschr-c wcsrchr-sse2 wcsrchr-c wcscpy-ssse3 wcscpy-c
+endif
+
+ifeq ($(subdir),math)
+libm-sysdep_routines += s_fma-fma s_fmaf-fma
+CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse
+CFLAGS-s_fmaf-fma.c += -mavx -mfpmath=sse
+endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
new file mode 100644
index 0000000000..efef2a10dd
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define USE_AS_BCOPY
+#define MEMCPY __bcopy_sse2_unaligned
+#include "memcpy-sse2-unaligned.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
new file mode 100644
index 0000000000..cbc8b420e8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define USE_AS_BCOPY
+#define MEMCPY __bcopy_ssse3_rep
+#include "memcpy-ssse3-rep.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
new file mode 100644
index 0000000000..36aac44b9c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define USE_AS_BCOPY
+#define MEMCPY __bcopy_ssse3
+#include "memcpy-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy.S
new file mode 100644
index 0000000000..877f82c28f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/bcopy.S
@@ -0,0 +1,59 @@
+/* Multiple versions of bcopy
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+ .text
+ENTRY(bcopy)
+ .type bcopy, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__bcopy_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__bcopy_sse2_unaligned)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__bcopy_ssse3)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__bcopy_ssse3_rep)
+2: ret
+END(bcopy)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __bcopy_ia32, @function; \
+ .p2align 4; \
+ .globl __bcopy_ia32; \
+ .hidden __bcopy_ia32; \
+ __bcopy_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32
+
+#endif
+
+#include "../bcopy.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
new file mode 100644
index 0000000000..507b288bb3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
@@ -0,0 +1,3 @@
+#define USE_AS_BZERO
+#define __memset_sse2_rep __bzero_sse2_rep
+#include "memset-sse2-rep.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2.S
new file mode 100644
index 0000000000..8d04512e4e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/bzero-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_BZERO
+#define __memset_sse2 __bzero_sse2
+#include "memset-sse2.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/bzero.S b/REORG.TODO/sysdeps/i386/i686/multiarch/bzero.S
new file mode 100644
index 0000000000..9dac490aa2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/bzero.S
@@ -0,0 +1,62 @@
+/* Multiple versions of bzero
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+ .text
+ENTRY(__bzero)
+ .type __bzero, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__bzero_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__bzero_sse2)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__bzero_sse2_rep)
+2: ret
+END(__bzero)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __bzero_ia32, @function; \
+ .p2align 4; \
+ .globl __bzero_ia32; \
+ .hidden __bzero_ia32; \
+ __bzero_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __bzero_ia32, .-__bzero_ia32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC does not work with the hidden functions in a shared
+ library, since they are called without setting up EBX, which
+ the PLT used by IFUNC requires. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI___bzero; __GI___bzero = __bzero_ia32
+# endif
+#endif
+
+#include "../bzero.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
new file mode 100644
index 0000000000..e8026a2a78
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -0,0 +1,376 @@
+/* Enumerate available IFUNC implementations of a function. i686 version.
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include <string.h>
+#include <wchar.h>
+#include <ifunc-impl-list.h>
+#include "init-arch.h"
+
+/* Maximum number of IFUNC implementations. */
+#define MAX_IFUNC 4
+
+/* Fill ARRAY of MAX elements with IFUNC implementations for function
+ NAME and return the number of valid entries. */
+
+size_t
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ size_t max)
+{
+ assert (max >= MAX_IFUNC);
+
+ size_t i = 0;
+
+ /* Support sysdeps/i386/i686/multiarch/bcopy.S. */
+ IFUNC_IMPL (i, name, bcopy,
+ IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
+ __bcopy_ssse3_rep)
+ IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
+ __bcopy_ssse3)
+ IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2),
+ __bcopy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/bzero.S. */
+ IFUNC_IMPL (i, name, bzero,
+ IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
+ __bzero_sse2_rep)
+ IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
+ __bzero_sse2)
+ IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memchr.S. */
+ IFUNC_IMPL (i, name, memchr,
+ IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
+ __memchr_sse2_bsf)
+ IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
+ __memchr_sse2)
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memcmp.S. */
+ IFUNC_IMPL (i, name, memcmp,
+ IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_2),
+ __memcmp_sse4_2)
+ IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
+ __memcmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */
+ IFUNC_IMPL (i, name, __memmove_chk,
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __memmove_chk_ssse3_rep)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __memmove_chk_ssse3)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_CPU_FEATURE (SSE2),
+ __memmove_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+ __memmove_chk_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memmove.S. */
+ IFUNC_IMPL (i, name, memmove,
+ IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
+ __memmove_ssse3_rep)
+ IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
+ __memmove_ssse3)
+ IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2),
+ __memmove_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memrchr.S. */
+ IFUNC_IMPL (i, name, memrchr,
+ IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
+ __memrchr_sse2_bsf)
+ IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
+ __memrchr_sse2)
+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memset_chk.S. */
+ IFUNC_IMPL (i, name, __memset_chk,
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_CPU_FEATURE (SSE2),
+ __memset_chk_sse2_rep)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_CPU_FEATURE (SSE2),
+ __memset_chk_sse2)
+ IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+ __memset_chk_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memset.S. */
+ IFUNC_IMPL (i, name, memset,
+ IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
+ __memset_sse2_rep)
+ IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
+ __memset_sse2)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/rawmemchr.S. */
+ IFUNC_IMPL (i, name, rawmemchr,
+ IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
+ __rawmemchr_sse2_bsf)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
+ __rawmemchr_sse2)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/stpncpy.S. */
+ IFUNC_IMPL (i, name, stpncpy,
+ IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3),
+ __stpncpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSE2),
+ __stpncpy_sse2)
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/stpcpy.S. */
+ IFUNC_IMPL (i, name, stpcpy,
+ IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3),
+ __stpcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2),
+ __stpcpy_sse2)
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strcasecmp.S. */
+ IFUNC_IMPL (i, name, strcasecmp,
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ HAS_CPU_FEATURE (SSE4_2),
+ __strcasecmp_sse4_2)
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ HAS_CPU_FEATURE (SSSE3),
+ __strcasecmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S. */
+ IFUNC_IMPL (i, name, strcasecmp_l,
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ HAS_CPU_FEATURE (SSE4_2),
+ __strcasecmp_l_sse4_2)
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ HAS_CPU_FEATURE (SSSE3),
+ __strcasecmp_l_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
+ __strcasecmp_l_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strcat.S. */
+ IFUNC_IMPL (i, name, strcat,
+ IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
+ __strcat_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSE2),
+ __strcat_sse2)
+ IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strchr.S. */
+ IFUNC_IMPL (i, name, strchr,
+ IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
+ __strchr_sse2_bsf)
+ IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
+ __strchr_sse2)
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strcmp.S. */
+ IFUNC_IMPL (i, name, strcmp,
+ IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
+ __strcmp_sse4_2)
+ IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
+ __strcmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strcpy.S. */
+ IFUNC_IMPL (i, name, strcpy,
+ IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
+ __strcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2),
+ __strcpy_sse2)
+ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strcspn.S. */
+ IFUNC_IMPL (i, name, strcspn,
+ IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2),
+ __strcspn_sse42)
+ IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strncase.S. */
+ IFUNC_IMPL (i, name, strncasecmp,
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ HAS_CPU_FEATURE (SSE4_2),
+ __strncasecmp_sse4_2)
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ HAS_CPU_FEATURE (SSSE3),
+ __strncasecmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
+ __strncasecmp_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strncase_l.S. */
+ IFUNC_IMPL (i, name, strncasecmp_l,
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ HAS_CPU_FEATURE (SSE4_2),
+ __strncasecmp_l_sse4_2)
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ HAS_CPU_FEATURE (SSSE3),
+ __strncasecmp_l_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
+ __strncasecmp_l_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strncat.S. */
+ IFUNC_IMPL (i, name, strncat,
+ IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
+ __strncat_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSE2),
+ __strncat_sse2)
+ IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strncpy.S. */
+ IFUNC_IMPL (i, name, strncpy,
+ IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
+ __strncpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSE2),
+ __strncpy_sse2)
+ IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strnlen.S. */
+ IFUNC_IMPL (i, name, strnlen,
+ IFUNC_IMPL_ADD (array, i, strnlen, HAS_CPU_FEATURE (SSE2),
+ __strnlen_sse2)
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strpbrk.S. */
+ IFUNC_IMPL (i, name, strpbrk,
+ IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2),
+ __strpbrk_sse42)
+ IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strrchr.S. */
+ IFUNC_IMPL (i, name, strrchr,
+ IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
+ __strrchr_sse2_bsf)
+ IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
+ __strrchr_sse2)
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strspn.S. */
+ IFUNC_IMPL (i, name, strspn,
+ IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2),
+ __strspn_sse42)
+ IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/wcschr.S. */
+ IFUNC_IMPL (i, name, wcschr,
+ IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2),
+ __wcschr_sse2)
+ IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/wcscmp.S. */
+ IFUNC_IMPL (i, name, wcscmp,
+ IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2),
+ __wcscmp_sse2)
+ IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/wcscpy.S. */
+ IFUNC_IMPL (i, name, wcscpy,
+ IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
+ __wcscpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/wcslen.S. */
+ IFUNC_IMPL (i, name, wcslen,
+ IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2),
+ __wcslen_sse2)
+ IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. */
+ IFUNC_IMPL (i, name, wcsrchr,
+ IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2),
+ __wcsrchr_sse2)
+ IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/wmemcmp.S. */
+ IFUNC_IMPL (i, name, wmemcmp,
+ IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2),
+ __wmemcmp_sse4_2)
+ IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
+ __wmemcmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_ia32))
+
+#ifdef SHARED
+ /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */
+ IFUNC_IMPL (i, name, __memcpy_chk,
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __memcpy_chk_ssse3_rep)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __memcpy_chk_ssse3)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_CPU_FEATURE (SSE2),
+ __memcpy_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+ __memcpy_chk_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/memcpy.S. */
+ IFUNC_IMPL (i, name, memcpy,
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
+ __memcpy_ssse3_rep)
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
+ __memcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2),
+ __memcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */
+ IFUNC_IMPL (i, name, __mempcpy_chk,
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_chk_ssse3_rep)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_chk_ssse3)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSE2),
+ __mempcpy_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+ __mempcpy_chk_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/mempcpy.S. */
+ IFUNC_IMPL (i, name, mempcpy,
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_ssse3_rep)
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2),
+ __mempcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strlen.S. */
+ IFUNC_IMPL (i, name, strlen,
+ IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
+ __strlen_sse2_bsf)
+ IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
+ __strlen_sse2)
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32))
+
+ /* Support sysdeps/i386/i686/multiarch/strncmp.S. */
+ IFUNC_IMPL (i, name, strncmp,
+ IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
+ __strncmp_sse4_2)
+ IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
+ __strncmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32))
+#endif
+
+ return i;
+}
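The IFUNC_IMPL/IFUNC_IMPL_ADD macros above append candidates in preference order (most capable first, the plain ia32 fallback always usable last) and return how many entries were filled, which is why the function asserts max >= MAX_IFUNC. A de-sugared sketch of that shape for one function, with a hypothetical struct in place of libc_ifunc_impl:

    #include <stddef.h>

    struct impl { const char *name; int usable; };   /* hypothetical */

    static size_t
    list_memcmp (struct impl *array, size_t max,
                 int has_sse4_2, int has_ssse3)
    {
      size_t i = 0;
      if (i < max) array[i++] = (struct impl) { "__memcmp_sse4_2", has_sse4_2 };
      if (i < max) array[i++] = (struct impl) { "__memcmp_ssse3", has_ssse3 };
      if (i < max) array[i++] = (struct impl) { "__memcmp_ia32", 1 };
      return i;
    }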
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/locale-defines.sym b/REORG.TODO/sysdeps/i386/i686/multiarch/locale-defines.sym
new file mode 100644
index 0000000000..aebff9a4f9
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/locale-defines.sym
@@ -0,0 +1,11 @@
+#include <locale/localeinfo.h>
+#include <langinfo.h>
+#include <stddef.h>
+
+--
+
+LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales)
+LC_CTYPE
+_NL_CTYPE_NONASCII_CASE
+LOCALE_DATA_VALUES offsetof (struct __locale_data, values)
+SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0])
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
new file mode 100644
index 0000000000..dd316486e6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
@@ -0,0 +1,502 @@
+/* Optimized memchr with SSE2.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define STR1 PARMS
+# define STR2 STR1+4
+
+# ifndef USE_AS_RAWMEMCHR
+# define LEN STR2+4
+# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
+# endif
+
+# ifndef MEMCHR
+# define MEMCHR __memchr_sse2_bsf
+# endif
+
+ .text
+ENTRY (MEMCHR)
+
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+
+# ifndef USE_AS_RAWMEMCHR
+ mov LEN(%esp), %edx
+ test %edx, %edx
+ jz L(return_null_1)
+# endif
+ mov %ecx, %eax
+
+ punpcklbw %xmm1, %xmm1
+ punpcklbw %xmm1, %xmm1
+
+ and $63, %ecx
+ pshufd $0, %xmm1, %xmm1
+
+ cmp $48, %ecx
+ ja L(crosscache)
+
+ movdqu (%eax), %xmm0
+ pcmpeqb %xmm1, %xmm0
+/* Check if there is a match. */
+ pmovmskb %xmm0, %ecx
+ test %ecx, %ecx
+ je L(unaligned_no_match_1)
+/* Check which byte is a match. */
+ bsf %ecx, %ecx
+
+# ifndef USE_AS_RAWMEMCHR
+ sub %ecx, %edx
+ jbe L(return_null_1)
+# endif
+ add %ecx, %eax
+ ret
+
+ .p2align 4
+L(unaligned_no_match_1):
+# ifndef USE_AS_RAWMEMCHR
+ sub $16, %edx
+ jbe L(return_null_1)
+ PUSH (%edi)
+ lea 16(%eax), %edi
+ and $15, %eax
+ and $-16, %edi
+ add %eax, %edx
+# else
+ lea 16(%eax), %edx
+ and $-16, %edx
+# endif
+ jmp L(loop_prolog)
+
+ .p2align 4
+L(return_null_1):
+ xor %eax, %eax
+ ret
+
+# ifndef USE_AS_RAWMEMCHR
+ CFI_POP (%edi)
+# endif
+
+ .p2align 4
+L(crosscache):
+/* Handle unaligned string. */
+
+# ifndef USE_AS_RAWMEMCHR
+ PUSH (%edi)
+ mov %eax, %edi
+ and $15, %ecx
+ and $-16, %edi
+ movdqa (%edi), %xmm0
+# else
+ mov %eax, %edx
+ and $15, %ecx
+ and $-16, %edx
+ movdqa (%edx), %xmm0
+# endif
+ pcmpeqb %xmm1, %xmm0
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+/* Remove the leading bytes. */
+ sar %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+/* Check which byte is a match. */
+ bsf %eax, %eax
+
+# ifndef USE_AS_RAWMEMCHR
+ sub %eax, %edx
+ jbe L(return_null)
+ add %edi, %eax
+ add %ecx, %eax
+ RETURN
+# else
+ add %edx, %eax
+ add %ecx, %eax
+ ret
+# endif
+
+ .p2align 4
+L(unaligned_no_match):
+# ifndef USE_AS_RAWMEMCHR
+ /* Calculate the last acceptable address and check for possible
+ addition overflow by using saturated math:
+ edx = ecx + edx
+ edx |= -(edx < ecx) */
+ add %ecx, %edx
+ sbb %eax, %eax
+ or %eax, %edx
+ sub $16, %edx
+ jbe L(return_null)
+ add $16, %edi
+# else
+ add $16, %edx
+# endif
+
+ .p2align 4
+/* Loop start on aligned string. */
+L(loop_prolog):
+# ifndef USE_AS_RAWMEMCHR
+ sub $64, %edx
+ jbe L(exit_loop)
+ movdqa (%edi), %xmm0
+# else
+ movdqa (%edx), %xmm0
+# endif
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 16(%edi), %xmm2
+# else
+ movdqa 16(%edx), %xmm2
+# endif
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 32(%edi), %xmm3
+# else
+ movdqa 32(%edx), %xmm3
+# endif
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 48(%edi), %xmm4
+# else
+ movdqa 48(%edx), %xmm4
+# endif
+ pcmpeqb %xmm1, %xmm4
+
+# ifndef USE_AS_RAWMEMCHR
+ add $64, %edi
+# else
+ add $64, %edx
+# endif
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+# ifndef USE_AS_RAWMEMCHR
+ test $0x3f, %edi
+# else
+ test $0x3f, %edx
+# endif
+ jz L(align64_loop)
+
+# ifndef USE_AS_RAWMEMCHR
+ sub $64, %edx
+ jbe L(exit_loop)
+ movdqa (%edi), %xmm0
+# else
+ movdqa (%edx), %xmm0
+# endif
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 16(%edi), %xmm2
+# else
+ movdqa 16(%edx), %xmm2
+# endif
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 32(%edi), %xmm3
+# else
+ movdqa 32(%edx), %xmm3
+# endif
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 48(%edi), %xmm3
+# else
+ movdqa 48(%edx), %xmm3
+# endif
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+
+# ifndef USE_AS_RAWMEMCHR
+ add $64, %edi
+# else
+ add $64, %edx
+# endif
+ test %eax, %eax
+ jnz L(matches0)
+
+# ifndef USE_AS_RAWMEMCHR
+ mov %edi, %ecx
+ and $-64, %edi
+ and $63, %ecx
+ add %ecx, %edx
+# else
+ and $-64, %edx
+# endif
+
+ .p2align 4
+L(align64_loop):
+# ifndef USE_AS_RAWMEMCHR
+ sub $64, %edx
+ jbe L(exit_loop)
+ movdqa (%edi), %xmm0
+ movdqa 16(%edi), %xmm2
+ movdqa 32(%edi), %xmm3
+ movdqa 48(%edi), %xmm4
+# else
+ movdqa (%edx), %xmm0
+ movdqa 16(%edx), %xmm2
+ movdqa 32(%edx), %xmm3
+ movdqa 48(%edx), %xmm4
+# endif
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm1, %xmm4
+
+ pmaxub %xmm0, %xmm3
+ pmaxub %xmm2, %xmm4
+ pmaxub %xmm3, %xmm4
+ pmovmskb %xmm4, %eax
+
+# ifndef USE_AS_RAWMEMCHR
+ add $64, %edi
+# else
+ add $64, %edx
+# endif
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+# ifndef USE_AS_RAWMEMCHR
+ sub $64, %edi
+# else
+ sub $64, %edx
+# endif
+
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 32(%edi), %xmm3
+# else
+ movdqa 32(%edx), %xmm3
+# endif
+
+ pcmpeqb %xmm1, %xmm3
+
+# ifndef USE_AS_RAWMEMCHR
+ pcmpeqb 48(%edi), %xmm1
+# else
+ pcmpeqb 48(%edx), %xmm1
+# endif
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ pmovmskb %xmm1, %eax
+ bsf %eax, %eax
+
+# ifndef USE_AS_RAWMEMCHR
+ lea 48(%edi, %eax), %eax
+ RETURN
+# else
+ lea 48(%edx, %eax), %eax
+ ret
+# endif
+
+# ifndef USE_AS_RAWMEMCHR
+ .p2align 4
+L(exit_loop):
+ add $64, %edx
+ cmp $32, %edx
+ jbe L(exit_loop_32)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%edi), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%edi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32_1)
+ cmp $48, %edx
+ jbe L(return_null)
+
+ pcmpeqb 48(%edi), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches48_1)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(exit_loop_32):
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches_1)
+ cmp $16, %edx
+ jbe L(return_null)
+
+ pcmpeqb 16(%edi), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches16_1)
+ xor %eax, %eax
+ RETURN
+# endif
+ .p2align 4
+L(matches0):
+ bsf %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ lea -16(%eax, %edi), %eax
+ RETURN
+# else
+ lea -16(%eax, %edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(matches):
+ bsf %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ add %edi, %eax
+ RETURN
+# else
+ add %edx, %eax
+ ret
+# endif
+
+ .p2align 4
+L(matches16):
+ bsf %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ lea 16(%eax, %edi), %eax
+ RETURN
+# else
+ lea 16(%eax, %edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(matches32):
+ bsf %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ lea 32(%eax, %edi), %eax
+ RETURN
+# else
+ lea 32(%eax, %edx), %eax
+ ret
+# endif
+
+# ifndef USE_AS_RAWMEMCHR
+ .p2align 4
+L(matches_1):
+ bsf %eax, %eax
+ sub %eax, %edx
+ jbe L(return_null)
+
+ add %edi, %eax
+ RETURN
+
+ .p2align 4
+L(matches16_1):
+ sub $16, %edx
+ bsf %eax, %eax
+ sub %eax, %edx
+ jbe L(return_null)
+
+ lea 16(%edi, %eax), %eax
+ RETURN
+
+ .p2align 4
+L(matches32_1):
+ sub $32, %edx
+ bsf %eax, %eax
+ sub %eax, %edx
+ jbe L(return_null)
+
+ lea 32(%edi, %eax), %eax
+ RETURN
+
+ .p2align 4
+L(matches48_1):
+ sub $48, %edx
+ bsf %eax, %eax
+ sub %eax, %edx
+ jbe L(return_null)
+
+ lea 48(%edi, %eax), %eax
+ RETURN
+# endif
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ RETURN
+# else
+ ret
+# endif
+
+END (MEMCHR)
+#endif
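The core of the file above is one pattern repeated at different offsets: broadcast the target byte into an XMM register, pcmpeqb 16 bytes at once, collapse the result to a bitmask with pmovmskb, and bsf the mask for the first match. The same step with SSE2 intrinsics (a sketch of a single 16-byte block using the GCC builtin __builtin_ctz, leaving out the alignment, length, and tail logic that fills most of the assembly):

    #include <emmintrin.h>
    #include <stddef.h>

    static const unsigned char *
    scan16 (const unsigned char *p, unsigned char c)
    {
      __m128i needle = _mm_set1_epi8 ((char) c);              /* punpcklbw/pshufd */
      __m128i chunk = _mm_loadu_si128 ((const __m128i *) p);  /* movdqu */
      int mask = _mm_movemask_epi8 (_mm_cmpeq_epi8 (chunk, needle));
      if (mask == 0)
        return NULL;                      /* no match in this block */
      return p + __builtin_ctz (mask);    /* bsf: lowest set bit */
    }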
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2.S
new file mode 100644
index 0000000000..172d70de13
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memchr-sse2.S
@@ -0,0 +1,709 @@
+/* Optimized memchr with SSE2, without BSF.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifndef USE_AS_RAWMEMCHR
+# define ENTRANCE PUSH(%edi);
+# define PARMS 8
+# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
+# else
+# define ENTRANCE
+# define PARMS 4
+# endif
+
+# define STR1 PARMS
+# define STR2 STR1+4
+
+# ifndef USE_AS_RAWMEMCHR
+# define LEN STR2+4
+# endif
+
+# ifndef MEMCHR
+# define MEMCHR __memchr_sse2
+# endif
+
+ atom_text_section
+ENTRY (MEMCHR)
+ ENTRANCE
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+# ifndef USE_AS_RAWMEMCHR
+ mov LEN(%esp), %edx
+ test %edx, %edx
+ jz L(return_null)
+# endif
+
+ punpcklbw %xmm1, %xmm1
+# ifndef USE_AS_RAWMEMCHR
+ mov %ecx, %edi
+# else
+ mov %ecx, %edx
+# endif
+ punpcklbw %xmm1, %xmm1
+
+ and $63, %ecx
+ pshufd $0, %xmm1, %xmm1
+ cmp $48, %ecx
+ ja L(crosscache)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqu (%edi), %xmm0
+# else
+ movdqu (%edx), %xmm0
+# endif
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ jnz L(match_case2_prolog)
+
+ sub $16, %edx
+ jbe L(return_null)
+ lea 16(%edi), %edi
+ and $15, %ecx
+ and $-16, %edi
+ add %ecx, %edx
+# else
+ jnz L(match_case1_prolog)
+ lea 16(%edx), %edx
+ and $-16, %edx
+# endif
+ jmp L(loop_prolog)
+
+ .p2align 4
+L(crosscache):
+ and $15, %ecx
+# ifndef USE_AS_RAWMEMCHR
+ and $-16, %edi
+ movdqa (%edi), %xmm0
+# else
+ and $-16, %edx
+ movdqa (%edx), %xmm0
+# endif
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ sar %cl, %eax
+ test %eax, %eax
+
+# ifndef USE_AS_RAWMEMCHR
+ jnz L(match_case2_prolog1)
+ /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using
+ "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to void
+ possible addition overflow. */
+ neg %ecx
+ add $16, %ecx
+ sub %ecx, %edx
+ jbe L(return_null)
+ lea 16(%edi), %edi
+# else
+ jnz L(match_case1_prolog1)
+ lea 16(%edx), %edx
+# endif
+
+ .p2align 4
+L(loop_prolog):
+# ifndef USE_AS_RAWMEMCHR
+ sub $64, %edx
+ jbe L(exit_loop)
+ movdqa (%edi), %xmm0
+# else
+ movdqa (%edx), %xmm0
+# endif
+ pcmpeqb %xmm1, %xmm0
+ xor %ecx, %ecx
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 16(%edi), %xmm2
+# else
+ movdqa 16(%edx), %xmm2
+# endif
+ pcmpeqb %xmm1, %xmm2
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 32(%edi), %xmm3
+# else
+ movdqa 32(%edx), %xmm3
+# endif
+ pcmpeqb %xmm1, %xmm3
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 48(%edi), %xmm4
+# else
+ movdqa 48(%edx), %xmm4
+# endif
+ pcmpeqb %xmm1, %xmm4
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ lea 64(%edi), %edi
+ sub $64, %edx
+ jbe L(exit_loop)
+
+ movdqa (%edi), %xmm0
+# else
+ lea 64(%edx), %edx
+ movdqa (%edx), %xmm0
+# endif
+ pcmpeqb %xmm1, %xmm0
+ xor %ecx, %ecx
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 16(%edi), %xmm2
+# else
+ movdqa 16(%edx), %xmm2
+# endif
+ pcmpeqb %xmm1, %xmm2
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 32(%edi), %xmm3
+# else
+ movdqa 32(%edx), %xmm3
+# endif
+ pcmpeqb %xmm1, %xmm3
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 48(%edi), %xmm4
+# else
+ movdqa 48(%edx), %xmm4
+# endif
+ pcmpeqb %xmm1, %xmm4
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ lea 64(%edi), %edi
+ mov %edi, %ecx
+ and $-64, %edi
+ and $63, %ecx
+ add %ecx, %edx
+# else
+ lea 64(%edx), %edx
+ and $-64, %edx
+# endif
+
+ .p2align 4
+L(align64_loop):
+
+# ifndef USE_AS_RAWMEMCHR
+ sub $64, %edx
+ jbe L(exit_loop)
+ movdqa (%edi), %xmm0
+ movdqa 16(%edi), %xmm2
+ movdqa 32(%edi), %xmm3
+ movdqa 48(%edi), %xmm4
+# else
+ movdqa (%edx), %xmm0
+ movdqa 16(%edx), %xmm2
+ movdqa 32(%edx), %xmm3
+ movdqa 48(%edx), %xmm4
+# endif
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm1, %xmm4
+
+ pmaxub %xmm0, %xmm3
+ pmaxub %xmm2, %xmm4
+ pmaxub %xmm3, %xmm4
+# ifndef USE_AS_RAWMEMCHR
+ add $64, %edi
+# else
+ add $64, %edx
+# endif
+ pmovmskb %xmm4, %eax
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+# ifndef USE_AS_RAWMEMCHR
+ sub $64, %edi
+# else
+ sub $64, %edx
+# endif
+
+ pmovmskb %xmm0, %eax
+ xor %ecx, %ecx
+ test %eax, %eax
+ jnz L(match_case1)
+
+ pmovmskb %xmm2, %eax
+ lea 16(%ecx), %ecx
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ movdqa 32(%edi), %xmm3
+# else
+ movdqa 32(%edx), %xmm3
+# endif
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ lea 16(%ecx), %ecx
+ test %eax, %eax
+ jnz L(match_case1)
+
+# ifndef USE_AS_RAWMEMCHR
+ pcmpeqb 48(%edi), %xmm1
+# else
+ pcmpeqb 48(%edx), %xmm1
+# endif
+ pmovmskb %xmm1, %eax
+ lea 16(%ecx), %ecx
+
+ .p2align 4
+L(match_case1):
+# ifndef USE_AS_RAWMEMCHR
+ add %ecx, %edi
+# else
+L(match_case1_prolog1):
+ add %ecx, %edx
+L(match_case1_prolog):
+# endif
+ test %al, %al
+ jz L(match_case1_high)
+ mov %al, %cl
+ and $15, %cl
+ jz L(match_case1_8)
+ test $0x01, %al
+ jnz L(ExitCase1_1)
+ test $0x02, %al
+ jnz L(ExitCase1_2)
+ test $0x04, %al
+ jnz L(ExitCase1_3)
+# ifndef USE_AS_RAWMEMCHR
+ lea 3(%edi), %eax
+ RETURN
+# else
+ lea 3(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(match_case1_8):
+ test $0x10, %al
+ jnz L(ExitCase1_5)
+ test $0x20, %al
+ jnz L(ExitCase1_6)
+ test $0x40, %al
+ jnz L(ExitCase1_7)
+# ifndef USE_AS_RAWMEMCHR
+ lea 7(%edi), %eax
+ RETURN
+# else
+ lea 7(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(match_case1_high):
+ mov %ah, %ch
+ and $15, %ch
+ jz L(match_case1_high_8)
+ test $0x01, %ah
+ jnz L(ExitCase1_9)
+ test $0x02, %ah
+ jnz L(ExitCase1_10)
+ test $0x04, %ah
+ jnz L(ExitCase1_11)
+# ifndef USE_AS_RAWMEMCHR
+ lea 11(%edi), %eax
+ RETURN
+# else
+ lea 11(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(match_case1_high_8):
+ test $0x10, %ah
+ jnz L(ExitCase1_13)
+ test $0x20, %ah
+ jnz L(ExitCase1_14)
+ test $0x40, %ah
+ jnz L(ExitCase1_15)
+# ifndef USE_AS_RAWMEMCHR
+ lea 15(%edi), %eax
+ RETURN
+# else
+ lea 15(%edx), %eax
+ ret
+# endif
+
+# ifndef USE_AS_RAWMEMCHR
+ .p2align 4
+L(exit_loop):
+ add $64, %edx
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ xor %ecx, %ecx
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(match_case2)
+ cmp $16, %edx
+ jbe L(return_null)
+
+ movdqa 16(%edi), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(match_case2)
+ cmp $32, %edx
+ jbe L(return_null)
+
+ movdqa 32(%edi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(match_case2)
+ cmp $48, %edx
+ jbe L(return_null)
+
+ pcmpeqb 48(%edi), %xmm1
+ lea 16(%ecx), %ecx
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(match_case2)
+
+ xor %eax, %eax
+ RETURN
+# endif
+
+ .p2align 4
+L(ExitCase1_1):
+# ifndef USE_AS_RAWMEMCHR
+ mov %edi, %eax
+ RETURN
+# else
+ mov %edx, %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_2):
+# ifndef USE_AS_RAWMEMCHR
+ lea 1(%edi), %eax
+ RETURN
+# else
+ lea 1(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_3):
+# ifndef USE_AS_RAWMEMCHR
+ lea 2(%edi), %eax
+ RETURN
+# else
+ lea 2(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_5):
+# ifndef USE_AS_RAWMEMCHR
+ lea 4(%edi), %eax
+ RETURN
+# else
+ lea 4(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_6):
+# ifndef USE_AS_RAWMEMCHR
+ lea 5(%edi), %eax
+ RETURN
+# else
+ lea 5(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_7):
+# ifndef USE_AS_RAWMEMCHR
+ lea 6(%edi), %eax
+ RETURN
+# else
+ lea 6(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_9):
+# ifndef USE_AS_RAWMEMCHR
+ lea 8(%edi), %eax
+ RETURN
+# else
+ lea 8(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_10):
+# ifndef USE_AS_RAWMEMCHR
+ lea 9(%edi), %eax
+ RETURN
+# else
+ lea 9(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_11):
+# ifndef USE_AS_RAWMEMCHR
+ lea 10(%edi), %eax
+ RETURN
+# else
+ lea 10(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_13):
+# ifndef USE_AS_RAWMEMCHR
+ lea 12(%edi), %eax
+ RETURN
+# else
+ lea 12(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_14):
+# ifndef USE_AS_RAWMEMCHR
+ lea 13(%edi), %eax
+ RETURN
+# else
+ lea 13(%edx), %eax
+ ret
+# endif
+
+ .p2align 4
+L(ExitCase1_15):
+# ifndef USE_AS_RAWMEMCHR
+ lea 14(%edi), %eax
+ RETURN
+# else
+ lea 14(%edx), %eax
+ ret
+# endif
+
+# ifndef USE_AS_RAWMEMCHR
+ .p2align 4
+L(match_case2):
+ sub %ecx, %edx
+L(match_case2_prolog1):
+ add %ecx, %edi
+L(match_case2_prolog):
+ test %al, %al
+ jz L(match_case2_high)
+ mov %al, %cl
+ and $15, %cl
+ jz L(match_case2_8)
+ test $0x01, %al
+ jnz L(ExitCase2_1)
+ test $0x02, %al
+ jnz L(ExitCase2_2)
+ test $0x04, %al
+ jnz L(ExitCase2_3)
+ sub $4, %edx
+ jb L(return_null)
+ lea 3(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_case2_8):
+ test $0x10, %al
+ jnz L(ExitCase2_5)
+ test $0x20, %al
+ jnz L(ExitCase2_6)
+ test $0x40, %al
+ jnz L(ExitCase2_7)
+ sub $8, %edx
+ jb L(return_null)
+ lea 7(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_case2_high):
+ mov %ah, %ch
+ and $15, %ch
+ jz L(match_case2_high_8)
+ test $0x01, %ah
+ jnz L(ExitCase2_9)
+ test $0x02, %ah
+ jnz L(ExitCase2_10)
+ test $0x04, %ah
+ jnz L(ExitCase2_11)
+ sub $12, %edx
+ jb L(return_null)
+ lea 11(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_case2_high_8):
+ test $0x10, %ah
+ jnz L(ExitCase2_13)
+ test $0x20, %ah
+ jnz L(ExitCase2_14)
+ test $0x40, %ah
+ jnz L(ExitCase2_15)
+ sub $16, %edx
+ jb L(return_null)
+ lea 15(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_1):
+ mov %edi, %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_2):
+ sub $2, %edx
+ jb L(return_null)
+ lea 1(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_3):
+ sub $3, %edx
+ jb L(return_null)
+ lea 2(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_5):
+ sub $5, %edx
+ jb L(return_null)
+ lea 4(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_6):
+ sub $6, %edx
+ jb L(return_null)
+ lea 5(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_7):
+ sub $7, %edx
+ jb L(return_null)
+ lea 6(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_9):
+ sub $9, %edx
+ jb L(return_null)
+ lea 8(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_10):
+ sub $10, %edx
+ jb L(return_null)
+ lea 9(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_11):
+ sub $11, %edx
+ jb L(return_null)
+ lea 10(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_13):
+ sub $13, %edx
+ jb L(return_null)
+ lea 12(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_14):
+ sub $14, %edx
+ jb L(return_null)
+ lea 13(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(ExitCase2_15):
+ sub $15, %edx
+ jb L(return_null)
+ lea 14(%edi), %eax
+ RETURN
+# endif
+
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ RETURN
+# else
+ ret
+# endif
+
+END (MEMCHR)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memchr.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memchr.S
new file mode 100644
index 0000000000..bd0dace290
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memchr.S
@@ -0,0 +1,65 @@
+/* Multiple versions of memchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(__memchr)
+ .type __memchr, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ HAS_ARCH_FEATURE (Slow_BSF)
+ jz 3f
+
+	LOAD_FUNC_GOT_EAX (__memchr_sse2)
+ ret
+
+2: LOAD_FUNC_GOT_EAX (__memchr_ia32)
+ ret
+
+3: LOAD_FUNC_GOT_EAX (__memchr_sse2_bsf)
+ ret
+END(__memchr)
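+
+/* For reference, the selection logic above in rough C form.  This is
+   only an illustrative sketch: the has_sse2/slow_bsf flags stand in
+   for glibc's internal HAS_CPU_FEATURE (SSE2) and
+   HAS_ARCH_FEATURE (Slow_BSF) checks, and the real resolver returns
+   the chosen function's address in EAX:
+
+     typedef void *(*memchr_fn) (const void *, int, size_t);
+
+     static memchr_fn
+     pick_memchr (int has_sse2, int slow_bsf)
+     {
+       if (!has_sse2)
+         return __memchr_ia32;      // no SSE2: baseline i386 version
+       if (slow_bsf)
+         return __memchr_sse2;      // BSF is slow on this CPU
+       return __memchr_sse2_bsf;    // default: SSE2 version using BSF
+     }
+  */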
+
+weak_alias(__memchr, memchr)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memchr_ia32, @function; \
+ .globl __memchr_ia32; \
+ .p2align 4; \
+ __memchr_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memchr_ia32, .-__memchr_ia32
+
+# undef libc_hidden_builtin_def
+/* IFUNC does not work with hidden functions in a shared library, since
+   they are called without setting up EBX, which the PLT used by IFUNC
+   requires.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memchr; __GI_memchr = __memchr_ia32
+
+#endif
+#include "../../memchr.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-sse4.S
new file mode 100644
index 0000000000..2aa13048b2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -0,0 +1,1225 @@
+/* memcmp with SSE4.2, wmemcmp with SSE4.2
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef MEMCMP
+# define MEMCMP __memcmp_sse4_2
+# endif
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define BLK1 PARMS
+# define BLK2 BLK1 + 4
+# define LEN BLK2 + 4
+# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
+
+
+# ifdef SHARED
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets.  INDEX is a register containing the
+   index into the jump table.  SCALE is the scale of INDEX.  */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+/* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+/* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+/* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+/* EBX now holds the absolute address of the jump table entry.  Go. */ \
+ jmp *%ebx
+# else
+# define JMPTBL(I, B) I
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets.  INDEX is a register containing the
+   index into the jump table.  SCALE is the scale of INDEX.  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+# endif
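+
+/* In rough C terms, the SHARED variant above implements a
+   position-independent jump table: every entry stores an offset
+   relative to a fixed base, so the table needs no load-time
+   relocations.  A minimal sketch using GCC's labels-as-values
+   extension (the names are illustrative only):
+
+     void dispatch (unsigned int index)
+     {
+       static const int table[] = { &&case0 - &&base,
+                                    &&case1 - &&base };
+     base:
+       goto *(&&base + table[index]);  // base address + relative offset
+     case0: return;
+     case1: return;
+     }
+
+   The assembly does the same, with EBX holding the base address.  */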
+
+
+/* Warning!
+   wmemcmp has to use SIGNED comparison for elements.
+   memcmp has to use UNSIGNED comparison for elements.
+*/
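+
+/* A minimal C illustration of why the distinction matters (variable
+   names are made up): memcmp orders bytes as unsigned char, whereas
+   wmemcmp orders wchar_t elements as signed integers, so a value with
+   its sign bit set compares differently in the two cases:
+
+     unsigned char b1 = 0x80, b2 = 0x01;      // memcmp:  b1 > b2
+     int w1 = (int) 0x80000000u, w2 = 0x01;   // wmemcmp: w1 < w2
+  */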
+
+ .section .text.sse4.2,"ax",@progbits
+ENTRY (MEMCMP)
+ movl BLK1(%esp), %eax
+ movl BLK2(%esp), %edx
+ movl LEN(%esp), %ecx
+
+# ifdef USE_AS_WMEMCMP
+ shl $2, %ecx
+ test %ecx, %ecx
+ jz L(return0)
+# else
+ cmp $1, %ecx
+ jbe L(less1bytes)
+# endif
+
+ pxor %xmm0, %xmm0
+ cmp $64, %ecx
+ ja L(64bytesormore)
+ cmp $8, %ecx
+
+# ifndef USE_AS_WMEMCMP
+ PUSH (%ebx)
+ jb L(less8bytes)
+# else
+ jb L(less8bytes)
+ PUSH (%ebx)
+# endif
+
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(less8bytes):
+ mov (%eax), %bl
+ cmpb (%edx), %bl
+ jne L(nonzero)
+
+ mov 1(%eax), %bl
+ cmpb 1(%edx), %bl
+ jne L(nonzero)
+
+ cmp $2, %ecx
+ jz L(0bytes)
+
+ mov 2(%eax), %bl
+ cmpb 2(%edx), %bl
+ jne L(nonzero)
+
+ cmp $3, %ecx
+ jz L(0bytes)
+
+ mov 3(%eax), %bl
+ cmpb 3(%edx), %bl
+ jne L(nonzero)
+
+ cmp $4, %ecx
+ jz L(0bytes)
+
+ mov 4(%eax), %bl
+ cmpb 4(%edx), %bl
+ jne L(nonzero)
+
+ cmp $5, %ecx
+ jz L(0bytes)
+
+ mov 5(%eax), %bl
+ cmpb 5(%edx), %bl
+ jne L(nonzero)
+
+ cmp $6, %ecx
+ jz L(0bytes)
+
+ mov 6(%eax), %bl
+ cmpb 6(%edx), %bl
+ je L(0bytes)
+
+L(nonzero):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(above)
+ neg %eax
+L(above):
+ ret
+ CFI_PUSH (%ebx)
+# endif
+
+ .p2align 4
+L(0bytes):
+ POP (%ebx)
+ xor %eax, %eax
+ ret
+
+# ifdef USE_AS_WMEMCMP
+
+/* For wmemcmp, the case N == 1.  */
+
+ .p2align 4
+L(less8bytes):
+ mov (%eax), %ecx
+ cmp (%edx), %ecx
+ je L(return0)
+ mov $1, %eax
+ jg L(find_diff_bigger)
+ neg %eax
+ ret
+
+ .p2align 4
+L(find_diff_bigger):
+ ret
+
+ .p2align 4
+L(return0):
+ xor %eax, %eax
+ ret
+# endif
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(less1bytes):
+ jb L(0bytesend)
+ movzbl (%eax), %eax
+ movzbl (%edx), %edx
+ sub %edx, %eax
+ ret
+
+ .p2align 4
+L(0bytesend):
+ xor %eax, %eax
+ ret
+# endif
+ .p2align 4
+L(64bytesormore):
+ PUSH (%ebx)
+ mov %ecx, %ebx
+ mov $64, %ecx
+ sub $64, %ebx
+L(64bytesormore_loop):
+ movdqu (%eax), %xmm1
+ movdqu (%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_16diff)
+
+ movdqu 16(%eax), %xmm1
+ movdqu 16(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_32diff)
+
+ movdqu 32(%eax), %xmm1
+ movdqu 32(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_48diff)
+
+ movdqu 48(%eax), %xmm1
+ movdqu 48(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_64diff)
+ add %ecx, %eax
+ add %ecx, %edx
+ sub %ecx, %ebx
+ jae L(64bytesormore_loop)
+ add %ebx, %ecx
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
+
+# ifdef USE_AS_WMEMCMP
+
+/* This label is needed only to fill table_64bytes.  */
+L(unreal_case):
+/* No code here.  */
+
+# endif
+ .p2align 4
+L(find_16diff):
+ sub $16, %ecx
+L(find_32diff):
+ sub $16, %ecx
+L(find_48diff):
+ sub $16, %ecx
+L(find_64diff):
+ add %ecx, %edx
+ add %ecx, %eax
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(16bytes):
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+# else
+ .p2align 4
+L(16bytes):
+ mov -16(%eax), %ecx
+ cmp -16(%edx), %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ cmp -12(%edx), %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ cmp -8(%edx), %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ cmp -4(%edx), %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+# endif
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(49bytes):
+ movdqu -49(%eax), %xmm1
+ movdqu -49(%edx), %xmm2
+ mov $-49, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(33bytes):
+ movdqu -33(%eax), %xmm1
+ movdqu -33(%edx), %xmm2
+ mov $-33, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(17bytes):
+ mov -17(%eax), %ecx
+ mov -17(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(13bytes):
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(9bytes):
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(5bytes):
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(50bytes):
+ mov $-50, %ebx
+ movdqu -50(%eax), %xmm1
+ movdqu -50(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(34bytes):
+ mov $-34, %ebx
+ movdqu -34(%eax), %xmm1
+ movdqu -34(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(18bytes):
+ mov -18(%eax), %ecx
+ mov -18(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(14bytes):
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(10bytes):
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(6bytes):
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(2bytes):
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(51bytes):
+ mov $-51, %ebx
+ movdqu -51(%eax), %xmm1
+ movdqu -51(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(35bytes):
+ mov $-35, %ebx
+ movdqu -35(%eax), %xmm1
+ movdqu -35(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(19bytes):
+ movl -19(%eax), %ecx
+ movl -19(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(15bytes):
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(11bytes):
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(7bytes):
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(3bytes):
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+L(1bytes):
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+# endif
+ .p2align 4
+L(52bytes):
+ movdqu -52(%eax), %xmm1
+ movdqu -52(%edx), %xmm2
+ mov $-52, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(36bytes):
+ movdqu -36(%eax), %xmm1
+ movdqu -36(%edx), %xmm2
+ mov $-36, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(20bytes):
+ movdqu -20(%eax), %xmm1
+ movdqu -20(%edx), %xmm2
+ mov $-20, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -4(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -4(%edx), %ecx
+# endif
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(53bytes):
+ movdqu -53(%eax), %xmm1
+ movdqu -53(%edx), %xmm2
+ mov $-53, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(37bytes):
+ mov $-37, %ebx
+ movdqu -37(%eax), %xmm1
+ movdqu -37(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(21bytes):
+ mov $-21, %ebx
+ movdqu -21(%eax), %xmm1
+ movdqu -21(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(54bytes):
+ movdqu -54(%eax), %xmm1
+ movdqu -54(%edx), %xmm2
+ mov $-54, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(38bytes):
+ mov $-38, %ebx
+ movdqu -38(%eax), %xmm1
+ movdqu -38(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(22bytes):
+ mov $-22, %ebx
+ movdqu -22(%eax), %xmm1
+ movdqu -22(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(55bytes):
+ movdqu -55(%eax), %xmm1
+ movdqu -55(%edx), %xmm2
+ mov $-55, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(39bytes):
+ mov $-39, %ebx
+ movdqu -39(%eax), %xmm1
+ movdqu -39(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(23bytes):
+ mov $-23, %ebx
+ movdqu -23(%eax), %xmm1
+ movdqu -23(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+# endif
+ .p2align 4
+L(56bytes):
+ movdqu -56(%eax), %xmm1
+ movdqu -56(%edx), %xmm2
+ mov $-56, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(40bytes):
+ mov $-40, %ebx
+ movdqu -40(%eax), %xmm1
+ movdqu -40(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(24bytes):
+ mov $-24, %ebx
+ movdqu -24(%eax), %xmm1
+ movdqu -24(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -8(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -8(%edx), %ecx
+# endif
+ jne L(find_diff)
+
+ mov -4(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -4(%edx), %ecx
+# endif
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(57bytes):
+ movdqu -57(%eax), %xmm1
+ movdqu -57(%edx), %xmm2
+ mov $-57, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(41bytes):
+ mov $-41, %ebx
+ movdqu -41(%eax), %xmm1
+ movdqu -41(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(25bytes):
+ mov $-25, %ebx
+ movdqu -25(%eax), %xmm1
+ movdqu -25(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(58bytes):
+ movdqu -58(%eax), %xmm1
+ movdqu -58(%edx), %xmm2
+ mov $-58, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(42bytes):
+ mov $-42, %ebx
+ movdqu -42(%eax), %xmm1
+ movdqu -42(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(26bytes):
+ mov $-26, %ebx
+ movdqu -26(%eax), %xmm1
+ movdqu -26(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(59bytes):
+ movdqu -59(%eax), %xmm1
+ movdqu -59(%edx), %xmm2
+ mov $-59, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(43bytes):
+ mov $-43, %ebx
+ movdqu -43(%eax), %xmm1
+ movdqu -43(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(27bytes):
+ mov $-27, %ebx
+ movdqu -27(%eax), %xmm1
+ movdqu -27(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+# endif
+ .p2align 4
+L(60bytes):
+ movdqu -60(%eax), %xmm1
+ movdqu -60(%edx), %xmm2
+ mov $-60, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(44bytes):
+ mov $-44, %ebx
+ movdqu -44(%eax), %xmm1
+ movdqu -44(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(28bytes):
+ mov $-28, %ebx
+ movdqu -28(%eax), %xmm1
+ movdqu -28(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -12(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -12(%edx), %ecx
+# endif
+ jne L(find_diff)
+
+ mov -8(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -8(%edx), %ecx
+# endif
+ jne L(find_diff)
+
+ mov -4(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -4(%edx), %ecx
+# endif
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(61bytes):
+ movdqu -61(%eax), %xmm1
+ movdqu -61(%edx), %xmm2
+ mov $-61, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(45bytes):
+ mov $-45, %ebx
+ movdqu -45(%eax), %xmm1
+ movdqu -45(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(29bytes):
+ mov $-29, %ebx
+ movdqu -29(%eax), %xmm1
+ movdqu -29(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(62bytes):
+ movdqu -62(%eax), %xmm1
+ movdqu -62(%edx), %xmm2
+ mov $-62, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(46bytes):
+ mov $-46, %ebx
+ movdqu -46(%eax), %xmm1
+ movdqu -46(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(30bytes):
+ mov $-30, %ebx
+ movdqu -30(%eax), %xmm1
+ movdqu -30(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ .p2align 4
+L(63bytes):
+ movdqu -63(%eax), %xmm1
+ movdqu -63(%edx), %xmm2
+ mov $-63, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(47bytes):
+ mov $-47, %ebx
+ movdqu -47(%eax), %xmm1
+ movdqu -47(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(31bytes):
+ mov $-31, %ebx
+ movdqu -31(%eax), %xmm1
+ movdqu -31(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+# endif
+
+ .p2align 4
+L(64bytes):
+ movdqu -64(%eax), %xmm1
+ movdqu -64(%edx), %xmm2
+ mov $-64, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(48bytes):
+ movdqu -48(%eax), %xmm1
+ movdqu -48(%edx), %xmm2
+ mov $-48, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(32bytes):
+ movdqu -32(%eax), %xmm1
+ movdqu -32(%edx), %xmm2
+ mov $-32, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -16(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -16(%edx), %ecx
+# endif
+ jne L(find_diff)
+
+ mov -12(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -12(%edx), %ecx
+# endif
+ jne L(find_diff)
+
+ mov -8(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -8(%edx), %ecx
+# endif
+ jne L(find_diff)
+
+ mov -4(%eax), %ecx
+# ifndef USE_AS_WMEMCMP
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+# else
+ cmp -4(%edx), %ecx
+# endif
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(less16bytes):
+ add %ebx, %eax
+ add %ebx, %edx
+
+ mov (%eax), %ecx
+ mov (%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 4(%eax), %ecx
+ mov 4(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 8(%eax), %ecx
+ mov 8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 12(%eax), %ecx
+ mov 12(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+# else
+ .p2align 4
+L(less16bytes):
+ add %ebx, %eax
+ add %ebx, %edx
+
+ mov (%eax), %ecx
+ cmp (%edx), %ecx
+ jne L(find_diff)
+
+ mov 4(%eax), %ecx
+ cmp 4(%edx), %ecx
+ jne L(find_diff)
+
+ mov 8(%eax), %ecx
+ cmp 8(%edx), %ecx
+ jne L(find_diff)
+
+ mov 12(%eax), %ecx
+ cmp 12(%edx), %ecx
+
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+# endif
+
+ .p2align 4
+L(find_diff):
+# ifndef USE_AS_WMEMCMP
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ shr $16,%ecx
+ shr $16,%ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+L(end):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(bigger)
+ neg %eax
+L(bigger):
+ ret
+# else
+ POP (%ebx)
+ mov $1, %eax
+ jg L(bigger)
+ neg %eax
+ ret
+
+ .p2align 4
+L(bigger):
+ ret
+# endif
+END (MEMCMP)
+
+ .section .rodata.sse4.2,"a",@progbits
+ .p2align 2
+ .type L(table_64bytes), @object
+# ifndef USE_AS_WMEMCMP
+L(table_64bytes):
+ .int JMPTBL (L(0bytes), L(table_64bytes))
+ .int JMPTBL (L(1bytes), L(table_64bytes))
+ .int JMPTBL (L(2bytes), L(table_64bytes))
+ .int JMPTBL (L(3bytes), L(table_64bytes))
+ .int JMPTBL (L(4bytes), L(table_64bytes))
+ .int JMPTBL (L(5bytes), L(table_64bytes))
+ .int JMPTBL (L(6bytes), L(table_64bytes))
+ .int JMPTBL (L(7bytes), L(table_64bytes))
+ .int JMPTBL (L(8bytes), L(table_64bytes))
+ .int JMPTBL (L(9bytes), L(table_64bytes))
+ .int JMPTBL (L(10bytes), L(table_64bytes))
+ .int JMPTBL (L(11bytes), L(table_64bytes))
+ .int JMPTBL (L(12bytes), L(table_64bytes))
+ .int JMPTBL (L(13bytes), L(table_64bytes))
+ .int JMPTBL (L(14bytes), L(table_64bytes))
+ .int JMPTBL (L(15bytes), L(table_64bytes))
+ .int JMPTBL (L(16bytes), L(table_64bytes))
+ .int JMPTBL (L(17bytes), L(table_64bytes))
+ .int JMPTBL (L(18bytes), L(table_64bytes))
+ .int JMPTBL (L(19bytes), L(table_64bytes))
+ .int JMPTBL (L(20bytes), L(table_64bytes))
+ .int JMPTBL (L(21bytes), L(table_64bytes))
+ .int JMPTBL (L(22bytes), L(table_64bytes))
+ .int JMPTBL (L(23bytes), L(table_64bytes))
+ .int JMPTBL (L(24bytes), L(table_64bytes))
+ .int JMPTBL (L(25bytes), L(table_64bytes))
+ .int JMPTBL (L(26bytes), L(table_64bytes))
+ .int JMPTBL (L(27bytes), L(table_64bytes))
+ .int JMPTBL (L(28bytes), L(table_64bytes))
+ .int JMPTBL (L(29bytes), L(table_64bytes))
+ .int JMPTBL (L(30bytes), L(table_64bytes))
+ .int JMPTBL (L(31bytes), L(table_64bytes))
+ .int JMPTBL (L(32bytes), L(table_64bytes))
+ .int JMPTBL (L(33bytes), L(table_64bytes))
+ .int JMPTBL (L(34bytes), L(table_64bytes))
+ .int JMPTBL (L(35bytes), L(table_64bytes))
+ .int JMPTBL (L(36bytes), L(table_64bytes))
+ .int JMPTBL (L(37bytes), L(table_64bytes))
+ .int JMPTBL (L(38bytes), L(table_64bytes))
+ .int JMPTBL (L(39bytes), L(table_64bytes))
+ .int JMPTBL (L(40bytes), L(table_64bytes))
+ .int JMPTBL (L(41bytes), L(table_64bytes))
+ .int JMPTBL (L(42bytes), L(table_64bytes))
+ .int JMPTBL (L(43bytes), L(table_64bytes))
+ .int JMPTBL (L(44bytes), L(table_64bytes))
+ .int JMPTBL (L(45bytes), L(table_64bytes))
+ .int JMPTBL (L(46bytes), L(table_64bytes))
+ .int JMPTBL (L(47bytes), L(table_64bytes))
+ .int JMPTBL (L(48bytes), L(table_64bytes))
+ .int JMPTBL (L(49bytes), L(table_64bytes))
+ .int JMPTBL (L(50bytes), L(table_64bytes))
+ .int JMPTBL (L(51bytes), L(table_64bytes))
+ .int JMPTBL (L(52bytes), L(table_64bytes))
+ .int JMPTBL (L(53bytes), L(table_64bytes))
+ .int JMPTBL (L(54bytes), L(table_64bytes))
+ .int JMPTBL (L(55bytes), L(table_64bytes))
+ .int JMPTBL (L(56bytes), L(table_64bytes))
+ .int JMPTBL (L(57bytes), L(table_64bytes))
+ .int JMPTBL (L(58bytes), L(table_64bytes))
+ .int JMPTBL (L(59bytes), L(table_64bytes))
+ .int JMPTBL (L(60bytes), L(table_64bytes))
+ .int JMPTBL (L(61bytes), L(table_64bytes))
+ .int JMPTBL (L(62bytes), L(table_64bytes))
+ .int JMPTBL (L(63bytes), L(table_64bytes))
+ .int JMPTBL (L(64bytes), L(table_64bytes))
+# else
+L(table_64bytes):
+ .int JMPTBL (L(0bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(4bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(8bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(12bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(16bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(20bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(24bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(28bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(32bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(36bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(40bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(44bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(48bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(52bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(56bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(60bytes), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(unreal_case), L(table_64bytes))
+ .int JMPTBL (L(64bytes), L(table_64bytes))
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
new file mode 100644
index 0000000000..5ebf5a4d73
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
@@ -0,0 +1,2157 @@
+/* memcmp with SSSE3, wmemcmp with SSSE3
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef MEMCMP
+# define MEMCMP __memcmp_ssse3
+# endif
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define BLK1 PARMS
+# define BLK2 BLK1+4
+# define LEN BLK2+4
+# define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
+# define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
+
+/* Warning!
+   wmemcmp has to use SIGNED comparison for elements.
+   memcmp has to use UNSIGNED comparison for elements.
+*/
+
+ atom_text_section
+ENTRY (MEMCMP)
+ movl LEN(%esp), %ecx
+
+# ifdef USE_AS_WMEMCMP
+ shl $2, %ecx
+ test %ecx, %ecx
+ jz L(zero)
+# endif
+
+ movl BLK1(%esp), %eax
+ cmp $48, %ecx
+ movl BLK2(%esp), %edx
+ jae L(48bytesormore)
+
+# ifndef USE_AS_WMEMCMP
+ cmp $1, %ecx
+ jbe L(less1bytes)
+# endif
+
+ PUSH (%ebx)
+ add %ecx, %edx
+ add %ecx, %eax
+ jmp L(less48bytes)
+
+ CFI_POP (%ebx)
+
+# ifndef USE_AS_WMEMCMP
+ .p2align 4
+L(less1bytes):
+ jb L(zero)
+ movb (%eax), %cl
+ cmp (%edx), %cl
+ je L(zero)
+ mov $1, %eax
+ ja L(1bytesend)
+ neg %eax
+L(1bytesend):
+ ret
+# endif
+
+ .p2align 4
+L(zero):
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(48bytesormore):
+ PUSH (%ebx)
+ PUSH (%esi)
+ PUSH (%edi)
+ cfi_remember_state
+ movdqu (%eax), %xmm3
+ movdqu (%edx), %xmm0
+ movl %eax, %edi
+ movl %edx, %esi
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 16(%edi), %edi
+
+ sub $0xffff, %edx
+ lea 16(%esi), %esi
+ jnz L(less16bytes)
+ mov %edi, %edx
+ and $0xf, %edx
+ xor %edx, %edi
+ sub %edx, %esi
+ add %edx, %ecx
+ mov %esi, %edx
+ and $0xf, %edx
+ jz L(shr_0)
+ xor %edx, %esi
+
+# ifndef USE_AS_WMEMCMP
+ cmp $8, %edx
+ jae L(next_unaligned_table)
+ cmp $0, %edx
+ je L(shr_0)
+ cmp $1, %edx
+ je L(shr_1)
+ cmp $2, %edx
+ je L(shr_2)
+ cmp $3, %edx
+ je L(shr_3)
+ cmp $4, %edx
+ je L(shr_4)
+ cmp $5, %edx
+ je L(shr_5)
+ cmp $6, %edx
+ je L(shr_6)
+ jmp L(shr_7)
+
+ .p2align 2
+L(next_unaligned_table):
+ cmp $8, %edx
+ je L(shr_8)
+ cmp $9, %edx
+ je L(shr_9)
+ cmp $10, %edx
+ je L(shr_10)
+ cmp $11, %edx
+ je L(shr_11)
+ cmp $12, %edx
+ je L(shr_12)
+ cmp $13, %edx
+ je L(shr_13)
+ cmp $14, %edx
+ je L(shr_14)
+ jmp L(shr_15)
+# else
+ cmp $0, %edx
+ je L(shr_0)
+ cmp $4, %edx
+ je L(shr_4)
+ cmp $8, %edx
+ je L(shr_8)
+ jmp L(shr_12)
+# endif
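+
+/* Each L(shr_N) path below handles a second-source pointer that, after
+   the first source has been aligned, sits N bytes past a 16-byte
+   boundary.  Two aligned 16-byte loads are combined with PALIGNR,
+   which behaves roughly like this C sketch (lo, hi and dst are
+   illustrative names for the low load, high load and result):
+
+     // dst gets bytes [N, N+16) of the 32-byte window lo:hi
+     for (int i = 0; i < 16; i++)
+       dst[i] = (N + i < 16) ? lo[N + i] : hi[N + i - 16];
+  */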
+
+ .p2align 4
+L(shr_0):
+ cmp $80, %ecx
+ jae L(shr_0_gobble)
+ lea -48(%ecx), %ecx
+ xor %eax, %eax
+ movaps (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+ movaps 16(%esi), %xmm2
+ pcmpeqb 16(%edi), %xmm2
+ pand %xmm1, %xmm2
+ pmovmskb %xmm2, %edx
+ add $32, %edi
+ add $32, %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea (%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_0_gobble):
+ lea -48(%ecx), %ecx
+ movdqa (%esi), %xmm0
+ xor %eax, %eax
+ pcmpeqb (%edi), %xmm0
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm2
+ pcmpeqb 16(%edi), %xmm2
+L(shr_0_gobble_loop):
+ pand %xmm0, %xmm2
+ sub $32, %ecx
+ pmovmskb %xmm2, %edx
+ movdqa %xmm0, %xmm1
+ movdqa 32(%esi), %xmm0
+ movdqa 48(%esi), %xmm2
+ sbb $0xffff, %edx
+ pcmpeqb 32(%edi), %xmm0
+ pcmpeqb 48(%edi), %xmm2
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ jz L(shr_0_gobble_loop)
+
+ pand %xmm0, %xmm2
+ cmp $0, %ecx
+ jge L(shr_0_gobble_loop_next)
+ inc %edx
+ add $32, %ecx
+L(shr_0_gobble_loop_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm2, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea (%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+# ifndef USE_AS_WMEMCMP
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_1):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_1_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $1,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $1,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 1(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_1_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $1,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $1,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_1_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $1,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $1,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_1_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_1_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_1_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 1(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_2):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_2_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $2,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $2,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 2(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_2_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $2,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $2,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_2_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $2,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $2,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_2_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_2_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_2_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 2(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_3):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_3_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $3,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $3,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 3(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_3_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $3,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $3,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_3_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $3,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $3,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_3_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_3_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_3_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 3(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+# endif
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_4):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_4_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $4,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $4,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 4(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_4_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $4,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $4,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_4_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $4,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $4,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_4_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_4_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_4_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 4(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+# ifndef USE_AS_WMEMCMP
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_5):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_5_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $5,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $5,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 5(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_5_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $5,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $5,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_5_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $5,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $5,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_5_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_5_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_5_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 5(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_6):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_6_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $6,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $6,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 6(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_6_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $6,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $6,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_6_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $6,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $6,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_6_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_6_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_6_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 6(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_7):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_7_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $7,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $7,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 7(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_7_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $7,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $7,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_7_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $7,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $7,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_7_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_7_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_7_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 7(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+# endif
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_8):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_8_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $8,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $8,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 8(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_8_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $8,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $8,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_8_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $8,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $8,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_8_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_8_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_8_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 8(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+# ifndef USE_AS_WMEMCMP
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_9):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_9_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $9,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $9,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 9(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_9_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $9,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $9,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_9_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $9,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $9,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_9_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_9_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_9_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 9(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_10):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_10_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $10, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $10,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 10(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_10_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $10, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $10, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_10_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $10,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $10,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_10_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_10_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_10_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 10(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_11):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_11_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $11, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $11, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 11(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_11_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $11, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $11, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_11_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $11,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $11,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_11_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_11_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_11_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 11(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+# endif
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_12):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_12_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $12, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $12, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 12(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_12_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $12, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $12, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_12_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $12,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $12,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_12_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_12_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_12_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 12(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+# ifndef USE_AS_WMEMCMP
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_13):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_13_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $13, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $13, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 13(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_13_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $13, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $13, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_13_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $13,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $13,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_13_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_13_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_13_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 13(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_14):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_14_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $14, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $14, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 14(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_14_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $14, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $14, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_14_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+	palignr	$14, 48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$14, 32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_14_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_14_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_14_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 14(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_15):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_15_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $15, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $15, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 15(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(shr_15_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $15, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $15, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_15_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+	palignr	$15, 48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$15, 32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_15_gobble_loop)
+ pand %xmm0, %xmm3
+
+ cmp $0, %ecx
+ jge L(shr_15_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_15_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 15(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+# endif
+
+ cfi_restore_state
+ cfi_remember_state
+ .p2align 4
+L(exit):
+ pmovmskb %xmm1, %ebx
+ sub $0xffff, %ebx
+ jz L(first16bytes)
+ lea -16(%esi), %esi
+ lea -16(%edi), %edi
+ mov %ebx, %edx
+
+L(first16bytes):
+ add %eax, %esi
+L(less16bytes):
+
+# ifndef USE_AS_WMEMCMP
+ test %dl, %dl
+ jz L(next_24_bytes)
+
+ test $0x01, %dl
+ jnz L(Byte16)
+
+ test $0x02, %dl
+ jnz L(Byte17)
+
+ test $0x04, %dl
+ jnz L(Byte18)
+
+ test $0x08, %dl
+ jnz L(Byte19)
+
+ test $0x10, %dl
+ jnz L(Byte20)
+
+ test $0x20, %dl
+ jnz L(Byte21)
+
+ test $0x40, %dl
+ jnz L(Byte22)
+L(Byte23):
+ movzbl -9(%edi), %eax
+ movzbl -9(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(Byte16):
+ movzbl -16(%edi), %eax
+ movzbl -16(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(Byte17):
+ movzbl -15(%edi), %eax
+ movzbl -15(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(Byte18):
+ movzbl -14(%edi), %eax
+ movzbl -14(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(Byte19):
+ movzbl -13(%edi), %eax
+ movzbl -13(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(Byte20):
+ movzbl -12(%edi), %eax
+ movzbl -12(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(Byte21):
+ movzbl -11(%edi), %eax
+ movzbl -11(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(Byte22):
+ movzbl -10(%edi), %eax
+ movzbl -10(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ .p2align 4
+L(next_24_bytes):
+ lea 8(%edi), %edi
+ lea 8(%esi), %esi
+ test $0x01, %dh
+ jnz L(Byte16)
+
+ test $0x02, %dh
+ jnz L(Byte17)
+
+ test $0x04, %dh
+ jnz L(Byte18)
+
+ test $0x08, %dh
+ jnz L(Byte19)
+
+ test $0x10, %dh
+ jnz L(Byte20)
+
+ test $0x20, %dh
+ jnz L(Byte21)
+
+ test $0x40, %dh
+ jnz L(Byte22)
+
+ .p2align 4
+L(Byte31):
+ movzbl -9(%edi), %eax
+ movzbl -9(%esi), %edx
+ sub %edx, %eax
+ RETURN_END
+# else
+
+/* Special case for wmemcmp: locate the differing double word.  */
+ xor %eax, %eax
+ test %dl, %dl
+ jz L(next_two_double_words)
+ and $15, %dl
+ jz L(second_double_word)
+ mov -16(%edi), %eax
+ cmp -16(%esi), %eax
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(second_double_word):
+ mov -12(%edi), %eax
+ cmp -12(%esi), %eax
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(next_two_double_words):
+ and $15, %dh
+ jz L(fourth_double_word)
+ mov -8(%edi), %eax
+ cmp -8(%esi), %eax
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(fourth_double_word):
+ mov -4(%edi), %eax
+ cmp -4(%esi), %eax
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(nequal):
+ mov $1, %eax
+ jg L(nequal_bigger)
+ neg %eax
+ RETURN
+
+ .p2align 4
+L(nequal_bigger):
+ RETURN_END
+# endif
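
The bit tests above recover the position of the first mismatch from the pcmpeqb mask: each mask bit is 1 where the bytes were equal, so after sub $0xffff the complement's lowest set bit indexes the first differing byte. The same extraction in C, assuming SSE2 intrinsics and GCC's __builtin_ctz:

#include <emmintrin.h>

/* Compare 16 bytes and return a memcmp-style result, locating the
   first difference through the pcmpeqb/pmovmskb mask.  */
static int
cmp16 (const unsigned char *a, const unsigned char *b)
{
  __m128i va = _mm_loadu_si128 ((const __m128i *) a);
  __m128i vb = _mm_loadu_si128 ((const __m128i *) b);
  unsigned mask = _mm_movemask_epi8 (_mm_cmpeq_epi8 (va, vb));
  if (mask == 0xffff)
    return 0;				/* sub $0xffff, %edx would be zero */
  unsigned i = __builtin_ctz (~mask);	/* first clear bit = first mismatch */
  return a[i] - b[i];
}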
+
+ CFI_PUSH (%ebx)
+
+ .p2align 4
+L(more8bytes):
+ cmp $16, %ecx
+ jae L(more16bytes)
+ cmp $8, %ecx
+ je L(8bytes)
+# ifndef USE_AS_WMEMCMP
+ cmp $9, %ecx
+ je L(9bytes)
+ cmp $10, %ecx
+ je L(10bytes)
+ cmp $11, %ecx
+ je L(11bytes)
+ cmp $12, %ecx
+ je L(12bytes)
+ cmp $13, %ecx
+ je L(13bytes)
+ cmp $14, %ecx
+ je L(14bytes)
+ jmp L(15bytes)
+# else
+ jmp L(12bytes)
+# endif
+
+ .p2align 4
+L(more16bytes):
+ cmp $24, %ecx
+ jae L(more24bytes)
+ cmp $16, %ecx
+ je L(16bytes)
+# ifndef USE_AS_WMEMCMP
+ cmp $17, %ecx
+ je L(17bytes)
+ cmp $18, %ecx
+ je L(18bytes)
+ cmp $19, %ecx
+ je L(19bytes)
+ cmp $20, %ecx
+ je L(20bytes)
+ cmp $21, %ecx
+ je L(21bytes)
+ cmp $22, %ecx
+ je L(22bytes)
+ jmp L(23bytes)
+# else
+ jmp L(20bytes)
+# endif
+
+ .p2align 4
+L(more24bytes):
+ cmp $32, %ecx
+ jae L(more32bytes)
+ cmp $24, %ecx
+ je L(24bytes)
+# ifndef USE_AS_WMEMCMP
+ cmp $25, %ecx
+ je L(25bytes)
+ cmp $26, %ecx
+ je L(26bytes)
+ cmp $27, %ecx
+ je L(27bytes)
+ cmp $28, %ecx
+ je L(28bytes)
+ cmp $29, %ecx
+ je L(29bytes)
+ cmp $30, %ecx
+ je L(30bytes)
+ jmp L(31bytes)
+# else
+ jmp L(28bytes)
+# endif
+
+ .p2align 4
+L(more32bytes):
+ cmp $40, %ecx
+ jae L(more40bytes)
+ cmp $32, %ecx
+ je L(32bytes)
+# ifndef USE_AS_WMEMCMP
+ cmp $33, %ecx
+ je L(33bytes)
+ cmp $34, %ecx
+ je L(34bytes)
+ cmp $35, %ecx
+ je L(35bytes)
+ cmp $36, %ecx
+ je L(36bytes)
+ cmp $37, %ecx
+ je L(37bytes)
+ cmp $38, %ecx
+ je L(38bytes)
+ jmp L(39bytes)
+# else
+ jmp L(36bytes)
+# endif
+
+ .p2align 4
+L(less48bytes):
+ cmp $8, %ecx
+ jae L(more8bytes)
+# ifndef USE_AS_WMEMCMP
+ cmp $2, %ecx
+ je L(2bytes)
+ cmp $3, %ecx
+ je L(3bytes)
+ cmp $4, %ecx
+ je L(4bytes)
+ cmp $5, %ecx
+ je L(5bytes)
+ cmp $6, %ecx
+ je L(6bytes)
+ jmp L(7bytes)
+# else
+ jmp L(4bytes)
+# endif
+
+ .p2align 4
+L(more40bytes):
+ cmp $40, %ecx
+ je L(40bytes)
+# ifndef USE_AS_WMEMCMP
+ cmp $41, %ecx
+ je L(41bytes)
+ cmp $42, %ecx
+ je L(42bytes)
+ cmp $43, %ecx
+ je L(43bytes)
+ cmp $44, %ecx
+ je L(44bytes)
+ cmp $45, %ecx
+ je L(45bytes)
+ cmp $46, %ecx
+ je L(46bytes)
+ jmp L(47bytes)
+
+ .p2align 4
+L(44bytes):
+ mov -44(%eax), %ecx
+ mov -44(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(40bytes):
+ mov -40(%eax), %ecx
+ mov -40(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(36bytes):
+ mov -36(%eax), %ecx
+ mov -36(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(32bytes):
+ mov -32(%eax), %ecx
+ mov -32(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(28bytes):
+ mov -28(%eax), %ecx
+ mov -28(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(24bytes):
+ mov -24(%eax), %ecx
+ mov -24(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(20bytes):
+ mov -20(%eax), %ecx
+ mov -20(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(16bytes):
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+# else
+ .p2align 4
+L(44bytes):
+ mov -44(%eax), %ecx
+ cmp -44(%edx), %ecx
+ jne L(find_diff)
+L(40bytes):
+ mov -40(%eax), %ecx
+ cmp -40(%edx), %ecx
+ jne L(find_diff)
+L(36bytes):
+ mov -36(%eax), %ecx
+ cmp -36(%edx), %ecx
+ jne L(find_diff)
+L(32bytes):
+ mov -32(%eax), %ecx
+ cmp -32(%edx), %ecx
+ jne L(find_diff)
+L(28bytes):
+ mov -28(%eax), %ecx
+ cmp -28(%edx), %ecx
+ jne L(find_diff)
+L(24bytes):
+ mov -24(%eax), %ecx
+ cmp -24(%edx), %ecx
+ jne L(find_diff)
+L(20bytes):
+ mov -20(%eax), %ecx
+ cmp -20(%edx), %ecx
+ jne L(find_diff)
+L(16bytes):
+ mov -16(%eax), %ecx
+ cmp -16(%edx), %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ cmp -12(%edx), %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ cmp -8(%edx), %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ xor %eax, %eax
+ cmp -4(%edx), %ecx
+ jne L(find_diff)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+# endif
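
The label chains above enter at the case matching the residual length and fall through every smaller 4-byte compare, so one chain serves all lengths. A cut-down C sketch of the same dispatch shape (three cases only; the helper name is hypothetical):

#include <string.h>

/* end_a and end_b point one past the last byte; n is the residual
   length, a multiple of 4 and at most 12 in this sketch.  */
static int
tail_cmp (const unsigned char *end_a, const unsigned char *end_b, unsigned n)
{
  int r;
  switch (n)	/* enter at the matching case, then fall through */
    {
    case 12:
      if ((r = memcmp (end_a - 12, end_b - 12, 4)) != 0)
	return r;
      /* FALLTHROUGH */
    case 8:
      if ((r = memcmp (end_a - 8, end_b - 8, 4)) != 0)
	return r;
      /* FALLTHROUGH */
    case 4:
      return memcmp (end_a - 4, end_b - 4, 4);
    }
  return 0;
}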
+
+# ifndef USE_AS_WMEMCMP
+
+ .p2align 4
+L(45bytes):
+ mov -45(%eax), %ecx
+ mov -45(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(41bytes):
+ mov -41(%eax), %ecx
+ mov -41(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(37bytes):
+ mov -37(%eax), %ecx
+ mov -37(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(33bytes):
+ mov -33(%eax), %ecx
+ mov -33(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(29bytes):
+ mov -29(%eax), %ecx
+ mov -29(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(25bytes):
+ mov -25(%eax), %ecx
+ mov -25(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(21bytes):
+ mov -21(%eax), %ecx
+ mov -21(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(17bytes):
+ mov -17(%eax), %ecx
+ mov -17(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(13bytes):
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(9bytes):
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(5bytes):
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ .p2align 4
+L(46bytes):
+ mov -46(%eax), %ecx
+ mov -46(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(42bytes):
+ mov -42(%eax), %ecx
+ mov -42(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(38bytes):
+ mov -38(%eax), %ecx
+ mov -38(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(34bytes):
+ mov -34(%eax), %ecx
+ mov -34(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(30bytes):
+ mov -30(%eax), %ecx
+ mov -30(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(26bytes):
+ mov -26(%eax), %ecx
+ mov -26(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(22bytes):
+ mov -22(%eax), %ecx
+ mov -22(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(18bytes):
+ mov -18(%eax), %ecx
+ mov -18(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(14bytes):
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(10bytes):
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(6bytes):
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(2bytes):
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ .p2align 4
+L(47bytes):
+ movl -47(%eax), %ecx
+ movl -47(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(43bytes):
+ movl -43(%eax), %ecx
+ movl -43(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(39bytes):
+ movl -39(%eax), %ecx
+ movl -39(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(35bytes):
+ movl -35(%eax), %ecx
+ movl -35(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(31bytes):
+ movl -31(%eax), %ecx
+ movl -31(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(27bytes):
+ movl -27(%eax), %ecx
+ movl -27(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(23bytes):
+ movl -23(%eax), %ecx
+ movl -23(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(19bytes):
+ movl -19(%eax), %ecx
+ movl -19(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(15bytes):
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(11bytes):
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(7bytes):
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(3bytes):
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ .p2align 4
+L(find_diff):
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+	shr	$16, %ecx
+	shr	$16, %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+
+ .p2align 4
+L(end):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(bigger)
+ neg %eax
+L(bigger):
+ ret
+# else
+
+/* L(find_diff) for wmemcmp: the flags from the preceding cmp decide
+   the sign.  */
+ .p2align 4
+L(find_diff):
+ POP (%ebx)
+ mov $1, %eax
+ jg L(find_diff_bigger)
+ neg %eax
+ ret
+
+ .p2align 4
+L(find_diff_bigger):
+ ret
+
+# endif
+END (MEMCMP)
+#endif
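
L(find_diff) above orders the two differing 32-bit words bytewise starting from the least significant byte, because the loads are little-endian and the low byte is therefore the first byte in memory. A C rendering of that tail under the same assumption:

#include <stdint.h>

/* ecx and ebx hold little-endian words loaded from s1 and s2 that are
   known to differ; return the memcmp sign of the first differing byte.  */
static int
find_diff (uint32_t ecx, uint32_t ebx)
{
  for (int shift = 0; shift < 32; shift += 8)
    {
      unsigned char c = ecx >> shift;
      unsigned char b = ebx >> shift;
      if (c != b)
	return c < b ? -1 : 1;	/* earliest byte in memory decides */
    }
  return 0;			/* unreachable: the words differ */
}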
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp.S
new file mode 100644
index 0000000000..1fc5994a17
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcmp.S
@@ -0,0 +1,62 @@
+/* Multiple versions of memcmp
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+ .text
+ENTRY(memcmp)
+ .type memcmp, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memcmp_ia32)
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcmp_ssse3)
+ HAS_CPU_FEATURE (SSE4_2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcmp_sse4_2)
+2: ret
+END(memcmp)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memcmp_ia32, @function; \
+ .p2align 4; \
+ .globl __memcmp_ia32; \
+ .hidden __memcmp_ia32; \
+ __memcmp_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they are called without setting up EBX, which the PLT used by IFUNC
+   requires.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memcmp; __GI_memcmp = __memcmp_ia32
+# endif
+#endif
+
+#include "../memcmp.S"
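
For reference, the gnu_indirect_function entry above is the assembly form of what GCC exposes as the ifunc attribute: a resolver runs once at relocation time and returns the chosen implementation. A hedged C sketch with hypothetical variant names, using the real GCC builtins __builtin_cpu_init/__builtin_cpu_supports (this is not the glibc mechanism itself, which as noted must avoid the PLT):

#include <stddef.h>

typedef int memcmp_fn (const void *, const void *, size_t);

extern int memcmp_ia32 (const void *, const void *, size_t);
extern int memcmp_ssse3 (const void *, const void *, size_t);
extern int memcmp_sse4_2 (const void *, const void *, size_t);

static memcmp_fn *
resolve_memcmp (void)
{
  __builtin_cpu_init ();		/* required before cpu_supports here */
  if (__builtin_cpu_supports ("sse4.2"))
    return memcmp_sse4_2;
  if (__builtin_cpu_supports ("ssse3"))
    return memcmp_ssse3;
  return memcmp_ia32;			/* baseline fallback */
}

int my_memcmp (const void *, const void *, size_t)
     __attribute__ ((ifunc ("resolve_memcmp")));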
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
new file mode 100644
index 0000000000..2fe2072cb1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
@@ -0,0 +1,681 @@
+/* memcpy optimized with SSE2 unaligned memory access instructions.
+ Copyright (C) 2014-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) \
+ && (defined SHARED \
+ || defined USE_AS_MEMMOVE \
+ || !defined USE_MULTIARCH)
+
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+# ifndef MEMCPY
+# define MEMCPY __memcpy_sse2_unaligned
+# define MEMCPY_CHK __memcpy_chk_sse2_unaligned
+# endif
+
+# ifdef USE_AS_BCOPY
+# define SRC PARMS
+# define DEST SRC+4
+# define LEN DEST+4
+# else
+# define DEST PARMS
+# define SRC DEST+4
+# define LEN SRC+4
+# endif
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 8 /* Preserve EBX. */
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+
+ .section .text.sse2,"ax",@progbits
+# if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+# endif
+
+ENTRY (MEMCPY)
+ ENTRANCE
+ movl LEN(%esp), %ecx
+ movl SRC(%esp), %eax
+ movl DEST(%esp), %edx
+ cmp %edx, %eax
+
+# ifdef USE_AS_MEMMOVE
+ jg L(check_forward)
+
+L(mm_len_0_or_more_backward):
+/* Now dispatch on length.  The ranges [0..16], (16..32], (32..64],
+   (64..128] and everything above 128 are handled separately.  */
+ cmp $16, %ecx
+ jbe L(mm_len_0_16_bytes_backward)
+
+ cmpl $32, %ecx
+ jg L(mm_len_32_or_more_backward)
+
+/* Copy [0..32] and return. */
+ movdqu (%eax), %xmm0
+ movdqu -16(%eax, %ecx), %xmm1
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, -16(%edx, %ecx)
+ jmp L(return)
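
This copy shape relies on overlapping moves: for any length n in [16, 32], one 16-byte load at the start and one ending at src + n cover the whole range, and both loads complete before either store, which keeps the shape correct even for overlapping memmove. A sketch with SSE2 intrinsics:

#include <emmintrin.h>
#include <stddef.h>

/* Copy n bytes, 16 <= n <= 32, with two possibly overlapping 16-byte
   moves; both loads happen before either store.  */
static void
copy_16_to_32 (char *dst, const char *src, size_t n)
{
  __m128i head = _mm_loadu_si128 ((const __m128i *) src);
  __m128i tail = _mm_loadu_si128 ((const __m128i *) (src + n - 16));
  _mm_storeu_si128 ((__m128i *) dst, head);
  _mm_storeu_si128 ((__m128i *) (dst + n - 16), tail);
}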
+
+L(mm_len_32_or_more_backward):
+ cmpl $64, %ecx
+ jg L(mm_len_64_or_more_backward)
+
+/* Copy [0..64] and return. */
+ movdqu (%eax), %xmm0
+ movdqu 16(%eax), %xmm1
+ movdqu -16(%eax, %ecx), %xmm2
+ movdqu -32(%eax, %ecx), %xmm3
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, 16(%edx)
+ movdqu %xmm2, -16(%edx, %ecx)
+ movdqu %xmm3, -32(%edx, %ecx)
+ jmp L(return)
+
+L(mm_len_64_or_more_backward):
+ cmpl $128, %ecx
+ jg L(mm_len_128_or_more_backward)
+
+/* Copy [0..128] and return. */
+ movdqu (%eax), %xmm0
+ movdqu 16(%eax), %xmm1
+ movdqu 32(%eax), %xmm2
+ movdqu 48(%eax), %xmm3
+ movdqu -64(%eax, %ecx), %xmm4
+ movdqu -48(%eax, %ecx), %xmm5
+ movdqu -32(%eax, %ecx), %xmm6
+ movdqu -16(%eax, %ecx), %xmm7
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, 16(%edx)
+ movdqu %xmm2, 32(%edx)
+ movdqu %xmm3, 48(%edx)
+ movdqu %xmm4, -64(%edx, %ecx)
+ movdqu %xmm5, -48(%edx, %ecx)
+ movdqu %xmm6, -32(%edx, %ecx)
+ movdqu %xmm7, -16(%edx, %ecx)
+ jmp L(return)
+
+L(mm_len_128_or_more_backward):
+ add %ecx, %eax
+ cmp %edx, %eax
+ movl SRC(%esp), %eax
+ jle L(forward)
+ PUSH (%esi)
+ PUSH (%edi)
+ PUSH (%ebx)
+
+/* Align the destination so the main loop can use aligned stores.  */
+ movdqu (%eax), %xmm4
+ movdqu 16(%eax), %xmm5
+ movdqu 32(%eax), %xmm6
+ movdqu 48(%eax), %xmm7
+ leal (%edx, %ecx), %esi
+ movdqu -16(%eax, %ecx), %xmm0
+ subl $16, %esp
+ movdqu %xmm0, (%esp)
+ mov %ecx, %edi
+ movl %esi, %ecx
+ andl $-16, %ecx
+ leal (%ecx), %ebx
+ subl %edx, %ebx
+ leal (%eax, %ebx), %eax
+ shrl $6, %ebx
+
+# ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %edi
+# else
+# ifdef SHARED
+ PUSH (%ebx)
+ SETUP_PIC_REG (bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %edi
+ POP (%ebx)
+# else
+ cmp __x86_shared_cache_size_half, %edi
+# endif
+# endif
+ jae L(mm_large_page_loop_backward)
+
+ .p2align 4
+L(mm_main_loop_backward):
+
+ prefetcht0 -128(%eax)
+
+ movdqu -64(%eax), %xmm0
+ movdqu -48(%eax), %xmm1
+ movdqu -32(%eax), %xmm2
+ movdqu -16(%eax), %xmm3
+ movaps %xmm0, -64(%ecx)
+ subl $64, %eax
+ movaps %xmm1, -48(%ecx)
+ movaps %xmm2, -32(%ecx)
+ movaps %xmm3, -16(%ecx)
+ subl $64, %ecx
+ sub $1, %ebx
+ jnz L(mm_main_loop_backward)
+ movdqu (%esp), %xmm0
+ addl $16, %esp
+ movdqu %xmm0, -16(%esi)
+ movdqu %xmm4, (%edx)
+ movdqu %xmm5, 16(%edx)
+ movdqu %xmm6, 32(%edx)
+ movdqu %xmm7, 48(%edx)
+ POP (%ebx)
+ jmp L(mm_return_pop_all)
+
+/* Copy [0..16] and return. */
+L(mm_len_0_16_bytes_backward):
+ testb $24, %cl
+ jnz L(mm_len_9_16_bytes_backward)
+ testb $4, %cl
+ .p2align 4,,5
+ jnz L(mm_len_5_8_bytes_backward)
+ testl %ecx, %ecx
+ .p2align 4,,2
+ je L(return)
+ testb $2, %cl
+ .p2align 4,,1
+ jne L(mm_len_3_4_bytes_backward)
+ movzbl -1(%eax,%ecx), %ebx
+ movzbl (%eax), %eax
+ movb %bl, -1(%edx,%ecx)
+ movb %al, (%edx)
+ jmp L(return)
+
+L(mm_len_3_4_bytes_backward):
+ movzwl -2(%eax,%ecx), %ebx
+ movzwl (%eax), %eax
+ movw %bx, -2(%edx,%ecx)
+ movw %ax, (%edx)
+ jmp L(return)
+
+L(mm_len_9_16_bytes_backward):
+ PUSH (%esi)
+ movl -4(%eax,%ecx), %ebx
+ movl -8(%eax,%ecx), %esi
+ movl %ebx, -4(%edx,%ecx)
+ movl %esi, -8(%edx,%ecx)
+ subl $8, %ecx
+ POP (%esi)
+ jmp L(mm_len_0_16_bytes_backward)
+
+L(mm_len_5_8_bytes_backward):
+ movl (%eax), %ebx
+ movl -4(%eax,%ecx), %eax
+ movl %ebx, (%edx)
+ movl %eax, -4(%edx,%ecx)
+ jmp L(return)
+
+/* Backward copy loop for large lengths.  */
+ .p2align 4
+L(mm_large_page_loop_backward):
+ movdqu -64(%eax), %xmm0
+ movdqu -48(%eax), %xmm1
+ movdqu -32(%eax), %xmm2
+ movdqu -16(%eax), %xmm3
+ movntdq %xmm0, -64(%ecx)
+ subl $64, %eax
+ movntdq %xmm1, -48(%ecx)
+ movntdq %xmm2, -32(%ecx)
+ movntdq %xmm3, -16(%ecx)
+ subl $64, %ecx
+ sub $1, %ebx
+ jnz L(mm_large_page_loop_backward)
+ sfence
+ movdqu (%esp), %xmm0
+ addl $16, %esp
+ movdqu %xmm0, -16(%esi)
+ movdqu %xmm4, (%edx)
+ movdqu %xmm5, 16(%edx)
+ movdqu %xmm6, 32(%edx)
+ movdqu %xmm7, 48(%edx)
+ POP (%ebx)
+ jmp L(mm_return_pop_all)
+
+L(check_forward):
+ add %edx, %ecx
+ cmp %eax, %ecx
+ movl LEN(%esp), %ecx
+ jle L(forward)
+
+/* Now dispatch on length.  The ranges [0..16], (16..32], (32..64],
+   (64..128] and everything above 128 are handled separately.  */
+ cmp $16, %ecx
+ jbe L(mm_len_0_16_bytes_forward)
+
+ cmpl $32, %ecx
+ ja L(mm_len_32_or_more_forward)
+
+/* Copy [0..32] and return. */
+ movdqu (%eax), %xmm0
+ movdqu -16(%eax, %ecx), %xmm1
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, -16(%edx, %ecx)
+ jmp L(return)
+
+L(mm_len_32_or_more_forward):
+ cmpl $64, %ecx
+ ja L(mm_len_64_or_more_forward)
+
+/* Copy [0..64] and return. */
+ movdqu (%eax), %xmm0
+ movdqu 16(%eax), %xmm1
+ movdqu -16(%eax, %ecx), %xmm2
+ movdqu -32(%eax, %ecx), %xmm3
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, 16(%edx)
+ movdqu %xmm2, -16(%edx, %ecx)
+ movdqu %xmm3, -32(%edx, %ecx)
+ jmp L(return)
+
+L(mm_len_64_or_more_forward):
+ cmpl $128, %ecx
+ ja L(mm_len_128_or_more_forward)
+
+/* Copy [0..128] and return. */
+ movdqu (%eax), %xmm0
+ movdqu 16(%eax), %xmm1
+ movdqu 32(%eax), %xmm2
+ movdqu 48(%eax), %xmm3
+ movdqu -64(%eax, %ecx), %xmm4
+ movdqu -48(%eax, %ecx), %xmm5
+ movdqu -32(%eax, %ecx), %xmm6
+ movdqu -16(%eax, %ecx), %xmm7
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, 16(%edx)
+ movdqu %xmm2, 32(%edx)
+ movdqu %xmm3, 48(%edx)
+ movdqu %xmm4, -64(%edx, %ecx)
+ movdqu %xmm5, -48(%edx, %ecx)
+ movdqu %xmm6, -32(%edx, %ecx)
+ movdqu %xmm7, -16(%edx, %ecx)
+ jmp L(return)
+
+L(mm_len_128_or_more_forward):
+ PUSH (%esi)
+ PUSH (%edi)
+ PUSH (%ebx)
+
+/* Align the destination so the main loop can use aligned stores.  */
+ movdqu -16(%eax, %ecx), %xmm4
+ movdqu -32(%eax, %ecx), %xmm5
+ movdqu -48(%eax, %ecx), %xmm6
+ movdqu -64(%eax, %ecx), %xmm7
+ leal (%edx, %ecx), %esi
+ movdqu (%eax), %xmm0
+ subl $16, %esp
+ movdqu %xmm0, (%esp)
+ mov %ecx, %edi
+ leal 16(%edx), %ecx
+ andl $-16, %ecx
+ movl %ecx, %ebx
+ subl %edx, %ebx
+ addl %ebx, %eax
+ movl %esi, %ebx
+ subl %ecx, %ebx
+ shrl $6, %ebx
+
+# ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %edi
+# else
+# ifdef SHARED
+ PUSH (%ebx)
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %edi
+ POP (%ebx)
+# else
+ cmp __x86_shared_cache_size_half, %edi
+# endif
+# endif
+ jae L(mm_large_page_loop_forward)
+
+ .p2align 4
+L(mm_main_loop_forward):
+
+ prefetcht0 128(%eax)
+
+ movdqu (%eax), %xmm0
+ movdqu 16(%eax), %xmm1
+ movdqu 32(%eax), %xmm2
+ movdqu 48(%eax), %xmm3
+ movdqa %xmm0, (%ecx)
+ addl $64, %eax
+ movaps %xmm1, 16(%ecx)
+ movaps %xmm2, 32(%ecx)
+ movaps %xmm3, 48(%ecx)
+ addl $64, %ecx
+ sub $1, %ebx
+ jnz L(mm_main_loop_forward)
+ movdqu (%esp), %xmm0
+ addl $16, %esp
+ movdqu %xmm0, (%edx)
+ movdqu %xmm4, -16(%esi)
+ movdqu %xmm5, -32(%esi)
+ movdqu %xmm6, -48(%esi)
+ movdqu %xmm7, -64(%esi)
+ POP (%ebx)
+ jmp L(mm_return_pop_all)
+
+L(mm_len_0_16_bytes_forward):
+ testb $24, %cl
+ jne L(mm_len_9_16_bytes_forward)
+ testb $4, %cl
+ .p2align 4,,5
+ jne L(mm_len_5_8_bytes_forward)
+ testl %ecx, %ecx
+ .p2align 4,,2
+ je L(return)
+ testb $2, %cl
+ .p2align 4,,1
+ jne L(mm_len_2_4_bytes_forward)
+ movzbl -1(%eax,%ecx), %ebx
+ movzbl (%eax), %eax
+ movb %bl, -1(%edx,%ecx)
+ movb %al, (%edx)
+ jmp L(return)
+
+L(mm_len_2_4_bytes_forward):
+ movzwl -2(%eax,%ecx), %ebx
+ movzwl (%eax), %eax
+ movw %bx, -2(%edx,%ecx)
+ movw %ax, (%edx)
+ jmp L(return)
+
+L(mm_len_5_8_bytes_forward):
+ movl (%eax), %ebx
+ movl -4(%eax,%ecx), %eax
+ movl %ebx, (%edx)
+ movl %eax, -4(%edx,%ecx)
+ jmp L(return)
+
+L(mm_len_9_16_bytes_forward):
+ movq (%eax), %xmm0
+ movq -8(%eax, %ecx), %xmm1
+ movq %xmm0, (%edx)
+ movq %xmm1, -8(%edx, %ecx)
+ jmp L(return)
+
+L(mm_return_pop_all):
+ movl %edx, %eax
+ POP (%edi)
+ POP (%esi)
+ RETURN
+
+/* Forward copy loop for large lengths.  */
+ .p2align 4
+L(mm_large_page_loop_forward):
+ movdqu (%eax), %xmm0
+ movdqu 16(%eax), %xmm1
+ movdqu 32(%eax), %xmm2
+ movdqu 48(%eax), %xmm3
+ movntdq %xmm0, (%ecx)
+ addl $64, %eax
+ movntdq %xmm1, 16(%ecx)
+ movntdq %xmm2, 32(%ecx)
+ movntdq %xmm3, 48(%ecx)
+ addl $64, %ecx
+ sub $1, %ebx
+ jnz L(mm_large_page_loop_forward)
+ sfence
+ movdqu (%esp), %xmm0
+ addl $16, %esp
+ movdqu %xmm0, (%edx)
+ movdqu %xmm4, -16(%esi)
+ movdqu %xmm5, -32(%esi)
+ movdqu %xmm6, -48(%esi)
+ movdqu %xmm7, -64(%esi)
+ POP (%ebx)
+ jmp L(mm_return_pop_all)
+# endif
+
+L(forward):
+ cmp $16, %ecx
+ jbe L(len_0_16_bytes)
+
+# ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_shared_cache_size_half, %ecx
+# endif
+# endif
+ jae L(large_page)
+
+ movdqu (%eax), %xmm0
+ movdqu -16(%eax, %ecx), %xmm1
+ cmpl $32, %ecx
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, -16(%edx, %ecx)
+ jbe L(return)
+
+ movdqu 16(%eax), %xmm0
+ movdqu -32(%eax, %ecx), %xmm1
+ cmpl $64, %ecx
+ movdqu %xmm0, 16(%edx)
+ movdqu %xmm1, -32(%edx, %ecx)
+ jbe L(return)
+
+ movdqu 32(%eax), %xmm0
+ movdqu 48(%eax), %xmm1
+ movdqu -48(%eax, %ecx), %xmm2
+ movdqu -64(%eax, %ecx), %xmm3
+ cmpl $128, %ecx
+ movdqu %xmm0, 32(%edx)
+ movdqu %xmm1, 48(%edx)
+ movdqu %xmm2, -48(%edx, %ecx)
+ movdqu %xmm3, -64(%edx, %ecx)
+ jbe L(return)
+
+/* Now the main loop: we align the address of the destination. */
+ leal 64(%edx), %ebx
+ andl $-64, %ebx
+
+ addl %edx, %ecx
+ andl $-64, %ecx
+
+ subl %edx, %eax
+
+/* We should stop two iterations before the end of the copy so the
+   prefetches do not read past the source buffer.  */
+ subl $64, %ecx
+ cmpl %ebx, %ecx
+ je L(main_loop_just_one_iteration)
+
+ subl $64, %ecx
+ cmpl %ebx, %ecx
+ je L(main_loop_last_two_iterations)
+
+ .p2align 4
+L(main_loop_cache):
+
+ prefetcht0 128(%ebx, %eax)
+
+ movdqu (%ebx, %eax), %xmm0
+ movdqu 16(%ebx, %eax), %xmm1
+ movdqu 32(%ebx, %eax), %xmm2
+ movdqu 48(%ebx, %eax), %xmm3
+ movdqa %xmm0, (%ebx)
+ movaps %xmm1, 16(%ebx)
+ movaps %xmm2, 32(%ebx)
+ movaps %xmm3, 48(%ebx)
+ lea 64(%ebx), %ebx
+ cmpl %ebx, %ecx
+ jne L(main_loop_cache)
+
+L(main_loop_last_two_iterations):
+ movdqu (%ebx, %eax), %xmm0
+ movdqu 16(%ebx, %eax), %xmm1
+ movdqu 32(%ebx, %eax), %xmm2
+ movdqu 48(%ebx, %eax), %xmm3
+ movdqu 64(%ebx, %eax), %xmm4
+ movdqu 80(%ebx, %eax), %xmm5
+ movdqu 96(%ebx, %eax), %xmm6
+ movdqu 112(%ebx, %eax), %xmm7
+ movdqa %xmm0, (%ebx)
+ movaps %xmm1, 16(%ebx)
+ movaps %xmm2, 32(%ebx)
+ movaps %xmm3, 48(%ebx)
+ movaps %xmm4, 64(%ebx)
+ movaps %xmm5, 80(%ebx)
+ movaps %xmm6, 96(%ebx)
+ movaps %xmm7, 112(%ebx)
+ jmp L(return)
+
+L(main_loop_just_one_iteration):
+ movdqu (%ebx, %eax), %xmm0
+ movdqu 16(%ebx, %eax), %xmm1
+ movdqu 32(%ebx, %eax), %xmm2
+ movdqu 48(%ebx, %eax), %xmm3
+ movdqa %xmm0, (%ebx)
+ movaps %xmm1, 16(%ebx)
+ movaps %xmm2, 32(%ebx)
+ movaps %xmm3, 48(%ebx)
+ jmp L(return)
+
+L(large_page):
+ movdqu (%eax), %xmm0
+ movdqu 16(%eax), %xmm1
+ movdqu 32(%eax), %xmm2
+ movdqu 48(%eax), %xmm3
+ movdqu -64(%eax, %ecx), %xmm4
+ movdqu -48(%eax, %ecx), %xmm5
+ movdqu -32(%eax, %ecx), %xmm6
+ movdqu -16(%eax, %ecx), %xmm7
+ movdqu %xmm0, (%edx)
+ movdqu %xmm1, 16(%edx)
+ movdqu %xmm2, 32(%edx)
+ movdqu %xmm3, 48(%edx)
+ movdqu %xmm4, -64(%edx, %ecx)
+ movdqu %xmm5, -48(%edx, %ecx)
+ movdqu %xmm6, -32(%edx, %ecx)
+ movdqu %xmm7, -16(%edx, %ecx)
+
+ movdqu 64(%eax), %xmm0
+ movdqu 80(%eax), %xmm1
+ movdqu 96(%eax), %xmm2
+ movdqu 112(%eax), %xmm3
+ movdqu -128(%eax, %ecx), %xmm4
+ movdqu -112(%eax, %ecx), %xmm5
+ movdqu -96(%eax, %ecx), %xmm6
+ movdqu -80(%eax, %ecx), %xmm7
+ movdqu %xmm0, 64(%edx)
+ movdqu %xmm1, 80(%edx)
+ movdqu %xmm2, 96(%edx)
+ movdqu %xmm3, 112(%edx)
+ movdqu %xmm4, -128(%edx, %ecx)
+ movdqu %xmm5, -112(%edx, %ecx)
+ movdqu %xmm6, -96(%edx, %ecx)
+ movdqu %xmm7, -80(%edx, %ecx)
+
+/* Now the main loop with non-temporal stores.  We align
+   the address of the destination.  */
+ leal 128(%edx), %ebx
+ andl $-128, %ebx
+
+ addl %edx, %ecx
+ andl $-128, %ecx
+
+ subl %edx, %eax
+
+ .p2align 4
+L(main_loop_large_page):
+ movdqu (%ebx, %eax), %xmm0
+ movdqu 16(%ebx, %eax), %xmm1
+ movdqu 32(%ebx, %eax), %xmm2
+ movdqu 48(%ebx, %eax), %xmm3
+ movdqu 64(%ebx, %eax), %xmm4
+ movdqu 80(%ebx, %eax), %xmm5
+ movdqu 96(%ebx, %eax), %xmm6
+ movdqu 112(%ebx, %eax), %xmm7
+ movntdq %xmm0, (%ebx)
+ movntdq %xmm1, 16(%ebx)
+ movntdq %xmm2, 32(%ebx)
+ movntdq %xmm3, 48(%ebx)
+ movntdq %xmm4, 64(%ebx)
+ movntdq %xmm5, 80(%ebx)
+ movntdq %xmm6, 96(%ebx)
+ movntdq %xmm7, 112(%ebx)
+ lea 128(%ebx), %ebx
+ cmpl %ebx, %ecx
+ jne L(main_loop_large_page)
+ sfence
+ jmp L(return)
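
Once the length reaches half the shared cache size, the loop above uses movntdq so the stores bypass the cache and a huge copy does not evict the working set; the sfence afterwards is mandatory before the data may be observed. The same pattern with intrinsics (a sketch; alignment setup and the tail are omitted):

#include <emmintrin.h>
#include <stddef.h>

/* Stream nblocks 128-byte blocks to a 16-byte aligned dst.  */
static void
stream_blocks (char *dst, const char *src, size_t nblocks)
{
  for (size_t i = 0; i < nblocks; i++, src += 128, dst += 128)
    for (int off = 0; off < 128; off += 16)
      _mm_stream_si128 ((__m128i *) (dst + off),
			_mm_loadu_si128 ((const __m128i *) (src + off)));
  _mm_sfence ();	/* make the non-temporal stores visible in order */
}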
+
+L(len_0_16_bytes):
+ testb $24, %cl
+ jne L(len_9_16_bytes)
+ testb $4, %cl
+ .p2align 4,,5
+ jne L(len_5_8_bytes)
+ testl %ecx, %ecx
+ .p2align 4,,2
+ je L(return)
+ movzbl (%eax), %ebx
+ testb $2, %cl
+ movb %bl, (%edx)
+ je L(return)
+ movzwl -2(%eax,%ecx), %ebx
+ movw %bx, -2(%edx,%ecx)
+ jmp L(return)
+
+L(len_9_16_bytes):
+ movq (%eax), %xmm0
+ movq -8(%eax, %ecx), %xmm1
+ movq %xmm0, (%edx)
+ movq %xmm1, -8(%edx, %ecx)
+ jmp L(return)
+
+L(len_5_8_bytes):
+ movl (%eax), %ebx
+ movl %ebx, (%edx)
+ movl -4(%eax,%ecx), %ebx
+ movl %ebx, -4(%edx,%ecx)
+
+L(return):
+ movl %edx, %eax
+# if !defined USE_AS_BCOPY && defined USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+ RETURN
+
+END (MEMCPY)
+#endif
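
The memmove logic above reduces to one rule: copy forward when the destination starts below the source or the ranges are disjoint, otherwise copy backward so every source byte is read before it can be overwritten. The direction choice alone, in portable C (none of the SSE2 tiers):

#include <stddef.h>

static void *
my_memmove (void *dstv, const void *srcv, size_t n)
{
  char *dst = dstv;
  const char *src = srcv;
  if (dst <= src || dst >= src + n)
    for (size_t i = 0; i < n; i++)	/* forward: dst below src or disjoint */
      dst[i] = src[i];
  else
    for (size_t i = n; i-- > 0; )	/* backward: dst inside [src, src+n) */
      dst[i] = src[i];
  return dstv;
}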
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
new file mode 100644
index 0000000000..687e083147
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
@@ -0,0 +1,1809 @@
+/* memcpy with SSSE3 and REP string.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#if IS_IN (libc) \
+ && (defined SHARED \
+ || defined USE_AS_MEMMOVE \
+ || !defined USE_MULTIARCH)
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY __memcpy_ssse3_rep
+# define MEMCPY_CHK __memcpy_chk_ssse3_rep
+#endif
+
+#ifdef USE_AS_BCOPY
+# define SRC PARMS
+# define DEST SRC+4
+# define LEN DEST+4
+#else
+# define DEST PARMS
+# define SRC DEST+4
+# define LEN SRC+4
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef SHARED
+# define PARMS 8 /* Preserve EBX. */
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets.  INDEX is a register containing the
+   index into the jump table.  SCALE is the scale of INDEX.  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
+ addl $(TABLE - .), %ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+#else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets.  INDEX is a register containing the index into the
+   jump table.  SCALE is the scale of INDEX.  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
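
In the SHARED case each table entry is stored as I - B, an offset relative to the table itself, so the table needs no load-time relocations; the dispatcher rebuilds the absolute target by adding the entry to the PC-relative table address. GNU C's labels-as-values extension expresses the same idea (a sketch, GCC-specific):

/* Computed goto over a relative jump table, mirroring
   JMPTBL (I, B) = I - B and BRANCH_TO_JMPTBL_ENTRY.  */
static int
dispatch (unsigned n)
{
  static const int table[] = {	/* relative offsets, constant at link time */
    &&op0 - &&base, &&op1 - &&base, &&op2 - &&base
  };
  if (n > 2)
    return -1;
  goto *(&&base + table[n]);	/* add the offset back to the base */
 base:
 op0:
  return 0;
 op1:
  return 1;
 op2:
  return 2;
}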
+
+ .section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+#endif
+ENTRY (MEMCPY)
+ ENTRANCE
+ movl LEN(%esp), %ecx
+ movl SRC(%esp), %eax
+ movl DEST(%esp), %edx
+
+#ifdef USE_AS_MEMMOVE
+ cmp %eax, %edx
+ jb L(copy_forward)
+ je L(fwd_write_0bytes)
+ cmp $48, %ecx
+ jb L(bk_write_less48bytes)
+ add %ecx, %eax
+ cmp %eax, %edx
+ movl SRC(%esp), %eax
+ jb L(copy_backward)
+
+L(copy_forward):
+#endif
+ cmp $48, %ecx
+ jae L(48bytesormore)
+
+L(fwd_write_less32bytes):
+#ifndef USE_AS_MEMMOVE
+ cmp %dl, %al
+ jb L(bk_write)
+#endif
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+#ifndef USE_AS_MEMMOVE
+L(bk_write):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+#endif
+
+ ALIGN (4)
+/* ECX >= 48 and EDX is 4 byte aligned.  */
+L(48bytesormore):
+ movdqu (%eax), %xmm0
+ PUSH (%edi)
+ movl %edx, %edi
+ and $-16, %edx
+ PUSH (%esi)
+ cfi_remember_state
+ add $16, %edx
+ movl %edi, %esi
+ sub %edx, %edi
+ add %edi, %ecx
+ sub %edi, %eax
+
+#ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_shared_cache_size_half, %ecx
+# endif
+#endif
+
+ mov %eax, %edi
+ jae L(large_page)
+ and $0xf, %edi
+ jz L(shl_0)
+
+ BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+
+ ALIGN (4)
+L(shl_0):
+ movdqu %xmm0, (%esi)
+ xor %edi, %edi
+ cmp $127, %ecx
+ ja L(shl_0_gobble)
+ lea -32(%ecx), %ecx
+L(shl_0_loop):
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+L(shl_0_end):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ add %edi, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+L(shl_0_gobble):
+
+#ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi
+# else
+ mov __x86_data_cache_size_half, %edi
+# endif
+#endif
+ mov %edi, %esi
+ shr $3, %esi
+ sub %esi, %edi
+ cmp %edi, %ecx
+ jae L(shl_0_gobble_mem_start)
+ sub $128, %ecx
+ ALIGN (4)
+L(shl_0_gobble_cache_loop):
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_cache_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_cache_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_cache_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_cache_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_cache_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_cache_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_cache_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_0_gobble_mem_start):
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+ sub $128, %ecx
+L(shl_0_gobble_mem_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ prefetchnta 0x1c0(%edx)
+ prefetchnta 0x280(%edx)
+
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $0x80, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_mem_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_mem_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_mem_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_mem_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_mem_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_mem_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_mem_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_1):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $1, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_1_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_1_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_1_loop)
+
+L(shl_1_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 1(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
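
Each L(shl_N) loop is the store-side twin of the memcmp shr_N blocks: two aligned loads are recombined with palignr $N so both loads and stores stay aligned despite the N-byte source misalignment, with xmm1/xmm4 rotating so the trailing block of one iteration seeds the next. One unrolled step as intrinsics (a sketch; loop control and the register rotation are omitted):

#include <tmmintrin.h>

/* Write 32 bytes to an aligned dst from the bytes at base + 1, using
   only aligned loads (the shl_1 case; other shifts change the
   immediate).  */
static void
copy32_shl1 (unsigned char *dst, const unsigned char *base)
{
  __m128i x1 = _mm_load_si128 ((const __m128i *) base);
  __m128i x2 = _mm_load_si128 ((const __m128i *) base + 1);
  __m128i x3 = _mm_load_si128 ((const __m128i *) base + 2);
  _mm_store_si128 ((__m128i *) dst, _mm_alignr_epi8 (x2, x1, 1));
  _mm_store_si128 ((__m128i *) dst + 1, _mm_alignr_epi8 (x3, x2, 1));
}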
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_2):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $2, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_2_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_2_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_2_loop)
+
+L(shl_2_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 2(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_3):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $3, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_3_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_3_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_3_loop)
+
+L(shl_3_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 3(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_4):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $4, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_4_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_4_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_4_loop)
+
+L(shl_4_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 4(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_5):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $5, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_5_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_5_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_5_loop)
+
+L(shl_5_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 5(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_6):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $6, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_6_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_6_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_6_loop)
+
+L(shl_6_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 6(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_7):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $7, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_7_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_7_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_7_loop)
+
+L(shl_7_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 7(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_8):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $8, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_8_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_8_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_8_loop)
+
+L(shl_8_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 8(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_9):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $9, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_9_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_9_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_9_loop)
+
+L(shl_9_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 9(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_10):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $10, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_10_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_10_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_10_loop)
+
+L(shl_10_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 10(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_11):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $11, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_11_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_11_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_11_loop)
+
+L(shl_11_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 11(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_12):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $12, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_12_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_12_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_12_loop)
+
+L(shl_12_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 12(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_13):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $13, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_13_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_13_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_13_loop)
+
+L(shl_13_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 13(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_14):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $14, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_14_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_14_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_14_loop)
+
+L(shl_14_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 14(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_15):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $15, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_15_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_15_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_15_loop)
+
+L(shl_15_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 15(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+
+ ALIGN (4)
+L(fwd_write_44bytes):
+ movl -44(%eax), %ecx
+ movl %ecx, -44(%edx)
+L(fwd_write_40bytes):
+ movl -40(%eax), %ecx
+ movl %ecx, -40(%edx)
+L(fwd_write_36bytes):
+ movl -36(%eax), %ecx
+ movl %ecx, -36(%edx)
+L(fwd_write_32bytes):
+ movl -32(%eax), %ecx
+ movl %ecx, -32(%edx)
+L(fwd_write_28bytes):
+ movl -28(%eax), %ecx
+ movl %ecx, -28(%edx)
+L(fwd_write_24bytes):
+ movl -24(%eax), %ecx
+ movl %ecx, -24(%edx)
+L(fwd_write_20bytes):
+ movl -20(%eax), %ecx
+ movl %ecx, -20(%edx)
+L(fwd_write_16bytes):
+ movl -16(%eax), %ecx
+ movl %ecx, -16(%edx)
+L(fwd_write_12bytes):
+ movl -12(%eax), %ecx
+ movl %ecx, -12(%edx)
+L(fwd_write_8bytes):
+ movl -8(%eax), %ecx
+ movl %ecx, -8(%edx)
+L(fwd_write_4bytes):
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+L(fwd_write_0bytes):
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_5bytes):
+ movl -5(%eax), %ecx
+ movl -4(%eax), %eax
+ movl %ecx, -5(%edx)
+ movl %eax, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_45bytes):
+ movl -45(%eax), %ecx
+ movl %ecx, -45(%edx)
+L(fwd_write_41bytes):
+ movl -41(%eax), %ecx
+ movl %ecx, -41(%edx)
+L(fwd_write_37bytes):
+ movl -37(%eax), %ecx
+ movl %ecx, -37(%edx)
+L(fwd_write_33bytes):
+ movl -33(%eax), %ecx
+ movl %ecx, -33(%edx)
+L(fwd_write_29bytes):
+ movl -29(%eax), %ecx
+ movl %ecx, -29(%edx)
+L(fwd_write_25bytes):
+ movl -25(%eax), %ecx
+ movl %ecx, -25(%edx)
+L(fwd_write_21bytes):
+ movl -21(%eax), %ecx
+ movl %ecx, -21(%edx)
+L(fwd_write_17bytes):
+ movl -17(%eax), %ecx
+ movl %ecx, -17(%edx)
+L(fwd_write_13bytes):
+ movl -13(%eax), %ecx
+ movl %ecx, -13(%edx)
+L(fwd_write_9bytes):
+ movl -9(%eax), %ecx
+ movl %ecx, -9(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+L(fwd_write_1bytes):
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_46bytes):
+ movl -46(%eax), %ecx
+ movl %ecx, -46(%edx)
+L(fwd_write_42bytes):
+ movl -42(%eax), %ecx
+ movl %ecx, -42(%edx)
+L(fwd_write_38bytes):
+ movl -38(%eax), %ecx
+ movl %ecx, -38(%edx)
+L(fwd_write_34bytes):
+ movl -34(%eax), %ecx
+ movl %ecx, -34(%edx)
+L(fwd_write_30bytes):
+ movl -30(%eax), %ecx
+ movl %ecx, -30(%edx)
+L(fwd_write_26bytes):
+ movl -26(%eax), %ecx
+ movl %ecx, -26(%edx)
+L(fwd_write_22bytes):
+ movl -22(%eax), %ecx
+ movl %ecx, -22(%edx)
+L(fwd_write_18bytes):
+ movl -18(%eax), %ecx
+ movl %ecx, -18(%edx)
+L(fwd_write_14bytes):
+ movl -14(%eax), %ecx
+ movl %ecx, -14(%edx)
+L(fwd_write_10bytes):
+ movl -10(%eax), %ecx
+ movl %ecx, -10(%edx)
+L(fwd_write_6bytes):
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+L(fwd_write_2bytes):
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_47bytes):
+ movl -47(%eax), %ecx
+ movl %ecx, -47(%edx)
+L(fwd_write_43bytes):
+ movl -43(%eax), %ecx
+ movl %ecx, -43(%edx)
+L(fwd_write_39bytes):
+ movl -39(%eax), %ecx
+ movl %ecx, -39(%edx)
+L(fwd_write_35bytes):
+ movl -35(%eax), %ecx
+ movl %ecx, -35(%edx)
+L(fwd_write_31bytes):
+ movl -31(%eax), %ecx
+ movl %ecx, -31(%edx)
+L(fwd_write_27bytes):
+ movl -27(%eax), %ecx
+ movl %ecx, -27(%edx)
+L(fwd_write_23bytes):
+ movl -23(%eax), %ecx
+ movl %ecx, -23(%edx)
+L(fwd_write_19bytes):
+ movl -19(%eax), %ecx
+ movl %ecx, -19(%edx)
+L(fwd_write_15bytes):
+ movl -15(%eax), %ecx
+ movl %ecx, -15(%edx)
+L(fwd_write_11bytes):
+ movl -11(%eax), %ecx
+ movl %ecx, -11(%edx)
+L(fwd_write_7bytes):
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+L(fwd_write_3bytes):
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN_END
+
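+/* Copies of at least half the shared cache size: stream the data with
+   non-temporal stores (movntdq) so the copy is less likely to displace
+   useful cache lines; the sfence at the end of this path orders the
+   weakly-ordered stores before the tail is finished.  */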
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(large_page):
+ movdqu (%eax), %xmm1
+ movdqu %xmm0, (%esi)
+ movntdq %xmm1, (%edx)
+ add $0x10, %eax
+ add $0x10, %edx
+ sub $0x10, %ecx
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+L(large_page_loop_init):
+ POP (%esi)
+ sub $0x80, %ecx
+ POP (%edi)
+L(large_page_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ movdqu 0x40(%eax), %xmm4
+ movdqu 0x50(%eax), %xmm5
+ movdqu 0x60(%eax), %xmm6
+ movdqu 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ lfence
+ sub $0x80, %ecx
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ movntdq %xmm4, 0x40(%edx)
+ movntdq %xmm5, 0x50(%edx)
+ movntdq %xmm6, 0x60(%edx)
+ movntdq %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+ jae L(large_page_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(large_page_less_64bytes)
+
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ lea 0x40(%eax), %eax
+
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ lea 0x40(%edx), %edx
+ sub $0x40, %ecx
+L(large_page_less_64bytes):
+ cmp $32, %ecx
+ jb L(large_page_less_32bytes)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ lea 0x20(%eax), %eax
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ lea 0x20(%edx), %edx
+ sub $0x20, %ecx
+L(large_page_less_32bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ sfence
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
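+/* Taken when the low bytes of the source and destination addresses
+   match: copy a dword at a time with rep movsl, then move the
+   remaining one to three bytes by hand.  */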
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(copy_page_by_rep):
+ mov %eax, %esi
+ mov %edx, %edi
+ mov %ecx, %edx
+ shr $2, %ecx
+ and $3, %edx
+ rep movsl
+ jz L(copy_page_by_rep_exit)
+ cmp $2, %edx
+ jb L(copy_page_by_rep_left_1)
+ movzwl (%esi), %eax
+ movw %ax, (%edi)
+ add $2, %esi
+ add $2, %edi
+ sub $2, %edx
+ jz L(copy_page_by_rep_exit)
+L(copy_page_by_rep_left_1):
+ movzbl (%esi), %eax
+ movb %al, (%edi)
+L(copy_page_by_rep_exit):
+ POP (%esi)
+ POP (%edi)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
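+/* Backward tail copies: the mirror of the forward chains above,
+   copying the first N bytes of the block; L(table_48_bytes_bwd)
+   dispatches here.  */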
+ ALIGN (4)
+L(bk_write_44bytes):
+ movl 40(%eax), %ecx
+ movl %ecx, 40(%edx)
+L(bk_write_40bytes):
+ movl 36(%eax), %ecx
+ movl %ecx, 36(%edx)
+L(bk_write_36bytes):
+ movl 32(%eax), %ecx
+ movl %ecx, 32(%edx)
+L(bk_write_32bytes):
+ movl 28(%eax), %ecx
+ movl %ecx, 28(%edx)
+L(bk_write_28bytes):
+ movl 24(%eax), %ecx
+ movl %ecx, 24(%edx)
+L(bk_write_24bytes):
+ movl 20(%eax), %ecx
+ movl %ecx, 20(%edx)
+L(bk_write_20bytes):
+ movl 16(%eax), %ecx
+ movl %ecx, 16(%edx)
+L(bk_write_16bytes):
+ movl 12(%eax), %ecx
+ movl %ecx, 12(%edx)
+L(bk_write_12bytes):
+ movl 8(%eax), %ecx
+ movl %ecx, 8(%edx)
+L(bk_write_8bytes):
+ movl 4(%eax), %ecx
+ movl %ecx, 4(%edx)
+L(bk_write_4bytes):
+ movl (%eax), %ecx
+ movl %ecx, (%edx)
+L(bk_write_0bytes):
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_45bytes):
+ movl 41(%eax), %ecx
+ movl %ecx, 41(%edx)
+L(bk_write_41bytes):
+ movl 37(%eax), %ecx
+ movl %ecx, 37(%edx)
+L(bk_write_37bytes):
+ movl 33(%eax), %ecx
+ movl %ecx, 33(%edx)
+L(bk_write_33bytes):
+ movl 29(%eax), %ecx
+ movl %ecx, 29(%edx)
+L(bk_write_29bytes):
+ movl 25(%eax), %ecx
+ movl %ecx, 25(%edx)
+L(bk_write_25bytes):
+ movl 21(%eax), %ecx
+ movl %ecx, 21(%edx)
+L(bk_write_21bytes):
+ movl 17(%eax), %ecx
+ movl %ecx, 17(%edx)
+L(bk_write_17bytes):
+ movl 13(%eax), %ecx
+ movl %ecx, 13(%edx)
+L(bk_write_13bytes):
+ movl 9(%eax), %ecx
+ movl %ecx, 9(%edx)
+L(bk_write_9bytes):
+ movl 5(%eax), %ecx
+ movl %ecx, 5(%edx)
+L(bk_write_5bytes):
+ movl 1(%eax), %ecx
+ movl %ecx, 1(%edx)
+L(bk_write_1bytes):
+ movzbl (%eax), %ecx
+ movb %cl, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_46bytes):
+ movl 42(%eax), %ecx
+ movl %ecx, 42(%edx)
+L(bk_write_42bytes):
+ movl 38(%eax), %ecx
+ movl %ecx, 38(%edx)
+L(bk_write_38bytes):
+ movl 34(%eax), %ecx
+ movl %ecx, 34(%edx)
+L(bk_write_34bytes):
+ movl 30(%eax), %ecx
+ movl %ecx, 30(%edx)
+L(bk_write_30bytes):
+ movl 26(%eax), %ecx
+ movl %ecx, 26(%edx)
+L(bk_write_26bytes):
+ movl 22(%eax), %ecx
+ movl %ecx, 22(%edx)
+L(bk_write_22bytes):
+ movl 18(%eax), %ecx
+ movl %ecx, 18(%edx)
+L(bk_write_18bytes):
+ movl 14(%eax), %ecx
+ movl %ecx, 14(%edx)
+L(bk_write_14bytes):
+ movl 10(%eax), %ecx
+ movl %ecx, 10(%edx)
+L(bk_write_10bytes):
+ movl 6(%eax), %ecx
+ movl %ecx, 6(%edx)
+L(bk_write_6bytes):
+ movl 2(%eax), %ecx
+ movl %ecx, 2(%edx)
+L(bk_write_2bytes):
+ movzwl (%eax), %ecx
+ movw %cx, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_47bytes):
+ movl 43(%eax), %ecx
+ movl %ecx, 43(%edx)
+L(bk_write_43bytes):
+ movl 39(%eax), %ecx
+ movl %ecx, 39(%edx)
+L(bk_write_39bytes):
+ movl 35(%eax), %ecx
+ movl %ecx, 35(%edx)
+L(bk_write_35bytes):
+ movl 31(%eax), %ecx
+ movl %ecx, 31(%edx)
+L(bk_write_31bytes):
+ movl 27(%eax), %ecx
+ movl %ecx, 27(%edx)
+L(bk_write_27bytes):
+ movl 23(%eax), %ecx
+ movl %ecx, 23(%edx)
+L(bk_write_23bytes):
+ movl 19(%eax), %ecx
+ movl %ecx, 19(%edx)
+L(bk_write_19bytes):
+ movl 15(%eax), %ecx
+ movl %ecx, 15(%edx)
+L(bk_write_15bytes):
+ movl 11(%eax), %ecx
+ movl %ecx, 11(%edx)
+L(bk_write_11bytes):
+ movl 7(%eax), %ecx
+ movl %ecx, 7(%edx)
+L(bk_write_7bytes):
+ movl 3(%eax), %ecx
+ movl %ecx, 3(%edx)
+L(bk_write_3bytes):
+ movzwl 1(%eax), %ecx
+ movw %cx, 1(%edx)
+ movzbl (%eax), %eax
+ movb %al, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN_END
+
+
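+/* Dispatch tables, kept out of the text section in .rodata.ssse3.
+   Each entry holds a handler offset interpreted by
+   BRANCH_TO_JMPTBL_ENTRY: relative to the table in PIC builds,
+   absolute otherwise (see the JMPTBL macro).  */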
+ .pushsection .rodata.ssse3,"a",@progbits
+ ALIGN (2)
+L(table_48bytes_fwd):
+ .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
+
+ ALIGN (2)
+L(shl_table):
+ .int JMPTBL (L(shl_0), L(shl_table))
+ .int JMPTBL (L(shl_1), L(shl_table))
+ .int JMPTBL (L(shl_2), L(shl_table))
+ .int JMPTBL (L(shl_3), L(shl_table))
+ .int JMPTBL (L(shl_4), L(shl_table))
+ .int JMPTBL (L(shl_5), L(shl_table))
+ .int JMPTBL (L(shl_6), L(shl_table))
+ .int JMPTBL (L(shl_7), L(shl_table))
+ .int JMPTBL (L(shl_8), L(shl_table))
+ .int JMPTBL (L(shl_9), L(shl_table))
+ .int JMPTBL (L(shl_10), L(shl_table))
+ .int JMPTBL (L(shl_11), L(shl_table))
+ .int JMPTBL (L(shl_12), L(shl_table))
+ .int JMPTBL (L(shl_13), L(shl_table))
+ .int JMPTBL (L(shl_14), L(shl_table))
+ .int JMPTBL (L(shl_15), L(shl_table))
+
+ ALIGN (2)
+L(table_48_bytes_bwd):
+ .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
+
+ .popsection
+
+#ifdef USE_AS_MEMMOVE
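+/* memmove only: the destination starts inside the source block, so
+   copy from the high end downward.  */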
+ ALIGN (4)
+L(copy_backward):
+ PUSH (%esi)
+ movl %eax, %esi
+ add %ecx, %edx
+ add %ecx, %esi
+ testl $0x3, %edx
+ jnz L(bk_align)
+
+L(bk_aligned_4):
+ cmp $64, %ecx
+ jae L(bk_write_more64bytes)
+
+L(bk_write_64bytesless):
+ cmp $32, %ecx
+ jb L(bk_write_less32bytes)
+
+L(bk_write_more32bytes):
+ /* Copy 32 bytes at a time. */
+ sub $32, %ecx
+ movl -4(%esi), %eax
+ movl %eax, -4(%edx)
+ movl -8(%esi), %eax
+ movl %eax, -8(%edx)
+ movl -12(%esi), %eax
+ movl %eax, -12(%edx)
+ movl -16(%esi), %eax
+ movl %eax, -16(%edx)
+ movl -20(%esi), %eax
+ movl %eax, -20(%edx)
+ movl -24(%esi), %eax
+ movl %eax, -24(%edx)
+ movl -28(%esi), %eax
+ movl %eax, -28(%edx)
+ movl -32(%esi), %eax
+ movl %eax, -32(%edx)
+ sub $32, %edx
+ sub $32, %esi
+
+L(bk_write_less32bytes):
+ movl %esi, %eax
+ sub %ecx, %edx
+ sub %ecx, %eax
+ POP (%esi)
+L(bk_write_less48bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+
+ CFI_PUSH (%esi)
+ ALIGN (4)
+L(bk_align):
+ cmp $8, %ecx
+ jbe L(bk_write_less32bytes)
+ testl $1, %edx
+	/* We get here only if (EDX & 3) != 0, so if (EDX & 1) == 0,
+	   then (EDX & 2) must be != 0.  */
+ jz L(bk_got2)
+ sub $1, %esi
+ sub $1, %ecx
+ sub $1, %edx
+ movzbl (%esi), %eax
+ movb %al, (%edx)
+
+ testl $2, %edx
+ jz L(bk_aligned_4)
+
+L(bk_got2):
+ sub $2, %esi
+ sub $2, %ecx
+ sub $2, %edx
+ movzwl (%esi), %eax
+ movw %ax, (%edx)
+ jmp L(bk_aligned_4)
+
+ ALIGN (4)
+L(bk_write_more64bytes):
+	/* Check whether the destination end is already 16-byte aligned.  */
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+/* EDX is 4-byte aligned, but not 16-byte aligned.  */
+L(bk_ssse3_align):
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+L(bk_ssse3_cpy_pre):
+ cmp $64, %ecx
+ jb L(bk_write_more32bytes)
+
+L(bk_ssse3_cpy):
+ sub $64, %esi
+ sub $64, %ecx
+ sub $64, %edx
+ movdqu 0x30(%esi), %xmm3
+ movdqa %xmm3, 0x30(%edx)
+ movdqu 0x20(%esi), %xmm2
+ movdqa %xmm2, 0x20(%edx)
+ movdqu 0x10(%esi), %xmm1
+ movdqa %xmm1, 0x10(%edx)
+ movdqu (%esi), %xmm0
+ movdqa %xmm0, (%edx)
+ cmp $64, %ecx
+ jae L(bk_ssse3_cpy)
+ jmp L(bk_write_64bytesless)
+
+#endif
+
+END (MEMCPY)
+
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
new file mode 100644
index 0000000000..53e8a6ca1d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
@@ -0,0 +1,3162 @@
+/* memcpy with SSSE3
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) \
+ && (defined SHARED \
+ || defined USE_AS_MEMMOVE \
+ || !defined USE_MULTIARCH)
+
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+# ifndef MEMCPY
+# define MEMCPY __memcpy_ssse3
+# define MEMCPY_CHK __memcpy_chk_ssse3
+# endif
+
+# ifdef USE_AS_BCOPY
+# define SRC PARMS
+# define DEST SRC+4
+# define LEN DEST+4
+# else
+# define DEST PARMS
+# define SRC DEST+4
+# define LEN SRC+4
+# endif
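+
+/* bcopy takes (src, dest, len) whereas memcpy takes (dest, src, len);
+   the offsets above select the right stack slot for each argument.  */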
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifdef SHARED
+# define PARMS 8 /* Preserve EBX. */
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+   jump table with relative offsets.  INDEX is a register containing the
+ index into the jump table. SCALE is the scale of INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx, INDEX, SCALE), %ebx; \
+    /* EBX now holds the absolute target address.  Go.  */ \
+ jmp *%ebx
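+
+/* For example (PIC case), with TABLE = L(table_48bytes_fwd) and
+   INDEX = 5 (five tail bytes), the entry at TABLE+20 holds
+   L(fwd_write_5bytes) - L(table_48bytes_fwd), so the final addl leaves
+   the absolute address of L(fwd_write_5bytes) in EBX.  */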
+# else
+
+# define PARMS 4
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+   absolute offsets.  INDEX is a register containing the index into the
+ jump table. SCALE is the scale of INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(, INDEX, SCALE)
+# endif
+
+ .section .text.ssse3,"ax",@progbits
+# if !defined USE_AS_BCOPY
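+/* __memcpy_chk (dst, src, len, dstlen): abort via __chk_fail when the
+   destination object size (fourth argument) is smaller than len,
+   otherwise fall through into memcpy proper.  */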
+ENTRY (MEMCPY_CHK)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+# endif
+ENTRY (MEMCPY)
+ ENTRANCE
+ movl LEN(%esp), %ecx
+ movl SRC(%esp), %eax
+ movl DEST(%esp), %edx
+
+# ifdef USE_AS_MEMMOVE
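+	/* memmove: copy forward when the destination precedes the source
+	   or the blocks do not overlap.  Copies of 32 bytes or more go
+	   backward only when the destination starts inside the source
+	   block; shorter ones use the backward tail copies directly.  */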
+ cmp %eax, %edx
+ jb L(copy_forward)
+ je L(fwd_write_0bytes)
+ cmp $32, %ecx
+ jae L(memmove_bwd)
+ jmp L(bk_write_less32bytes_2)
+
+ .p2align 4
+L(memmove_bwd):
+ add %ecx, %eax
+ cmp %eax, %edx
+ movl SRC(%esp), %eax
+ jb L(copy_backward)
+
+L(copy_forward):
+# endif
+ cmp $48, %ecx
+ jae L(48bytesormore)
+
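+/* Fewer than 48 bytes: dispatch straight to an exact-length tail copy
+   (forward entries index the block from its end, backward ones from
+   its start).  */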
+L(fwd_write_less32bytes):
+# ifndef USE_AS_MEMMOVE
+ cmp %dl, %al
+ jb L(bk_write)
+# endif
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+# ifndef USE_AS_MEMMOVE
+ .p2align 4
+L(bk_write):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+# endif
+
+ .p2align 4
+L(48bytesormore):
+# ifndef USE_AS_MEMMOVE
+ movlpd (%eax), %xmm0
+ movlpd 8(%eax), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+# else
+ movdqu (%eax), %xmm0
+# endif
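+	/* The first 16 source bytes were copied (or captured in %xmm0 for
+	   memmove) above; round the destination up to a 16-byte boundary
+	   and advance the source and the count to match.  */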
+ PUSH (%edi)
+ movl %edx, %edi
+ and $-16, %edx
+ add $16, %edx
+ sub %edx, %edi
+ add %edi, %ecx
+ sub %edi, %eax
+
+# ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_shared_cache_size_half, %ecx
+# endif
+# endif
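+	/* Copies of at least half the shared cache take the non-temporal
+	   path; otherwise dispatch on the source's residual misalignment
+	   (0-15) to the matching palignr variant.  */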
+
+ mov %eax, %edi
+ jae L(large_page)
+ and $0xf, %edi
+ jz L(shl_0)
+ BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+
+ .p2align 4
+L(shl_0):
+# ifdef USE_AS_MEMMOVE
+ movl DEST+4(%esp), %edi
+ movdqu %xmm0, (%edi)
+# endif
+ xor %edi, %edi
+ cmp $127, %ecx
+ ja L(shl_0_gobble)
+ lea -32(%ecx), %ecx
+
+ .p2align 4
+L(shl_0_loop):
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+
+L(shl_0_end):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ add %edi, %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_0_gobble):
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ POP (%edi)
+ lea -128(%ecx), %ecx
+ jae L(shl_0_gobble_mem_loop)
+
+ .p2align 4
+L(shl_0_gobble_cache_loop):
+ movdqa (%eax), %xmm0
+ movdqa 0x10(%eax), %xmm1
+ movdqa 0x20(%eax), %xmm2
+ movdqa 0x30(%eax), %xmm3
+ movdqa 0x40(%eax), %xmm4
+ movdqa 0x50(%eax), %xmm5
+ movdqa 0x60(%eax), %xmm6
+ movdqa 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ movdqa %xmm2, 0x20(%edx)
+ movdqa %xmm3, 0x30(%edx)
+ movdqa %xmm4, 0x40(%edx)
+ movdqa %xmm5, 0x50(%edx)
+ movdqa %xmm6, 0x60(%edx)
+ movdqa %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_cache_loop)
+ cmp $-0x40, %ecx
+ lea 0x80(%ecx), %ecx
+ jl L(shl_0_cache_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+
+L(shl_0_cache_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_cache_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+
+L(shl_0_cache_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_cache_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+
+L(shl_0_cache_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ .p2align 4
+L(shl_0_gobble_mem_loop):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x280(%eax)
+ prefetcht0 0x1c0(%edx)
+
+ movdqa (%eax), %xmm0
+ movdqa 0x10(%eax), %xmm1
+ movdqa 0x20(%eax), %xmm2
+ movdqa 0x30(%eax), %xmm3
+ movdqa 0x40(%eax), %xmm4
+ movdqa 0x50(%eax), %xmm5
+ movdqa 0x60(%eax), %xmm6
+ movdqa 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $0x80, %ecx
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ movdqa %xmm2, 0x20(%edx)
+ movdqa %xmm3, 0x30(%edx)
+ movdqa %xmm4, 0x40(%edx)
+ movdqa %xmm5, 0x50(%edx)
+ movdqa %xmm6, 0x60(%edx)
+ movdqa %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_mem_loop)
+ cmp $-0x40, %ecx
+ lea 0x80(%ecx), %ecx
+ jl L(shl_0_mem_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+
+L(shl_0_mem_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_mem_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+
+L(shl_0_mem_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_mem_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+
+L(shl_0_mem_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
+
+ .p2align 4
+L(shl_1):
+# ifndef USE_AS_MEMMOVE
+ movaps -1(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -1(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_1_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
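+	/* 64 bytes per iteration: four aligned 16-byte loads are spliced
+	   into the unaligned source data with palignr $1; the unshifted
+	   top block is kept in %xmm7 and becomes the next iteration's
+	   %xmm1.  */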
+ .p2align 4
+L(Shl1LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 15(%eax), %xmm2
+ movaps 31(%eax), %xmm3
+ movaps 47(%eax), %xmm4
+ movaps 63(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $1, %xmm4, %xmm5
+ palignr $1, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $1, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $1, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl1LoopStart)
+
+L(Shl1LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 15(%eax), %xmm2
+ movaps 31(%eax), %xmm3
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_1_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -1(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_1_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_1_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_1_no_prefetch_loop)
+
+L(sh_1_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 1(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_2):
+# ifndef USE_AS_MEMMOVE
+ movaps -2(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -2(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_2_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl2LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 14(%eax), %xmm2
+ movaps 30(%eax), %xmm3
+ movaps 46(%eax), %xmm4
+ movaps 62(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $2, %xmm4, %xmm5
+ palignr $2, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $2, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $2, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl2LoopStart)
+
+L(Shl2LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 14(%eax), %xmm2
+ movaps 30(%eax), %xmm3
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_2_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -2(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_2_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_2_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_2_no_prefetch_loop)
+
+L(sh_2_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 2(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_3):
+# ifndef USE_AS_MEMMOVE
+ movaps -3(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -3(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_3_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl3LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 13(%eax), %xmm2
+ movaps 29(%eax), %xmm3
+ movaps 45(%eax), %xmm4
+ movaps 61(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $3, %xmm4, %xmm5
+ palignr $3, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $3, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $3, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl3LoopStart)
+
+L(Shl3LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 13(%eax), %xmm2
+ movaps 29(%eax), %xmm3
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_3_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -3(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_3_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(sh_3_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(sh_3_no_prefetch_loop)
+
+L(sh_3_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 3(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_4):
+# ifndef USE_AS_MEMMOVE
+ movaps -4(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -4(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_4_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl4LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 12(%eax), %xmm2
+ movaps 28(%eax), %xmm3
+ movaps 44(%eax), %xmm4
+ movaps 60(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $4, %xmm4, %xmm5
+ palignr $4, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $4, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl4LoopStart)
+
+L(Shl4LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 12(%eax), %xmm2
+ movaps 28(%eax), %xmm3
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_4_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -4(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_4_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(sh_4_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(sh_4_no_prefetch_loop)
+
+L(sh_4_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 4(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_5):
+# ifndef USE_AS_MEMMOVE
+ movaps -5(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -5(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_5_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl5LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 11(%eax), %xmm2
+ movaps 27(%eax), %xmm3
+ movaps 43(%eax), %xmm4
+ movaps 59(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $5, %xmm4, %xmm5
+ palignr $5, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $5, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $5, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl5LoopStart)
+
+L(Shl5LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 11(%eax), %xmm2
+ movaps 27(%eax), %xmm3
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_5_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -5(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_5_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(sh_5_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(sh_5_no_prefetch_loop)
+
+L(sh_5_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 5(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_6):
+# ifndef USE_AS_MEMMOVE
+ movaps -6(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -6(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_6_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl6LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 10(%eax), %xmm2
+ movaps 26(%eax), %xmm3
+ movaps 42(%eax), %xmm4
+ movaps 58(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $6, %xmm4, %xmm5
+ palignr $6, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $6, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $6, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl6LoopStart)
+
+L(Shl6LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 10(%eax), %xmm2
+ movaps 26(%eax), %xmm3
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_6_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -6(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_6_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(sh_6_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(sh_6_no_prefetch_loop)
+
+L(sh_6_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 6(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_7):
+# ifndef USE_AS_MEMMOVE
+ movaps -7(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -7(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_7_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl7LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 9(%eax), %xmm2
+ movaps 25(%eax), %xmm3
+ movaps 41(%eax), %xmm4
+ movaps 57(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $7, %xmm4, %xmm5
+ palignr $7, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $7, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $7, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl7LoopStart)
+
+L(Shl7LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 9(%eax), %xmm2
+ movaps 25(%eax), %xmm3
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_7_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -7(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_7_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_7_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_7_no_prefetch_loop)
+
+L(sh_7_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 7(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_8):
+# ifndef USE_AS_MEMMOVE
+ movaps -8(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -8(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_8_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl8LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 8(%eax), %xmm2
+ movaps 24(%eax), %xmm3
+ movaps 40(%eax), %xmm4
+ movaps 56(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $8, %xmm4, %xmm5
+ palignr $8, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $8, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl8LoopStart)
+
+L(Shl8LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 8(%eax), %xmm2
+ movaps 24(%eax), %xmm3
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_8_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -8(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_8_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_8_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_8_no_prefetch_loop)
+
+L(sh_8_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 8(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_9):
+# ifndef USE_AS_MEMMOVE
+ movaps -9(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -9(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_9_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl9LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 7(%eax), %xmm2
+ movaps 23(%eax), %xmm3
+ movaps 39(%eax), %xmm4
+ movaps 55(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $9, %xmm4, %xmm5
+ palignr $9, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $9, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $9, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl9LoopStart)
+
+L(Shl9LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 7(%eax), %xmm2
+ movaps 23(%eax), %xmm3
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm1, %xmm2
+
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_9_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -9(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_9_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_9_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_9_no_prefetch_loop)
+
+L(sh_9_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 9(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_10):
+# ifndef USE_AS_MEMMOVE
+ movaps -10(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -10(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_10_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl10LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 6(%eax), %xmm2
+ movaps 22(%eax), %xmm3
+ movaps 38(%eax), %xmm4
+ movaps 54(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $10, %xmm4, %xmm5
+ palignr $10, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $10, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $10, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl10LoopStart)
+
+L(Shl10LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 6(%eax), %xmm2
+ movaps 22(%eax), %xmm3
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm1, %xmm2
+
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_10_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -10(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_10_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_10_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_10_no_prefetch_loop)
+
+L(sh_10_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 10(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_11):
+# ifndef USE_AS_MEMMOVE
+ movaps -11(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -11(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_11_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl11LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 5(%eax), %xmm2
+ movaps 21(%eax), %xmm3
+ movaps 37(%eax), %xmm4
+ movaps 53(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $11, %xmm4, %xmm5
+ palignr $11, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $11, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $11, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl11LoopStart)
+
+L(Shl11LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 5(%eax), %xmm2
+ movaps 21(%eax), %xmm3
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm1, %xmm2
+
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_11_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -11(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_11_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_11_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_11_no_prefetch_loop)
+
+L(sh_11_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 11(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_12):
+# ifndef USE_AS_MEMMOVE
+ movaps -12(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -12(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_12_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl12LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 4(%eax), %xmm2
+ movaps 20(%eax), %xmm3
+ movaps 36(%eax), %xmm4
+ movaps 52(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $12, %xmm4, %xmm5
+ palignr $12, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $12, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl12LoopStart)
+
+L(Shl12LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 4(%eax), %xmm2
+ movaps 20(%eax), %xmm3
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm1, %xmm2
+
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_12_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -12(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_12_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_12_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_12_no_prefetch_loop)
+
+L(sh_12_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 12(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_13):
+# ifndef USE_AS_MEMMOVE
+ movaps -13(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -13(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_13_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl13LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 3(%eax), %xmm2
+ movaps 19(%eax), %xmm3
+ movaps 35(%eax), %xmm4
+ movaps 51(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $13, %xmm4, %xmm5
+ palignr $13, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $13, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $13, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl13LoopStart)
+
+L(Shl13LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 3(%eax), %xmm2
+ movaps 19(%eax), %xmm3
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm1, %xmm2
+
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_13_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -13(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_13_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_13_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_13_no_prefetch_loop)
+
+L(sh_13_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 13(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_14):
+# ifndef USE_AS_MEMMOVE
+ movaps -14(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -14(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_14_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl14LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 2(%eax), %xmm2
+ movaps 18(%eax), %xmm3
+ movaps 34(%eax), %xmm4
+ movaps 50(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $14, %xmm4, %xmm5
+ palignr $14, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $14, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $14, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl14LoopStart)
+
+L(Shl14LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 2(%eax), %xmm2
+ movaps 18(%eax), %xmm3
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm1, %xmm2
+
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_14_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -14(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_14_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_14_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_14_no_prefetch_loop)
+
+L(sh_14_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 14(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_15):
+# ifndef USE_AS_MEMMOVE
+ movaps -15(%eax), %xmm1
+# else
+ movl DEST+4(%esp), %edi
+ movaps -15(%eax), %xmm1
+ movdqu %xmm0, (%edi)
+# endif
+# ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+# else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_data_cache_size_half, %ecx
+# endif
+# endif
+ jb L(sh_15_no_prefetch)
+
+ lea -64(%ecx), %ecx
+
+ .p2align 4
+L(Shl15LoopStart):
+ prefetcht0 0x1c0(%eax)
+ prefetcht0 0x1c0(%edx)
+ movaps 1(%eax), %xmm2
+ movaps 17(%eax), %xmm3
+ movaps 33(%eax), %xmm4
+ movaps 49(%eax), %xmm5
+ movaps %xmm5, %xmm7
+ palignr $15, %xmm4, %xmm5
+ palignr $15, %xmm3, %xmm4
+ movaps %xmm5, 48(%edx)
+ palignr $15, %xmm2, %xmm3
+ lea 64(%eax), %eax
+ palignr $15, %xmm1, %xmm2
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm7, %xmm1
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ sub $64, %ecx
+ ja L(Shl15LoopStart)
+
+L(Shl15LoopLeave):
+ add $32, %ecx
+ jle L(shl_end_0)
+
+ movaps 1(%eax), %xmm2
+ movaps 17(%eax), %xmm3
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm1, %xmm2
+
+ movaps %xmm2, (%edx)
+ movaps %xmm3, 16(%edx)
+ lea 32(%edx, %ecx), %edx
+ lea 32(%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(sh_15_no_prefetch):
+ lea -32(%ecx), %ecx
+ lea -15(%eax), %eax
+ xor %edi, %edi
+
+ .p2align 4
+L(sh_15_no_prefetch_loop):
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jb L(sh_15_end_no_prefetch_loop)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+ jae L(sh_15_no_prefetch_loop)
+
+L(sh_15_end_no_prefetch_loop):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 15(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(shl_end_0):
+ lea 32(%ecx), %ecx
+ lea (%edx, %ecx), %edx
+ lea (%eax, %ecx), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
+
+ .p2align 4
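+/* Tail copies of 0..47 bytes, entered with %eax/%edx pointing past the
+   end.  Each entry copies the first 8 bytes of its tail and falls
+   through to the next smaller case of the same residue. */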
+L(fwd_write_44bytes):
+ movq -44(%eax), %xmm0
+ movq %xmm0, -44(%edx)
+L(fwd_write_36bytes):
+ movq -36(%eax), %xmm0
+ movq %xmm0, -36(%edx)
+L(fwd_write_28bytes):
+ movq -28(%eax), %xmm0
+ movq %xmm0, -28(%edx)
+L(fwd_write_20bytes):
+ movq -20(%eax), %xmm0
+ movq %xmm0, -20(%edx)
+L(fwd_write_12bytes):
+ movq -12(%eax), %xmm0
+ movq %xmm0, -12(%edx)
+L(fwd_write_4bytes):
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_40bytes):
+ movq -40(%eax), %xmm0
+ movq %xmm0, -40(%edx)
+L(fwd_write_32bytes):
+ movq -32(%eax), %xmm0
+ movq %xmm0, -32(%edx)
+L(fwd_write_24bytes):
+ movq -24(%eax), %xmm0
+ movq %xmm0, -24(%edx)
+L(fwd_write_16bytes):
+ movq -16(%eax), %xmm0
+ movq %xmm0, -16(%edx)
+L(fwd_write_8bytes):
+ movq -8(%eax), %xmm0
+ movq %xmm0, -8(%edx)
+L(fwd_write_0bytes):
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_5bytes):
+ movl -5(%eax), %ecx
+ movl -4(%eax), %eax
+ movl %ecx, -5(%edx)
+ movl %eax, -4(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_45bytes):
+ movq -45(%eax), %xmm0
+ movq %xmm0, -45(%edx)
+L(fwd_write_37bytes):
+ movq -37(%eax), %xmm0
+ movq %xmm0, -37(%edx)
+L(fwd_write_29bytes):
+ movq -29(%eax), %xmm0
+ movq %xmm0, -29(%edx)
+L(fwd_write_21bytes):
+ movq -21(%eax), %xmm0
+ movq %xmm0, -21(%edx)
+L(fwd_write_13bytes):
+ movq -13(%eax), %xmm0
+ movq %xmm0, -13(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_41bytes):
+ movq -41(%eax), %xmm0
+ movq %xmm0, -41(%edx)
+L(fwd_write_33bytes):
+ movq -33(%eax), %xmm0
+ movq %xmm0, -33(%edx)
+L(fwd_write_25bytes):
+ movq -25(%eax), %xmm0
+ movq %xmm0, -25(%edx)
+L(fwd_write_17bytes):
+ movq -17(%eax), %xmm0
+ movq %xmm0, -17(%edx)
+L(fwd_write_9bytes):
+ movq -9(%eax), %xmm0
+ movq %xmm0, -9(%edx)
+L(fwd_write_1bytes):
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_46bytes):
+ movq -46(%eax), %xmm0
+ movq %xmm0, -46(%edx)
+L(fwd_write_38bytes):
+ movq -38(%eax), %xmm0
+ movq %xmm0, -38(%edx)
+L(fwd_write_30bytes):
+ movq -30(%eax), %xmm0
+ movq %xmm0, -30(%edx)
+L(fwd_write_22bytes):
+ movq -22(%eax), %xmm0
+ movq %xmm0, -22(%edx)
+L(fwd_write_14bytes):
+ movq -14(%eax), %xmm0
+ movq %xmm0, -14(%edx)
+L(fwd_write_6bytes):
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_42bytes):
+ movq -42(%eax), %xmm0
+ movq %xmm0, -42(%edx)
+L(fwd_write_34bytes):
+ movq -34(%eax), %xmm0
+ movq %xmm0, -34(%edx)
+L(fwd_write_26bytes):
+ movq -26(%eax), %xmm0
+ movq %xmm0, -26(%edx)
+L(fwd_write_18bytes):
+ movq -18(%eax), %xmm0
+ movq %xmm0, -18(%edx)
+L(fwd_write_10bytes):
+ movq -10(%eax), %xmm0
+ movq %xmm0, -10(%edx)
+L(fwd_write_2bytes):
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_47bytes):
+ movq -47(%eax), %xmm0
+ movq %xmm0, -47(%edx)
+L(fwd_write_39bytes):
+ movq -39(%eax), %xmm0
+ movq %xmm0, -39(%edx)
+L(fwd_write_31bytes):
+ movq -31(%eax), %xmm0
+ movq %xmm0, -31(%edx)
+L(fwd_write_23bytes):
+ movq -23(%eax), %xmm0
+ movq %xmm0, -23(%edx)
+L(fwd_write_15bytes):
+ movq -15(%eax), %xmm0
+ movq %xmm0, -15(%edx)
+L(fwd_write_7bytes):
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_43bytes):
+ movq -43(%eax), %xmm0
+ movq %xmm0, -43(%edx)
+L(fwd_write_35bytes):
+ movq -35(%eax), %xmm0
+ movq %xmm0, -35(%edx)
+L(fwd_write_27bytes):
+ movq -27(%eax), %xmm0
+ movq %xmm0, -27(%edx)
+L(fwd_write_19bytes):
+ movq -19(%eax), %xmm0
+ movq %xmm0, -19(%edx)
+L(fwd_write_11bytes):
+ movq -11(%eax), %xmm0
+ movq %xmm0, -11(%edx)
+L(fwd_write_3bytes):
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_40bytes_align):
+ movdqa -40(%eax), %xmm0
+ movdqa %xmm0, -40(%edx)
+L(fwd_write_24bytes_align):
+ movdqa -24(%eax), %xmm0
+ movdqa %xmm0, -24(%edx)
+L(fwd_write_8bytes_align):
+ movq -8(%eax), %xmm0
+ movq %xmm0, -8(%edx)
+L(fwd_write_0bytes_align):
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_32bytes_align):
+ movdqa -32(%eax), %xmm0
+ movdqa %xmm0, -32(%edx)
+L(fwd_write_16bytes_align):
+ movdqa -16(%eax), %xmm0
+ movdqa %xmm0, -16(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_5bytes_align):
+ movl -5(%eax), %ecx
+ movl -4(%eax), %eax
+ movl %ecx, -5(%edx)
+ movl %eax, -4(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_45bytes_align):
+ movdqa -45(%eax), %xmm0
+ movdqa %xmm0, -45(%edx)
+L(fwd_write_29bytes_align):
+ movdqa -29(%eax), %xmm0
+ movdqa %xmm0, -29(%edx)
+L(fwd_write_13bytes_align):
+ movq -13(%eax), %xmm0
+ movq %xmm0, -13(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_37bytes_align):
+ movdqa -37(%eax), %xmm0
+ movdqa %xmm0, -37(%edx)
+L(fwd_write_21bytes_align):
+ movdqa -21(%eax), %xmm0
+ movdqa %xmm0, -21(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_41bytes_align):
+ movdqa -41(%eax), %xmm0
+ movdqa %xmm0, -41(%edx)
+L(fwd_write_25bytes_align):
+ movdqa -25(%eax), %xmm0
+ movdqa %xmm0, -25(%edx)
+L(fwd_write_9bytes_align):
+ movq -9(%eax), %xmm0
+ movq %xmm0, -9(%edx)
+L(fwd_write_1bytes_align):
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_33bytes_align):
+ movdqa -33(%eax), %xmm0
+ movdqa %xmm0, -33(%edx)
+L(fwd_write_17bytes_align):
+ movdqa -17(%eax), %xmm0
+ movdqa %xmm0, -17(%edx)
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_46bytes_align):
+ movdqa -46(%eax), %xmm0
+ movdqa %xmm0, -46(%edx)
+L(fwd_write_30bytes_align):
+ movdqa -30(%eax), %xmm0
+ movdqa %xmm0, -30(%edx)
+L(fwd_write_14bytes_align):
+ movq -14(%eax), %xmm0
+ movq %xmm0, -14(%edx)
+L(fwd_write_6bytes_align):
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_38bytes_align):
+ movdqa -38(%eax), %xmm0
+ movdqa %xmm0, -38(%edx)
+L(fwd_write_22bytes_align):
+ movdqa -22(%eax), %xmm0
+ movdqa %xmm0, -22(%edx)
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_42bytes_align):
+ movdqa -42(%eax), %xmm0
+ movdqa %xmm0, -42(%edx)
+L(fwd_write_26bytes_align):
+ movdqa -26(%eax), %xmm0
+ movdqa %xmm0, -26(%edx)
+L(fwd_write_10bytes_align):
+ movq -10(%eax), %xmm0
+ movq %xmm0, -10(%edx)
+L(fwd_write_2bytes_align):
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_34bytes_align):
+ movdqa -34(%eax), %xmm0
+ movdqa %xmm0, -34(%edx)
+L(fwd_write_18bytes_align):
+ movdqa -18(%eax), %xmm0
+ movdqa %xmm0, -18(%edx)
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_47bytes_align):
+ movdqa -47(%eax), %xmm0
+ movdqa %xmm0, -47(%edx)
+L(fwd_write_31bytes_align):
+ movdqa -31(%eax), %xmm0
+ movdqa %xmm0, -31(%edx)
+L(fwd_write_15bytes_align):
+ movq -15(%eax), %xmm0
+ movq %xmm0, -15(%edx)
+L(fwd_write_7bytes_align):
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_39bytes_align):
+ movdqa -39(%eax), %xmm0
+ movdqa %xmm0, -39(%edx)
+L(fwd_write_23bytes_align):
+ movdqa -23(%eax), %xmm0
+ movdqa %xmm0, -23(%edx)
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_43bytes_align):
+ movdqa -43(%eax), %xmm0
+ movdqa %xmm0, -43(%edx)
+L(fwd_write_27bytes_align):
+ movdqa -27(%eax), %xmm0
+ movdqa %xmm0, -27(%edx)
+L(fwd_write_11bytes_align):
+ movq -11(%eax), %xmm0
+ movq %xmm0, -11(%edx)
+L(fwd_write_3bytes_align):
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_35bytes_align):
+ movdqa -35(%eax), %xmm0
+ movdqa %xmm0, -35(%edx)
+L(fwd_write_19bytes_align):
+ movdqa -19(%eax), %xmm0
+ movdqa %xmm0, -19(%edx)
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_44bytes_align):
+ movdqa -44(%eax), %xmm0
+ movdqa %xmm0, -44(%edx)
+L(fwd_write_28bytes_align):
+ movdqa -28(%eax), %xmm0
+ movdqa %xmm0, -28(%edx)
+L(fwd_write_12bytes_align):
+ movq -12(%eax), %xmm0
+ movq %xmm0, -12(%edx)
+L(fwd_write_4bytes_align):
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(fwd_write_36bytes_align):
+ movdqa -36(%eax), %xmm0
+ movdqa %xmm0, -36(%edx)
+L(fwd_write_20bytes_align):
+ movdqa -20(%eax), %xmm0
+ movdqa %xmm0, -20(%edx)
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+# ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+# endif
+ RETURN_END
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(large_page):
+ movdqu (%eax), %xmm1
+# ifdef USE_AS_MEMMOVE
+ movl DEST+4(%esp), %edi
+ movdqu %xmm0, (%edi)
+# endif
+ lea 16(%eax), %eax
+ movntdq %xmm1, (%edx)
+ lea 16(%edx), %edx
+ lea -0x90(%ecx), %ecx
+ POP (%edi)
+
+ .p2align 4
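+/* A copy this large cannot profit from the cache: stream 128 bytes per
+   iteration with non-temporal stores and fence once at the end, before
+   the tail is copied through the jump table. */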
+L(large_page_loop):
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ movdqu 0x40(%eax), %xmm4
+ movdqu 0x50(%eax), %xmm5
+ movdqu 0x60(%eax), %xmm6
+ movdqu 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+
+ sub $0x80, %ecx
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ movntdq %xmm4, 0x40(%edx)
+ movntdq %xmm5, 0x50(%edx)
+ movntdq %xmm6, 0x60(%edx)
+ movntdq %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+ jae L(large_page_loop)
+ cmp $-0x40, %ecx
+ lea 0x80(%ecx), %ecx
+ jl L(large_page_less_64bytes)
+
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ lea 0x40(%eax), %eax
+
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ lea 0x40(%edx), %edx
+ sub $0x40, %ecx
+L(large_page_less_64bytes):
+ cmp $32, %ecx
+ jb L(large_page_less_32bytes)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ lea 0x20(%eax), %eax
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ lea 0x20(%edx), %edx
+ sub $0x20, %ecx
+L(large_page_less_32bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ sfence
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ .p2align 4
+L(bk_write_44bytes):
+ movq 36(%eax), %xmm0
+ movq %xmm0, 36(%edx)
+L(bk_write_36bytes):
+ movq 28(%eax), %xmm0
+ movq %xmm0, 28(%edx)
+L(bk_write_28bytes):
+ movq 20(%eax), %xmm0
+ movq %xmm0, 20(%edx)
+L(bk_write_20bytes):
+ movq 12(%eax), %xmm0
+ movq %xmm0, 12(%edx)
+L(bk_write_12bytes):
+ movq 4(%eax), %xmm0
+ movq %xmm0, 4(%edx)
+L(bk_write_4bytes):
+ movl (%eax), %ecx
+ movl %ecx, (%edx)
+L(bk_write_0bytes):
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(bk_write_40bytes):
+ movq 32(%eax), %xmm0
+ movq %xmm0, 32(%edx)
+L(bk_write_32bytes):
+ movq 24(%eax), %xmm0
+ movq %xmm0, 24(%edx)
+L(bk_write_24bytes):
+ movq 16(%eax), %xmm0
+ movq %xmm0, 16(%edx)
+L(bk_write_16bytes):
+ movq 8(%eax), %xmm0
+ movq %xmm0, 8(%edx)
+L(bk_write_8bytes):
+ movq (%eax), %xmm0
+ movq %xmm0, (%edx)
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(bk_write_45bytes):
+ movq 37(%eax), %xmm0
+ movq %xmm0, 37(%edx)
+L(bk_write_37bytes):
+ movq 29(%eax), %xmm0
+ movq %xmm0, 29(%edx)
+L(bk_write_29bytes):
+ movq 21(%eax), %xmm0
+ movq %xmm0, 21(%edx)
+L(bk_write_21bytes):
+ movq 13(%eax), %xmm0
+ movq %xmm0, 13(%edx)
+L(bk_write_13bytes):
+ movq 5(%eax), %xmm0
+ movq %xmm0, 5(%edx)
+L(bk_write_5bytes):
+ movl 1(%eax), %ecx
+ movl %ecx, 1(%edx)
+L(bk_write_1bytes):
+ movzbl (%eax), %ecx
+ movb %cl, (%edx)
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(bk_write_41bytes):
+ movq 33(%eax), %xmm0
+ movq %xmm0, 33(%edx)
+L(bk_write_33bytes):
+ movq 25(%eax), %xmm0
+ movq %xmm0, 25(%edx)
+L(bk_write_25bytes):
+ movq 17(%eax), %xmm0
+ movq %xmm0, 17(%edx)
+L(bk_write_17bytes):
+ movq 9(%eax), %xmm0
+ movq %xmm0, 9(%edx)
+L(bk_write_9bytes):
+ movq 1(%eax), %xmm0
+ movq %xmm0, 1(%edx)
+ movzbl (%eax), %ecx
+ movb %cl, (%edx)
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(bk_write_46bytes):
+ movq 38(%eax), %xmm0
+ movq %xmm0, 38(%edx)
+L(bk_write_38bytes):
+ movq 30(%eax), %xmm0
+ movq %xmm0, 30(%edx)
+L(bk_write_30bytes):
+ movq 22(%eax), %xmm0
+ movq %xmm0, 22(%edx)
+L(bk_write_22bytes):
+ movq 14(%eax), %xmm0
+ movq %xmm0, 14(%edx)
+L(bk_write_14bytes):
+ movq 6(%eax), %xmm0
+ movq %xmm0, 6(%edx)
+L(bk_write_6bytes):
+ movl 2(%eax), %ecx
+ movl %ecx, 2(%edx)
+ movzwl (%eax), %ecx
+ movw %cx, (%edx)
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(bk_write_42bytes):
+ movq 34(%eax), %xmm0
+ movq %xmm0, 34(%edx)
+L(bk_write_34bytes):
+ movq 26(%eax), %xmm0
+ movq %xmm0, 26(%edx)
+L(bk_write_26bytes):
+ movq 18(%eax), %xmm0
+ movq %xmm0, 18(%edx)
+L(bk_write_18bytes):
+ movq 10(%eax), %xmm0
+ movq %xmm0, 10(%edx)
+L(bk_write_10bytes):
+ movq 2(%eax), %xmm0
+ movq %xmm0, 2(%edx)
+L(bk_write_2bytes):
+ movzwl (%eax), %ecx
+ movw %cx, (%edx)
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(bk_write_47bytes):
+ movq 39(%eax), %xmm0
+ movq %xmm0, 39(%edx)
+L(bk_write_39bytes):
+ movq 31(%eax), %xmm0
+ movq %xmm0, 31(%edx)
+L(bk_write_31bytes):
+ movq 23(%eax), %xmm0
+ movq %xmm0, 23(%edx)
+L(bk_write_23bytes):
+ movq 15(%eax), %xmm0
+ movq %xmm0, 15(%edx)
+L(bk_write_15bytes):
+ movq 7(%eax), %xmm0
+ movq %xmm0, 7(%edx)
+L(bk_write_7bytes):
+ movl 3(%eax), %ecx
+ movl %ecx, 3(%edx)
+ movzwl 1(%eax), %ecx
+ movw %cx, 1(%edx)
+ movzbl (%eax), %eax
+ movb %al, (%edx)
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(bk_write_43bytes):
+ movq 35(%eax), %xmm0
+ movq %xmm0, 35(%edx)
+L(bk_write_35bytes):
+ movq 27(%eax), %xmm0
+ movq %xmm0, 27(%edx)
+L(bk_write_27bytes):
+ movq 19(%eax), %xmm0
+ movq %xmm0, 19(%edx)
+L(bk_write_19bytes):
+ movq 11(%eax), %xmm0
+ movq %xmm0, 11(%edx)
+L(bk_write_11bytes):
+ movq 3(%eax), %xmm0
+ movq %xmm0, 3(%edx)
+L(bk_write_3bytes):
+ movzwl 1(%eax), %ecx
+ movw %cx, 1(%edx)
+ movzbl (%eax), %eax
+ movb %al, (%edx)
+# ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+# endif
+ RETURN_END
+
+
+ .pushsection .rodata.ssse3,"a",@progbits
+ .p2align 2
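+/* Jump tables for the tail copies.  In PIC builds JMPTBL stores each
+   label as an offset from its table, so the section needs no absolute
+   relocations; BRANCH_TO_JMPTBL_ENTRY rebuilds the address and jumps. */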
+L(table_48bytes_fwd):
+ .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
+
+ .p2align 2
+L(table_48bytes_fwd_align):
+ .int JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
+ .int JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
+
+ .p2align 2
+L(shl_table):
+ .int JMPTBL (L(shl_0), L(shl_table))
+ .int JMPTBL (L(shl_1), L(shl_table))
+ .int JMPTBL (L(shl_2), L(shl_table))
+ .int JMPTBL (L(shl_3), L(shl_table))
+ .int JMPTBL (L(shl_4), L(shl_table))
+ .int JMPTBL (L(shl_5), L(shl_table))
+ .int JMPTBL (L(shl_6), L(shl_table))
+ .int JMPTBL (L(shl_7), L(shl_table))
+ .int JMPTBL (L(shl_8), L(shl_table))
+ .int JMPTBL (L(shl_9), L(shl_table))
+ .int JMPTBL (L(shl_10), L(shl_table))
+ .int JMPTBL (L(shl_11), L(shl_table))
+ .int JMPTBL (L(shl_12), L(shl_table))
+ .int JMPTBL (L(shl_13), L(shl_table))
+ .int JMPTBL (L(shl_14), L(shl_table))
+ .int JMPTBL (L(shl_15), L(shl_table))
+
+ .p2align 2
+L(table_48_bytes_bwd):
+ .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
+
+ .popsection
+
+# ifdef USE_AS_MEMMOVE
+ .p2align 4
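+/* Overlapping-copy path: point both registers past the end of their
+   buffers, align the destination, then copy from high addresses to low
+   so no source byte is overwritten before it has been read. */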
+L(copy_backward):
+ PUSH (%edi)
+ movl %eax, %edi
+ lea (%ecx,%edx,1),%edx
+ lea (%ecx,%edi,1),%edi
+ testl $0x3, %edx
+ jnz L(bk_align)
+
+L(bk_aligned_4):
+ cmp $64, %ecx
+ jae L(bk_write_more64bytes)
+
+L(bk_write_64bytesless):
+ cmp $32, %ecx
+ jb L(bk_write_less32bytes)
+
+L(bk_write_more32bytes):
+ /* Copy 32 bytes at a time. */
+ sub $32, %ecx
+ movq -8(%edi), %xmm0
+ movq %xmm0, -8(%edx)
+ movq -16(%edi), %xmm0
+ movq %xmm0, -16(%edx)
+ movq -24(%edi), %xmm0
+ movq %xmm0, -24(%edx)
+ movq -32(%edi), %xmm0
+ movq %xmm0, -32(%edx)
+ sub $32, %edx
+ sub $32, %edi
+
+L(bk_write_less32bytes):
+ movl %edi, %eax
+ sub %ecx, %edx
+ sub %ecx, %eax
+ POP (%edi)
+L(bk_write_less32bytes_2):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(bk_align):
+ cmp $8, %ecx
+ jbe L(bk_write_less32bytes)
+ testl $1, %edx
+	/* We get here only if (EDX & 3) != 0, so if (EDX & 1) == 0,
+	   then (EDX & 2) must be != 0.  */
+ jz L(bk_got2)
+ sub $1, %edi
+ sub $1, %ecx
+ sub $1, %edx
+ movzbl (%edi), %eax
+ movb %al, (%edx)
+
+ testl $2, %edx
+ jz L(bk_aligned_4)
+
+L(bk_got2):
+ sub $2, %edi
+ sub $2, %ecx
+ sub $2, %edx
+ movzwl (%edi), %eax
+ movw %ax, (%edx)
+ jmp L(bk_aligned_4)
+
+ .p2align 4
+L(bk_write_more64bytes):
+	/* Check the alignment of the end of the destination.  */
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+/* EDX is aligned to 4 bytes, but not to 16 bytes.  */
+L(bk_ssse3_align):
+ sub $4, %edi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%edi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %edi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%edi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %edi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%edi), %eax
+ movl %eax, (%edx)
+
+L(bk_ssse3_cpy_pre):
+ cmp $64, %ecx
+ jb L(bk_write_more32bytes)
+
+ .p2align 4
+L(bk_ssse3_cpy):
+ sub $64, %edi
+ sub $64, %ecx
+ sub $64, %edx
+ movdqu 0x30(%edi), %xmm3
+ movdqa %xmm3, 0x30(%edx)
+ movdqu 0x20(%edi), %xmm2
+ movdqa %xmm2, 0x20(%edx)
+ movdqu 0x10(%edi), %xmm1
+ movdqa %xmm1, 0x10(%edx)
+ movdqu (%edi), %xmm0
+ movdqa %xmm0, (%edx)
+ cmp $64, %ecx
+ jae L(bk_ssse3_cpy)
+ jmp L(bk_write_64bytesless)
+
+# endif
+
+END (MEMCPY)
+
+#endif
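All of the L(shl_1) through L(shl_15) blocks above use the same shifted-copy
technique: the source is read at its 16-byte-aligned base and consecutive
aligned loads are fused with palignr, so every load and every store in the
hot loop is aligned.  A minimal C sketch of one 32-byte step, assuming the
true source lies SHIFT bytes past the aligned base (function name and
layout are illustrative, not glibc's):

    #include <tmmintrin.h>      /* SSSE3: _mm_alignr_epi8 */

    enum { SHIFT = 14 };        /* corresponds to the shl_14 block */

    /* 'prev' caches the aligned block at 'base', playing the role of
       %xmm1 in the loops above.  */
    static __m128i
    copy32_shifted (char *dst, const char *base, __m128i prev)
    {
      __m128i a = _mm_load_si128 ((const __m128i *) (base + 16));
      __m128i b = _mm_load_si128 ((const __m128i *) (base + 32));
      /* palignr takes (16 - SHIFT) bytes from the older block and SHIFT
         bytes from the newer one: ((hi:lo) >> SHIFT bytes).  */
      _mm_store_si128 ((__m128i *) dst, _mm_alignr_epi8 (a, prev, SHIFT));
      _mm_store_si128 ((__m128i *) (dst + 16), _mm_alignr_epi8 (b, a, SHIFT));
      return b;                 /* becomes 'prev' for the next 32 bytes */
    }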
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy.S
new file mode 100644
index 0000000000..f725944620
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy.S
@@ -0,0 +1,78 @@
+/* Multiple versions of memcpy
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc and for
+   the DSO.  In static binaries we need memcpy before initialization
+   has taken place.  */
+#if defined SHARED && IS_IN (libc)
+ .text
+ENTRY(memcpy)
+ .type memcpy, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memcpy_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcpy_sse2_unaligned)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcpy_ssse3)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcpy_ssse3_rep)
+2: ret
+END(memcpy)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memcpy_ia32, @function; \
+ .p2align 4; \
+ .globl __memcpy_ia32; \
+ .hidden __memcpy_ia32; \
+ __memcpy_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+ .type __memcpy_chk_ia32, @function; \
+ .globl __memcpy_chk_ia32; \
+ .p2align 4; \
+ __memcpy_chk_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+ cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32
+
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC uses.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memcpy; __GI_memcpy = __memcpy_ia32
+#endif
+
+#include "../memcpy.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy_chk.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy_chk.S
new file mode 100644
index 0000000000..1b4fbe2e6f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memcpy_chk.S
@@ -0,0 +1,50 @@
+/* Multiple versions of __memcpy_chk
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc and for
+   the DSO.  There are no multiarch memcpy functions for static
+   binaries.  */
+#if IS_IN (libc)
+# ifdef SHARED
+ .text
+ENTRY(__memcpy_chk)
+ .type __memcpy_chk, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_sse2_unaligned)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3_rep)
+2: ret
+END(__memcpy_chk)
+# else
+# include "../memcpy_chk.S"
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
new file mode 100644
index 0000000000..3873594cb2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY __memmove_sse2_unaligned
+#define MEMCPY_CHK __memmove_chk_sse2_unaligned
+#include "memcpy-sse2-unaligned.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S
new file mode 100644
index 0000000000..d202fc4a13
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY __memmove_ssse3_rep
+#define MEMCPY_CHK __memmove_chk_ssse3_rep
+#include "memcpy-ssse3-rep.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3.S
new file mode 100644
index 0000000000..295430b1ef
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove-ssse3.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY __memmove_ssse3
+#define MEMCPY_CHK __memmove_chk_ssse3
+#include "memcpy-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memmove.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove.S
new file mode 100644
index 0000000000..6eb418ca7f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove.S
@@ -0,0 +1,89 @@
+/* Multiple versions of memmove
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+ .text
+ENTRY(memmove)
+ .type memmove, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memmove_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memmove_sse2_unaligned)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memmove_ssse3)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memmove_ssse3_rep)
+2: ret
+END(memmove)
+
+# ifdef SHARED
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memmove_ia32, @function; \
+ .p2align 4; \
+ .globl __memmove_ia32; \
+ .hidden __memmove_ia32; \
+ __memmove_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# else
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memmove_ia32, @function; \
+ .globl __memmove_ia32; \
+ .p2align 4; \
+ __memmove_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# endif
+
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memmove_ia32, .-__memmove_ia32
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+ .type __memmove_chk_ia32, @function; \
+ .globl __memmove_chk_ia32; \
+ .p2align 4; \
+ __memmove_chk_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+ cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC uses.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memmove; __GI_memmove = __memmove_ia32
+# endif
+#endif
+
+#include "../memmove.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memmove_chk.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove_chk.S
new file mode 100644
index 0000000000..314834c4c6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memmove_chk.S
@@ -0,0 +1,94 @@
+/* Multiple versions of __memmove_chk
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+ .text
+ENTRY(__memmove_chk)
+ .type __memmove_chk, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memmove_chk_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memmove_chk_sse2_unaligned)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3_rep)
+2: ret
+END(__memmove_chk)
+
+# ifndef SHARED
+ .type __memmove_chk_sse2_unaligned, @function
+ .p2align 4;
+__memmove_chk_sse2_unaligned:
+ cfi_startproc
+ CALL_MCOUNT
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp __memmove_sse2_unaligned
+ cfi_endproc
+ .size __memmove_chk_sse2_unaligned, .-__memmove_chk_sse2_unaligned
+
+ .type __memmove_chk_ssse3, @function
+ .p2align 4;
+__memmove_chk_ssse3:
+ cfi_startproc
+ CALL_MCOUNT
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp __memmove_ssse3
+ cfi_endproc
+ .size __memmove_chk_ssse3, .-__memmove_chk_ssse3
+
+ .type __memmove_chk_ssse3_rep, @function
+ .p2align 4;
+__memmove_chk_ssse3_rep:
+ cfi_startproc
+ CALL_MCOUNT
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp __memmove_ssse3_rep
+ cfi_endproc
+ .size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep
+
+ .type __memmove_chk_ia32, @function
+ .p2align 4;
+__memmove_chk_ia32:
+ cfi_startproc
+ CALL_MCOUNT
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp __memmove_ia32
+ cfi_endproc
+ .size __memmove_chk_ia32, .-__memmove_chk_ia32
+# endif
+#endif
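Each static __memmove_chk_* wrapper above is the same three-instruction
check: load the requested length from 12(%esp), compare it against the
destination object's size at 16(%esp), and tail-jump to __chk_fail on
overflow or to the unchecked variant otherwise.  A C sketch of that check
(the function name is illustrative):

    #include <stddef.h>
    #include <string.h>

    extern void __chk_fail (void) __attribute__ ((noreturn));

    /* 'dstlen' is the compiler-determined size of the destination
       object, passed in by _FORTIFY_SOURCE callers.  */
    void *
    memmove_chk_sketch (void *dst, const void *src, size_t len,
                        size_t dstlen)
    {
      if (dstlen < len)         /* cmpl %eax, 16(%esp); jb __chk_fail */
        __chk_fail ();
      return memmove (dst, src, len);   /* jmp to the selected variant */
    }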
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
new file mode 100644
index 0000000000..a1cea50771
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY __mempcpy_sse2_unaligned
+#define MEMCPY_CHK __mempcpy_chk_sse2_unaligned
+#include "memcpy-sse2-unaligned.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S
new file mode 100644
index 0000000000..5357b33e18
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY __mempcpy_ssse3_rep
+#define MEMCPY_CHK __mempcpy_chk_ssse3_rep
+#include "memcpy-ssse3-rep.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S
new file mode 100644
index 0000000000..822d98e954
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY __mempcpy_ssse3
+#define MEMCPY_CHK __mempcpy_chk_ssse3
+#include "memcpy-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy.S
new file mode 100644
index 0000000000..06e377fbc9
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy.S
@@ -0,0 +1,81 @@
+/* Multiple versions of mempcpy
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc and for
+   the DSO.  In static binaries we need mempcpy before initialization
+   has taken place.  */
+#if defined SHARED && IS_IN (libc)
+ .text
+ENTRY(__mempcpy)
+ .type __mempcpy, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__mempcpy_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__mempcpy_sse2_unaligned)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__mempcpy_ssse3)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__mempcpy_ssse3_rep)
+2: ret
+END(__mempcpy)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __mempcpy_ia32, @function; \
+ .p2align 4; \
+ .globl __mempcpy_ia32; \
+ .hidden __mempcpy_ia32; \
+ __mempcpy_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+ .type __mempcpy_chk_ia32, @function; \
+ .globl __mempcpy_chk_ia32; \
+ .p2align 4; \
+ __mempcpy_chk_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+ cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32
+
+# undef libc_hidden_def
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC uses.  */
+# define libc_hidden_def(name) \
+ .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32
+# define libc_hidden_builtin_def(name) \
+ .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32
+#endif
+
+#include "../mempcpy.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy_chk.S
new file mode 100644
index 0000000000..e13e5248a5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/mempcpy_chk.S
@@ -0,0 +1,50 @@
+/* Multiple versions of __mempcpy_chk
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc and for
+   the DSO.  There are no multiarch mempcpy functions for static
+   binaries.  */
+#if IS_IN (libc)
+# ifdef SHARED
+ .text
+ENTRY(__mempcpy_chk)
+ .type __mempcpy_chk, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_sse2_unaligned)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3_rep)
+2: ret
+END(__mempcpy_chk)
+# else
+# include "../mempcpy_chk.S"
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-c.c
new file mode 100644
index 0000000000..ef7bbbe792
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-c.c
@@ -0,0 +1,7 @@
+#if IS_IN (libc)
+# define MEMRCHR __memrchr_ia32
+# include <string.h>
+extern void *__memrchr_ia32 (const void *, int, size_t);
+#endif
+
+#include "string/memrchr.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
new file mode 100644
index 0000000000..dbbe94fd08
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
@@ -0,0 +1,417 @@
+/* Optimized memrchr with sse2
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define STR1 PARMS
+# define STR2 STR1+4
+# define LEN STR2+4
+
+# define MEMCHR __memrchr_sse2_bsf
+
+ .text
+ENTRY (MEMCHR)
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+ mov LEN(%esp), %edx
+
+ sub $16, %edx
+ jbe L(length_less16)
+
+ punpcklbw %xmm1, %xmm1
+ add %edx, %ecx
+ punpcklbw %xmm1, %xmm1
+
+ movdqu (%ecx), %xmm0
+ pshufd $0, %xmm1, %xmm1
+ pcmpeqb %xmm1, %xmm0
+
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+ sub $64, %ecx
+ mov %ecx, %eax
+ and $15, %eax
+ jz L(loop_prolog)
+
+ add $16, %ecx
+ add $16, %edx
+ sub %eax, %ecx
+ sub %eax, %edx
+
+ .p2align 4
+/* The loop starts on an aligned string.  */
+L(loop_prolog):
+ sub $64, %edx
+ jbe L(exit_loop)
+
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ movdqa 32(%ecx), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa (%ecx), %xmm4
+ pcmpeqb %xmm1, %xmm4
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+ sub $64, %ecx
+ sub $64, %edx
+ jbe L(exit_loop)
+
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ movdqa 32(%ecx), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa (%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+ mov %ecx, %eax
+ and $63, %eax
+ test %eax, %eax
+ jz L(align64_loop)
+
+ add $64, %ecx
+ add $64, %edx
+ sub %eax, %ecx
+ sub %eax, %edx
+
+ .p2align 4
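+/* Check 64 bytes per iteration.  pcmpeqb leaves 0x00 or 0xff in each
+   byte, so pmaxub merges the four compare results and a single
+   pmovmskb test covers the whole block. */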
+L(align64_loop):
+ sub $64, %ecx
+ sub $64, %edx
+ jbe L(exit_loop)
+
+ movdqa (%ecx), %xmm0
+ movdqa 16(%ecx), %xmm2
+ movdqa 32(%ecx), %xmm3
+ movdqa 48(%ecx), %xmm4
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm1, %xmm4
+
+ pmaxub %xmm3, %xmm0
+ pmaxub %xmm4, %xmm2
+ pmaxub %xmm0, %xmm2
+ pmovmskb %xmm2, %eax
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm2
+
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb (%ecx), %xmm1
+
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ pmovmskb %xmm1, %eax
+ bsr %eax, %eax
+
+ add %ecx, %eax
+ ret
+
+ .p2align 4
+L(exit_loop):
+ add $64, %edx
+ cmp $32, %edx
+ jbe L(exit_loop_32)
+
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ movdqa 32(%ecx), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches16_1)
+ cmp $48, %edx
+ jbe L(return_null)
+
+ pcmpeqb (%ecx), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches0_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(exit_loop_32):
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48_1)
+ cmp $16, %edx
+ jbe L(return_null)
+
+ pcmpeqb 32(%ecx), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches32_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(matches0):
+ bsr %eax, %eax
+ add %ecx, %eax
+ ret
+
+ .p2align 4
+L(matches16):
+ bsr %eax, %eax
+ lea 16(%eax, %ecx), %eax
+ ret
+
+ .p2align 4
+L(matches32):
+ bsr %eax, %eax
+ lea 32(%eax, %ecx), %eax
+ ret
+
+ .p2align 4
+L(matches48):
+ bsr %eax, %eax
+ lea 48(%eax, %ecx), %eax
+ ret
+
+ .p2align 4
+L(matches0_1):
+ bsr %eax, %eax
+ sub $64, %edx
+ add %eax, %edx
+ jl L(return_null)
+ add %ecx, %eax
+ ret
+
+ .p2align 4
+L(matches16_1):
+ bsr %eax, %eax
+ sub $48, %edx
+ add %eax, %edx
+ jl L(return_null)
+ lea 16(%ecx, %eax), %eax
+ ret
+
+ .p2align 4
+L(matches32_1):
+ bsr %eax, %eax
+ sub $32, %edx
+ add %eax, %edx
+ jl L(return_null)
+ lea 32(%ecx, %eax), %eax
+ ret
+
+ .p2align 4
+L(matches48_1):
+ bsr %eax, %eax
+ sub $16, %edx
+ add %eax, %edx
+ jl L(return_null)
+ lea 48(%ecx, %eax), %eax
+ ret
+
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(length_less16_offset0):
+ mov %dl, %cl
+ pcmpeqb (%eax), %xmm1
+
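+	/* Build the mask (1 << length) - 1 so that match bits beyond
+	   the end of the buffer are ignored.  */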
+ mov $1, %edx
+ sal %cl, %edx
+ sub $1, %edx
+ mov %edx, %ecx
+
+ pmovmskb %xmm1, %edx
+
+ and %ecx, %edx
+ test %edx, %edx
+ jz L(return_null)
+
+ bsr %edx, %ecx
+ add %ecx, %eax
+ ret
+
+ .p2align 4
+L(length_less16):
+ punpcklbw %xmm1, %xmm1
+ mov %ecx, %eax
+ punpcklbw %xmm1, %xmm1
+ add $16, %edx
+ jz L(return_null)
+
+ pshufd $0, %xmm1, %xmm1
+ and $15, %ecx
+ jz L(length_less16_offset0)
+
+ PUSH (%edi)
+ mov %cl, %dh
+ add %dl, %dh
+ and $-16, %eax
+
+ sub $16, %dh
+ ja L(length_less16_part2)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edi
+
+ sar %cl, %edi
+ add %ecx, %eax
+ mov %dl, %cl
+
+ mov $1, %edx
+ sal %cl, %edx
+ sub $1, %edx
+
+ and %edx, %edi
+ test %edi, %edi
+ jz L(ret_null)
+
+ bsr %edi, %edi
+ add %edi, %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(length_less16_part2):
+ movdqa 16(%eax), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %edi
+
+ mov %cl, %ch
+
+ mov %dh, %cl
+ mov $1, %edx
+ sal %cl, %edx
+ sub $1, %edx
+
+ and %edx, %edi
+
+ test %edi, %edi
+ jnz L(length_less16_part2_return)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edi
+
+ mov %ch, %cl
+ sar %cl, %edi
+ test %edi, %edi
+ jz L(ret_null)
+
+ bsr %edi, %edi
+ add %edi, %eax
+ xor %ch, %ch
+ add %ecx, %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(length_less16_part2_return):
+ bsr %edi, %edi
+ lea 16(%eax, %edi), %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(ret_null):
+ xor %eax, %eax
+ POP (%edi)
+ ret
+
+END (MEMCHR)
+#endif
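
[Editor's note: The L(align64_loop) above probes 64 bytes per iteration. Each pcmpeqb result holds only 0x00 or 0xFF bytes, so folding the four results with pmaxub produces a nonzero vector exactly when some byte matched, and a single pmovmskb then covers all 64 bytes. A minimal C sketch of the same trick with SSE2 intrinsics; the function name and the alignment precondition are ours, not glibc's:

#include <emmintrin.h>
#include <stdint.h>

/* Return nonzero iff any of the 64 bytes at p (16-byte aligned) equals c.  */
static int any_byte_matches_64 (const uint8_t *p, uint8_t c)
{
  __m128i needle = _mm_set1_epi8 ((char) c);
  __m128i r0 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) p), needle);
  __m128i r1 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) (p + 16)), needle);
  __m128i r2 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) (p + 32)), needle);
  __m128i r3 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) (p + 48)), needle);
  /* Compare results are 0x00/0xff per byte, so an unsigned byte max is
     0xff exactly where at least one input matched.  */
  __m128i any = _mm_max_epu8 (_mm_max_epu8 (r0, r1), _mm_max_epu8 (r2, r3));
  return _mm_movemask_epi8 (any);
}

When the combined mask is nonzero, the assembly re-tests the per-block masks starting from %xmm4 (offset 48, the highest address), since this is a backward search.]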
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2.S
new file mode 100644
index 0000000000..5f7853f683
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr-sse2.S
@@ -0,0 +1,724 @@
+/* Optimized memrchr with SSE2, without using the bsf instruction.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define STR1 PARMS
+# define STR2 STR1+4
+# define LEN STR2+4
+
+ atom_text_section
+ENTRY (__memrchr_sse2)
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+ mov LEN(%esp), %edx
+
+ sub $16, %edx
+ jbe L(length_less16)
+
+ punpcklbw %xmm1, %xmm1
+ add %edx, %ecx
+ punpcklbw %xmm1, %xmm1
+
+ movdqu (%ecx), %xmm0
+ pshufd $0, %xmm1, %xmm1
+ pcmpeqb %xmm1, %xmm0
+
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(exit_dispatch)
+
+ sub $64, %ecx
+ mov %ecx, %eax
+ and $15, %eax
+ jz L(loop_prolog)
+
+ lea 16(%ecx), %ecx
+ lea 16(%edx), %edx
+ sub %eax, %edx
+ and $-16, %ecx
+
+ .p2align 4
+/* The loop below starts at an aligned address. */
+L(loop_prolog):
+ sub $64, %edx
+ jbe L(exit_loop)
+
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ movdqa 32(%ecx), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa (%ecx), %xmm4
+ pcmpeqb %xmm1, %xmm4
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(exit_dispatch)
+
+ sub $64, %ecx
+ sub $64, %edx
+ jbe L(exit_loop)
+
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ movdqa 32(%ecx), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa (%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(exit_dispatch)
+
+ mov %ecx, %eax
+ and $63, %eax
+ test %eax, %eax
+ jz L(align64_loop)
+
+ lea 64(%ecx), %ecx
+ lea 64(%edx), %edx
+ and $-64, %ecx
+ sub %eax, %edx
+
+ .p2align 4
+L(align64_loop):
+ sub $64, %ecx
+ sub $64, %edx
+ jbe L(exit_loop)
+
+ movdqa (%ecx), %xmm0
+ movdqa 16(%ecx), %xmm2
+ movdqa 32(%ecx), %xmm3
+ movdqa 48(%ecx), %xmm4
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm1, %xmm4
+
+ pmaxub %xmm3, %xmm0
+ pmaxub %xmm4, %xmm2
+ pmaxub %xmm0, %xmm2
+ pmovmskb %xmm2, %eax
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm2
+
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb (%ecx), %xmm1
+
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ pmovmskb %xmm1, %eax
+ test %ah, %ah
+ jnz L(exit_dispatch_high)
+ mov %al, %dl
+ and $15 << 4, %dl
+ jnz L(exit_dispatch_8)
+ test $0x08, %al
+ jnz L(exit_4)
+ test $0x04, %al
+ jnz L(exit_3)
+ test $0x02, %al
+ jnz L(exit_2)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(exit_loop):
+ add $64, %edx
+ cmp $32, %edx
+ jbe L(exit_loop_32)
+
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48)
+
+ movdqa 32(%ecx), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 16(%ecx), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches16_1)
+ cmp $48, %edx
+ jbe L(return_null)
+
+ pcmpeqb (%ecx), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches0_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(exit_loop_32):
+ movdqa 48(%ecx), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches48_1)
+ cmp $16, %edx
+ jbe L(return_null)
+
+ pcmpeqb 32(%ecx), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches32_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(matches16):
+ lea 16(%ecx), %ecx
+ test %ah, %ah
+ jnz L(exit_dispatch_high)
+ mov %al, %dl
+ and $15 << 4, %dl
+ jnz L(exit_dispatch_8)
+ test $0x08, %al
+ jnz L(exit_4)
+ test $0x04, %al
+ jnz L(exit_3)
+ test $0x02, %al
+ jnz L(exit_2)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(matches32):
+ lea 32(%ecx), %ecx
+ test %ah, %ah
+ jnz L(exit_dispatch_high)
+ mov %al, %dl
+ and $15 << 4, %dl
+ jnz L(exit_dispatch_8)
+ test $0x08, %al
+ jnz L(exit_4)
+ test $0x04, %al
+ jnz L(exit_3)
+ test $0x02, %al
+ jnz L(exit_2)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(matches48):
+ lea 48(%ecx), %ecx
+
+ .p2align 4
+L(exit_dispatch):
+ test %ah, %ah
+ jnz L(exit_dispatch_high)
+ mov %al, %dl
+ and $15 << 4, %dl
+ jnz L(exit_dispatch_8)
+ test $0x08, %al
+ jnz L(exit_4)
+ test $0x04, %al
+ jnz L(exit_3)
+ test $0x02, %al
+ jnz L(exit_2)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(exit_dispatch_8):
+ test $0x80, %al
+ jnz L(exit_8)
+ test $0x40, %al
+ jnz L(exit_7)
+ test $0x20, %al
+ jnz L(exit_6)
+ lea 4(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_dispatch_high):
+ mov %ah, %dh
+ and $15 << 4, %dh
+ jnz L(exit_dispatch_high_8)
+ test $0x08, %ah
+ jnz L(exit_12)
+ test $0x04, %ah
+ jnz L(exit_11)
+ test $0x02, %ah
+ jnz L(exit_10)
+ lea 8(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_dispatch_high_8):
+ test $0x80, %ah
+ jnz L(exit_16)
+ test $0x40, %ah
+ jnz L(exit_15)
+ test $0x20, %ah
+ jnz L(exit_14)
+ lea 12(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_2):
+ lea 1(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_3):
+ lea 2(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_4):
+ lea 3(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_6):
+ lea 5(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_7):
+ lea 6(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_8):
+ lea 7(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_10):
+ lea 9(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_11):
+ lea 10(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_12):
+ lea 11(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_14):
+ lea 13(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_15):
+ lea 14(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_16):
+ lea 15(%ecx), %eax
+ ret
+
+ .p2align 4
+L(matches0_1):
+ lea -64(%edx), %edx
+
+ test %ah, %ah
+ jnz L(exit_dispatch_1_high)
+ mov %al, %ah
+ and $15 << 4, %ah
+ jnz L(exit_dispatch_1_8)
+ test $0x08, %al
+ jnz L(exit_1_4)
+ test $0x04, %al
+ jnz L(exit_1_3)
+ test $0x02, %al
+ jnz L(exit_1_2)
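+	/* Byte 0 matched; it is in range iff EDX >= 0.  The "add $0"
+	   below merely sets the flags for the sign test.  */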
+ add $0, %edx
+ jl L(return_null)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(matches16_1):
+ lea -48(%edx), %edx
+ lea 16(%ecx), %ecx
+
+ test %ah, %ah
+ jnz L(exit_dispatch_1_high)
+ mov %al, %ah
+ and $15 << 4, %ah
+ jnz L(exit_dispatch_1_8)
+ test $0x08, %al
+ jnz L(exit_1_4)
+ test $0x04, %al
+ jnz L(exit_1_3)
+ test $0x02, %al
+ jnz L(exit_1_2)
+ add $0, %edx
+ jl L(return_null)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(matches32_1):
+ lea -32(%edx), %edx
+ lea 32(%ecx), %ecx
+
+ test %ah, %ah
+ jnz L(exit_dispatch_1_high)
+ mov %al, %ah
+ and $15 << 4, %ah
+ jnz L(exit_dispatch_1_8)
+ test $0x08, %al
+ jnz L(exit_1_4)
+ test $0x04, %al
+ jnz L(exit_1_3)
+ test $0x02, %al
+ jnz L(exit_1_2)
+ add $0, %edx
+ jl L(return_null)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(matches48_1):
+ lea -16(%edx), %edx
+ lea 48(%ecx), %ecx
+
+ .p2align 4
+L(exit_dispatch_1):
+ test %ah, %ah
+ jnz L(exit_dispatch_1_high)
+ mov %al, %ah
+ and $15 << 4, %ah
+ jnz L(exit_dispatch_1_8)
+ test $0x08, %al
+ jnz L(exit_1_4)
+ test $0x04, %al
+ jnz L(exit_1_3)
+ test $0x02, %al
+ jnz L(exit_1_2)
+ add $0, %edx
+ jl L(return_null)
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(exit_dispatch_1_8):
+ test $0x80, %al
+ jnz L(exit_1_8)
+ test $0x40, %al
+ jnz L(exit_1_7)
+ test $0x20, %al
+ jnz L(exit_1_6)
+ add $4, %edx
+ jl L(return_null)
+ lea 4(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_dispatch_1_high):
+ mov %ah, %al
+ and $15 << 4, %al
+ jnz L(exit_dispatch_1_high_8)
+ test $0x08, %ah
+ jnz L(exit_1_12)
+ test $0x04, %ah
+ jnz L(exit_1_11)
+ test $0x02, %ah
+ jnz L(exit_1_10)
+ add $8, %edx
+ jl L(return_null)
+ lea 8(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_dispatch_1_high_8):
+ test $0x80, %ah
+ jnz L(exit_1_16)
+ test $0x40, %ah
+ jnz L(exit_1_15)
+ test $0x20, %ah
+ jnz L(exit_1_14)
+ add $12, %edx
+ jl L(return_null)
+ lea 12(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_2):
+ add $1, %edx
+ jl L(return_null)
+ lea 1(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_3):
+ add $2, %edx
+ jl L(return_null)
+ lea 2(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_4):
+ add $3, %edx
+ jl L(return_null)
+ lea 3(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_6):
+ add $5, %edx
+ jl L(return_null)
+ lea 5(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_7):
+ add $6, %edx
+ jl L(return_null)
+ lea 6(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_8):
+ add $7, %edx
+ jl L(return_null)
+ lea 7(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_10):
+ add $9, %edx
+ jl L(return_null)
+ lea 9(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_11):
+ add $10, %edx
+ jl L(return_null)
+ lea 10(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_12):
+ add $11, %edx
+ jl L(return_null)
+ lea 11(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_14):
+ add $13, %edx
+ jl L(return_null)
+ lea 13(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_15):
+ add $14, %edx
+ jl L(return_null)
+ lea 14(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit_1_16):
+ add $15, %edx
+ jl L(return_null)
+ lea 15(%ecx), %eax
+ ret
+
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(length_less16_offset0):
+ mov %dl, %cl
+ pcmpeqb (%eax), %xmm1
+
+ mov $1, %edx
+ sal %cl, %edx
+ sub $1, %edx
+
+ mov %eax, %ecx
+ pmovmskb %xmm1, %eax
+
+ and %edx, %eax
+ test %eax, %eax
+ jnz L(exit_dispatch)
+
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(length_less16):
+ punpcklbw %xmm1, %xmm1
+ add $16, %edx
+ je L(return_null)
+ punpcklbw %xmm1, %xmm1
+
+ mov %ecx, %eax
+ pshufd $0, %xmm1, %xmm1
+
+ and $15, %ecx
+ jz L(length_less16_offset0)
+
+ PUSH (%edi)
+
+ mov %cl, %dh
+ add %dl, %dh
+ and $-16, %eax
+
+ sub $16, %dh
+ ja L(length_less16_part2)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edi
+
+ sar %cl, %edi
+ add %ecx, %eax
+ mov %dl, %cl
+
+ mov $1, %edx
+ sal %cl, %edx
+ sub $1, %edx
+
+ and %edx, %edi
+ test %edi, %edi
+ jz L(ret_null)
+
+ bsr %edi, %edi
+ add %edi, %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(length_less16_part2):
+ movdqa 16(%eax), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %edi
+
+ mov %cl, %ch
+
+ mov %dh, %cl
+ mov $1, %edx
+ sal %cl, %edx
+ sub $1, %edx
+
+ and %edx, %edi
+
+ test %edi, %edi
+ jnz L(length_less16_part2_return)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edi
+
+ mov %ch, %cl
+ sar %cl, %edi
+ test %edi, %edi
+ jz L(ret_null)
+
+ bsr %edi, %edi
+ add %edi, %eax
+ xor %ch, %ch
+ add %ecx, %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(length_less16_part2_return):
+ bsr %edi, %edi
+ lea 16(%eax, %edi), %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(ret_null):
+ xor %eax, %eax
+ POP (%edi)
+ ret
+
+END (__memrchr_sse2)
+#endif
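
[Editor's note: This variant exists for processors where bsr/bsf are slow (the Slow_BSF case selected by memrchr.S below); the L(exit_dispatch) ladders above recover the highest matching byte of a pcmpeqb/pmovmskb mask using plain byte tests instead of bsr. A standalone C sketch of that dispatch, for illustration only:

/* Index of the highest set bit of a nonzero 16-bit match mask,
   computed the way L(exit_dispatch)..L(exit_16) do it: high byte
   first, then the high nibble, then individual bits.  */
static int highest_match_index (unsigned int mask)
{
  unsigned int lo = mask & 0xff, hi = (mask >> 8) & 0xff;
  unsigned int byte = hi ? hi : lo;        /* test %ah, %ah */
  int base = hi ? 8 : 0;
  if (byte & 0xf0)                         /* and $15 << 4, %dl */
    {
      if (byte & 0x80) return base + 7;
      if (byte & 0x40) return base + 6;
      if (byte & 0x20) return base + 5;
      return base + 4;
    }
  if (byte & 0x08) return base + 3;
  if (byte & 0x04) return base + 2;
  if (byte & 0x02) return base + 1;
  return base;
}

The assembly folds the base and index straight into the address computation: L(exit_16), for example, is just lea 15(%ecx), %eax.]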
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr.S
new file mode 100644
index 0000000000..d4253a553b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memrchr.S
@@ -0,0 +1,45 @@
+/* Multiple versions of memrchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(__memrchr)
+ .type __memrchr, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ HAS_ARCH_FEATURE (Slow_BSF)
+ jz 3f
+
+ LOAD_FUNC_GOT_EAX (__memrchr_sse2)
+ ret
+
+2: LOAD_FUNC_GOT_EAX (__memrchr_ia32)
+ ret
+
+3: LOAD_FUNC_GOT_EAX (__memrchr_sse2_bsf)
+ ret
+END(__memrchr)
+
+weak_alias(__memrchr, memrchr)
+#endif
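
[Editor's note: For comparison, GCC can express the same run-time selection in C with the ifunc attribute. The sketch below keys only on SSE2 via __builtin_cpu_supports; the assembly above additionally prefers the non-bsf variant when Slow_BSF is set. The *_c function names are hypothetical stand-ins, not glibc symbols:

#include <stddef.h>

extern void *memrchr_ia32_c (const void *s, int c, size_t n);
extern void *memrchr_sse2_c (const void *s, int c, size_t n);

static void *(*resolve_memrchr (void)) (const void *, int, size_t)
{
  __builtin_cpu_init ();
  return __builtin_cpu_supports ("sse2") ? memrchr_sse2_c : memrchr_ia32_c;
}

void *my_memrchr (const void *s, int c, size_t n)
     __attribute__ ((ifunc ("resolve_memrchr")));

The dynamic loader runs the resolver once, at relocation time, and records whichever implementation it returns, so callers pay no per-call dispatch cost.]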
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
new file mode 100644
index 0000000000..3221077e49
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -0,0 +1,811 @@
+/* memset with SSE2 and REP string.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef USE_AS_BZERO
+# define DEST PARMS
+# define LEN DEST+4
+# define SETRTNVAL
+#else
+# define DEST PARMS
+# define CHR DEST+4
+# define LEN CHR+4
+# define SETRTNVAL movl DEST(%esp), %eax
+#endif
+
+#ifdef SHARED
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define PARMS 8 /* Preserve EBX. */
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ add $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ add (%ebx,%ecx,4), %ebx; \
+ add %ecx, %edx; \
+ /* We loaded the jump table and adjusted EDX. Go. */ \
+ jmp *%ebx
+#else
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define PARMS 4
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+ absolute offsets. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
+ add %ecx, %edx; \
+ jmp *TABLE(,%ecx,4)
+#endif
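
[Editor's note: In the SHARED build, JMPTBL (I, B) stores each label as an offset relative to the table base, so the .rodata table contains no absolute addresses and needs no dynamic relocations; BRANCH_TO_JMPTBL_ENTRY recomputes the table's address from the PC at run time and adds the offset back. GNU C's labels-as-values can express the same position-independent dispatch; a hypothetical sketch, here zero-filling a 0-3 byte tail:

/* Zero the last n bytes before END, n in 0..3, via a table of
   label-minus-base offsets -- the C analogue of JMPTBL /
   BRANCH_TO_JMPTBL_ENTRY in the SHARED case.  */
static void store_tail (unsigned char *end, unsigned int n)
{
  static const int offset[] = {
    &&t0 - &&t0, &&t1 - &&t0, &&t2 - &&t0, &&t3 - &&t0
  };
  goto *(&&t0 + offset[n]);
 t3: end[-3] = 0;
 t2: end[-2] = 0;
 t1: end[-1] = 0;
 t0: return;
}

The fallthrough chain echoes the L(write_Nbytes) blocks below, where larger sizes fall into smaller ones so a single indirect jump handles every tail length.]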
+
+ .section .text.sse2,"ax",@progbits
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
+ENTRY (__memset_chk_sse2_rep)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_sse2_rep)
+#endif
+ENTRY (__memset_sse2_rep)
+ ENTRANCE
+
+ movl LEN(%esp), %ecx
+#ifdef USE_AS_BZERO
+ xor %eax, %eax
+#else
+ movzbl CHR(%esp), %eax
+ movb %al, %ah
+	/* Spread the fill byte to all four bytes of EAX. */
+ movl %eax, %edx
+ shl $16, %eax
+ or %edx, %eax
+#endif
+ movl DEST(%esp), %edx
+ cmp $32, %ecx
+ jae L(32bytesormore)
+
+L(write_less32bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
+
+
+ .pushsection .rodata.sse2,"a",@progbits
+ ALIGN (2)
+L(table_less_32bytes):
+ .int JMPTBL (L(write_0bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_1bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_2bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_3bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_4bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_5bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_6bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_7bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_8bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_9bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_10bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_11bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_12bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_13bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_14bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_15bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_16bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_17bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_18bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_19bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_20bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_21bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_22bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_23bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_24bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_25bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_26bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_27bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_28bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_29bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_30bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_31bytes), L(table_less_32bytes))
+ .popsection
+
+ ALIGN (4)
+L(write_28bytes):
+ movl %eax, -28(%edx)
+L(write_24bytes):
+ movl %eax, -24(%edx)
+L(write_20bytes):
+ movl %eax, -20(%edx)
+L(write_16bytes):
+ movl %eax, -16(%edx)
+L(write_12bytes):
+ movl %eax, -12(%edx)
+L(write_8bytes):
+ movl %eax, -8(%edx)
+L(write_4bytes):
+ movl %eax, -4(%edx)
+L(write_0bytes):
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(write_29bytes):
+ movl %eax, -29(%edx)
+L(write_25bytes):
+ movl %eax, -25(%edx)
+L(write_21bytes):
+ movl %eax, -21(%edx)
+L(write_17bytes):
+ movl %eax, -17(%edx)
+L(write_13bytes):
+ movl %eax, -13(%edx)
+L(write_9bytes):
+ movl %eax, -9(%edx)
+L(write_5bytes):
+ movl %eax, -5(%edx)
+L(write_1bytes):
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(write_30bytes):
+ movl %eax, -30(%edx)
+L(write_26bytes):
+ movl %eax, -26(%edx)
+L(write_22bytes):
+ movl %eax, -22(%edx)
+L(write_18bytes):
+ movl %eax, -18(%edx)
+L(write_14bytes):
+ movl %eax, -14(%edx)
+L(write_10bytes):
+ movl %eax, -10(%edx)
+L(write_6bytes):
+ movl %eax, -6(%edx)
+L(write_2bytes):
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(write_31bytes):
+ movl %eax, -31(%edx)
+L(write_27bytes):
+ movl %eax, -27(%edx)
+L(write_23bytes):
+ movl %eax, -23(%edx)
+L(write_19bytes):
+ movl %eax, -19(%edx)
+L(write_15bytes):
+ movl %eax, -15(%edx)
+L(write_11bytes):
+ movl %eax, -11(%edx)
+L(write_7bytes):
+ movl %eax, -7(%edx)
+L(write_3bytes):
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+/* ECX >= 32 and EDX is 4-byte aligned. */
+L(32bytesormore):
+ /* Fill xmm0 with the pattern. */
+#ifdef USE_AS_BZERO
+ pxor %xmm0, %xmm0
+#else
+ movd %eax, %xmm0
+ pshufd $0, %xmm0, %xmm0
+#endif
+ testl $0xf, %edx
+ jz L(aligned_16)
+/* ECX >= 32 and EDX is not 16-byte aligned. */
+L(not_aligned_16):
+ movdqu %xmm0, (%edx)
+ movl %edx, %eax
+ and $-16, %edx
+ add $16, %edx
+ sub %edx, %eax
+ add %eax, %ecx
+ movd %xmm0, %eax
+
+ ALIGN (4)
+L(aligned_16):
+ cmp $128, %ecx
+ jae L(128bytesormore)
+
+L(aligned_16_less128bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+ ALIGN (4)
+L(128bytesormore):
+ PUSH (%edi)
+#ifdef DATA_CACHE_SIZE
+ PUSH (%ebx)
+ mov $DATA_CACHE_SIZE, %ebx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_data_cache_size@GOTOFF(%ebx), %ebx
+# else
+ PUSH (%ebx)
+ mov __x86_data_cache_size, %ebx
+# endif
+#endif
+ mov %ebx, %edi
+ shr $4, %ebx
+ sub %ebx, %edi
+#if defined DATA_CACHE_SIZE || !defined SHARED
+ POP (%ebx)
+#endif
+/*
+ * When the data size approaches the size of the L1 cache, the fast
+ * string operation can prefetch and combine the data efficiently.
+ */
+ cmp %edi, %ecx
+ jae L(128bytesormore_endof_L1)
+ subl $128, %ecx
+L(128bytesormore_normal):
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movdqa %xmm0, 0x10(%edx)
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm0, 0x30(%edx)
+ movdqa %xmm0, 0x40(%edx)
+ movdqa %xmm0, 0x50(%edx)
+ movdqa %xmm0, 0x60(%edx)
+ movdqa %xmm0, 0x70(%edx)
+ lea 128(%edx), %edx
+ jb L(128bytesless_normal)
+
+
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movdqa %xmm0, 0x10(%edx)
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm0, 0x30(%edx)
+ movdqa %xmm0, 0x40(%edx)
+ movdqa %xmm0, 0x50(%edx)
+ movdqa %xmm0, 0x60(%edx)
+ movdqa %xmm0, 0x70(%edx)
+ lea 128(%edx), %edx
+ jae L(128bytesormore_normal)
+
+L(128bytesless_normal):
+ POP (%edi)
+ add $128, %ecx
+ BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+ CFI_PUSH (%edi)
+ ALIGN (4)
+L(128bytesormore_endof_L1):
+ mov %edx, %edi
+ mov %ecx, %edx
+ shr $2, %ecx
+ and $3, %edx
+ rep stosl
+ jz L(copy_page_by_rep_exit)
+ cmp $2, %edx
+ jb L(copy_page_by_rep_left_1)
+ movw %ax, (%edi)
+ add $2, %edi
+ sub $2, %edx
+ jz L(copy_page_by_rep_exit)
+L(copy_page_by_rep_left_1):
+ movb %al, (%edi)
+L(copy_page_by_rep_exit):
+ POP (%edi)
+ SETRTNVAL
+ RETURN
+
+ .pushsection .rodata.sse2,"a",@progbits
+ ALIGN (2)
+L(table_16_128bytes):
+ .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
+ .popsection
+
+ ALIGN (4)
+L(aligned_16_112bytes):
+ movdqa %xmm0, -112(%edx)
+L(aligned_16_96bytes):
+ movdqa %xmm0, -96(%edx)
+L(aligned_16_80bytes):
+ movdqa %xmm0, -80(%edx)
+L(aligned_16_64bytes):
+ movdqa %xmm0, -64(%edx)
+L(aligned_16_48bytes):
+ movdqa %xmm0, -48(%edx)
+L(aligned_16_32bytes):
+ movdqa %xmm0, -32(%edx)
+L(aligned_16_16bytes):
+ movdqa %xmm0, -16(%edx)
+L(aligned_16_0bytes):
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_113bytes):
+ movdqa %xmm0, -113(%edx)
+L(aligned_16_97bytes):
+ movdqa %xmm0, -97(%edx)
+L(aligned_16_81bytes):
+ movdqa %xmm0, -81(%edx)
+L(aligned_16_65bytes):
+ movdqa %xmm0, -65(%edx)
+L(aligned_16_49bytes):
+ movdqa %xmm0, -49(%edx)
+L(aligned_16_33bytes):
+ movdqa %xmm0, -33(%edx)
+L(aligned_16_17bytes):
+ movdqa %xmm0, -17(%edx)
+L(aligned_16_1bytes):
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_114bytes):
+ movdqa %xmm0, -114(%edx)
+L(aligned_16_98bytes):
+ movdqa %xmm0, -98(%edx)
+L(aligned_16_82bytes):
+ movdqa %xmm0, -82(%edx)
+L(aligned_16_66bytes):
+ movdqa %xmm0, -66(%edx)
+L(aligned_16_50bytes):
+ movdqa %xmm0, -50(%edx)
+L(aligned_16_34bytes):
+ movdqa %xmm0, -34(%edx)
+L(aligned_16_18bytes):
+ movdqa %xmm0, -18(%edx)
+L(aligned_16_2bytes):
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_115bytes):
+ movdqa %xmm0, -115(%edx)
+L(aligned_16_99bytes):
+ movdqa %xmm0, -99(%edx)
+L(aligned_16_83bytes):
+ movdqa %xmm0, -83(%edx)
+L(aligned_16_67bytes):
+ movdqa %xmm0, -67(%edx)
+L(aligned_16_51bytes):
+ movdqa %xmm0, -51(%edx)
+L(aligned_16_35bytes):
+ movdqa %xmm0, -35(%edx)
+L(aligned_16_19bytes):
+ movdqa %xmm0, -19(%edx)
+L(aligned_16_3bytes):
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_116bytes):
+ movdqa %xmm0, -116(%edx)
+L(aligned_16_100bytes):
+ movdqa %xmm0, -100(%edx)
+L(aligned_16_84bytes):
+ movdqa %xmm0, -84(%edx)
+L(aligned_16_68bytes):
+ movdqa %xmm0, -68(%edx)
+L(aligned_16_52bytes):
+ movdqa %xmm0, -52(%edx)
+L(aligned_16_36bytes):
+ movdqa %xmm0, -36(%edx)
+L(aligned_16_20bytes):
+ movdqa %xmm0, -20(%edx)
+L(aligned_16_4bytes):
+ movl %eax, -4(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_117bytes):
+ movdqa %xmm0, -117(%edx)
+L(aligned_16_101bytes):
+ movdqa %xmm0, -101(%edx)
+L(aligned_16_85bytes):
+ movdqa %xmm0, -85(%edx)
+L(aligned_16_69bytes):
+ movdqa %xmm0, -69(%edx)
+L(aligned_16_53bytes):
+ movdqa %xmm0, -53(%edx)
+L(aligned_16_37bytes):
+ movdqa %xmm0, -37(%edx)
+L(aligned_16_21bytes):
+ movdqa %xmm0, -21(%edx)
+L(aligned_16_5bytes):
+ movl %eax, -5(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_118bytes):
+ movdqa %xmm0, -118(%edx)
+L(aligned_16_102bytes):
+ movdqa %xmm0, -102(%edx)
+L(aligned_16_86bytes):
+ movdqa %xmm0, -86(%edx)
+L(aligned_16_70bytes):
+ movdqa %xmm0, -70(%edx)
+L(aligned_16_54bytes):
+ movdqa %xmm0, -54(%edx)
+L(aligned_16_38bytes):
+ movdqa %xmm0, -38(%edx)
+L(aligned_16_22bytes):
+ movdqa %xmm0, -22(%edx)
+L(aligned_16_6bytes):
+ movl %eax, -6(%edx)
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_119bytes):
+ movdqa %xmm0, -119(%edx)
+L(aligned_16_103bytes):
+ movdqa %xmm0, -103(%edx)
+L(aligned_16_87bytes):
+ movdqa %xmm0, -87(%edx)
+L(aligned_16_71bytes):
+ movdqa %xmm0, -71(%edx)
+L(aligned_16_55bytes):
+ movdqa %xmm0, -55(%edx)
+L(aligned_16_39bytes):
+ movdqa %xmm0, -39(%edx)
+L(aligned_16_23bytes):
+ movdqa %xmm0, -23(%edx)
+L(aligned_16_7bytes):
+ movl %eax, -7(%edx)
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_120bytes):
+ movdqa %xmm0, -120(%edx)
+L(aligned_16_104bytes):
+ movdqa %xmm0, -104(%edx)
+L(aligned_16_88bytes):
+ movdqa %xmm0, -88(%edx)
+L(aligned_16_72bytes):
+ movdqa %xmm0, -72(%edx)
+L(aligned_16_56bytes):
+ movdqa %xmm0, -56(%edx)
+L(aligned_16_40bytes):
+ movdqa %xmm0, -40(%edx)
+L(aligned_16_24bytes):
+ movdqa %xmm0, -24(%edx)
+L(aligned_16_8bytes):
+ movq %xmm0, -8(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_121bytes):
+ movdqa %xmm0, -121(%edx)
+L(aligned_16_105bytes):
+ movdqa %xmm0, -105(%edx)
+L(aligned_16_89bytes):
+ movdqa %xmm0, -89(%edx)
+L(aligned_16_73bytes):
+ movdqa %xmm0, -73(%edx)
+L(aligned_16_57bytes):
+ movdqa %xmm0, -57(%edx)
+L(aligned_16_41bytes):
+ movdqa %xmm0, -41(%edx)
+L(aligned_16_25bytes):
+ movdqa %xmm0, -25(%edx)
+L(aligned_16_9bytes):
+ movq %xmm0, -9(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_122bytes):
+ movdqa %xmm0, -122(%edx)
+L(aligned_16_106bytes):
+ movdqa %xmm0, -106(%edx)
+L(aligned_16_90bytes):
+ movdqa %xmm0, -90(%edx)
+L(aligned_16_74bytes):
+ movdqa %xmm0, -74(%edx)
+L(aligned_16_58bytes):
+ movdqa %xmm0, -58(%edx)
+L(aligned_16_42bytes):
+ movdqa %xmm0, -42(%edx)
+L(aligned_16_26bytes):
+ movdqa %xmm0, -26(%edx)
+L(aligned_16_10bytes):
+ movq %xmm0, -10(%edx)
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_123bytes):
+ movdqa %xmm0, -123(%edx)
+L(aligned_16_107bytes):
+ movdqa %xmm0, -107(%edx)
+L(aligned_16_91bytes):
+ movdqa %xmm0, -91(%edx)
+L(aligned_16_75bytes):
+ movdqa %xmm0, -75(%edx)
+L(aligned_16_59bytes):
+ movdqa %xmm0, -59(%edx)
+L(aligned_16_43bytes):
+ movdqa %xmm0, -43(%edx)
+L(aligned_16_27bytes):
+ movdqa %xmm0, -27(%edx)
+L(aligned_16_11bytes):
+ movq %xmm0, -11(%edx)
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_124bytes):
+ movdqa %xmm0, -124(%edx)
+L(aligned_16_108bytes):
+ movdqa %xmm0, -108(%edx)
+L(aligned_16_92bytes):
+ movdqa %xmm0, -92(%edx)
+L(aligned_16_76bytes):
+ movdqa %xmm0, -76(%edx)
+L(aligned_16_60bytes):
+ movdqa %xmm0, -60(%edx)
+L(aligned_16_44bytes):
+ movdqa %xmm0, -44(%edx)
+L(aligned_16_28bytes):
+ movdqa %xmm0, -28(%edx)
+L(aligned_16_12bytes):
+ movq %xmm0, -12(%edx)
+ movl %eax, -4(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_125bytes):
+ movdqa %xmm0, -125(%edx)
+L(aligned_16_109bytes):
+ movdqa %xmm0, -109(%edx)
+L(aligned_16_93bytes):
+ movdqa %xmm0, -93(%edx)
+L(aligned_16_77bytes):
+ movdqa %xmm0, -77(%edx)
+L(aligned_16_61bytes):
+ movdqa %xmm0, -61(%edx)
+L(aligned_16_45bytes):
+ movdqa %xmm0, -45(%edx)
+L(aligned_16_29bytes):
+ movdqa %xmm0, -29(%edx)
+L(aligned_16_13bytes):
+ movq %xmm0, -13(%edx)
+ movl %eax, -5(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_126bytes):
+ movdqa %xmm0, -126(%edx)
+L(aligned_16_110bytes):
+ movdqa %xmm0, -110(%edx)
+L(aligned_16_94bytes):
+ movdqa %xmm0, -94(%edx)
+L(aligned_16_78bytes):
+ movdqa %xmm0, -78(%edx)
+L(aligned_16_62bytes):
+ movdqa %xmm0, -62(%edx)
+L(aligned_16_46bytes):
+ movdqa %xmm0, -46(%edx)
+L(aligned_16_30bytes):
+ movdqa %xmm0, -30(%edx)
+L(aligned_16_14bytes):
+ movq %xmm0, -14(%edx)
+ movl %eax, -6(%edx)
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_127bytes):
+ movdqa %xmm0, -127(%edx)
+L(aligned_16_111bytes):
+ movdqa %xmm0, -111(%edx)
+L(aligned_16_95bytes):
+ movdqa %xmm0, -95(%edx)
+L(aligned_16_79bytes):
+ movdqa %xmm0, -79(%edx)
+L(aligned_16_63bytes):
+ movdqa %xmm0, -63(%edx)
+L(aligned_16_47bytes):
+ movdqa %xmm0, -47(%edx)
+L(aligned_16_31bytes):
+ movdqa %xmm0, -31(%edx)
+L(aligned_16_15bytes):
+ movq %xmm0, -15(%edx)
+ movl %eax, -7(%edx)
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN_END
+
+END (__memset_sse2_rep)
+
+#endif
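
[Editor's note: The L(128bytesormore_endof_L1) path is what gives this file its -rep suffix: once the fill size approaches the data cache size (the threshold derived from __x86_data_cache_size above), it hands the bulk of the work to rep stosl, splitting the count into dwords plus a 0-3 byte tail (the shr $2 / and $3 pair). A rough C equivalent using GNU inline assembly; the function name is ours and this is a sketch, not the glibc implementation:

#include <stddef.h>
#include <stdint.h>

/* PATTERN must already hold the fill byte replicated into all four
   byte lanes, as the prologue of __memset_sse2_rep arranges.  */
static void memset_rep_stosl (void *dst, uint32_t pattern, size_t n)
{
  void *d = dst;
  size_t words = n >> 2;                /* shr $2, %ecx */
  size_t tail = n & 3;                  /* and $3, %edx */
  __asm__ volatile ("rep stosl"
                    : "+D" (d), "+c" (words)
                    : "a" (pattern)
                    : "memory");
  unsigned char *p = d;                 /* rep stosl advanced EDI */
  if (tail >= 2)                        /* movw %ax, (%edi) */
    {
      p[0] = (unsigned char) pattern;
      p[1] = (unsigned char) pattern;
      p += 2;
      tail -= 2;
    }
  if (tail)                             /* movb %al, (%edi) */
    *p = (unsigned char) pattern;
}

Per the file's own comment, this pays off because the fast string microcode prefetches and combines the writes itself once the data no longer fits in L1.]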
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2.S
new file mode 100644
index 0000000000..d7b8be9114
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memset-sse2.S
@@ -0,0 +1,860 @@
+/* memset with SSE2
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef USE_AS_BZERO
+# define DEST PARMS
+# define LEN DEST+4
+# define SETRTNVAL
+#else
+# define DEST PARMS
+# define CHR DEST+4
+# define LEN CHR+4
+# define SETRTNVAL movl DEST(%esp), %eax
+#endif
+
+#ifdef SHARED
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define PARMS 8 /* Preserve EBX. */
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ add $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ add (%ebx,%ecx,4), %ebx; \
+ add %ecx, %edx; \
+ /* We loaded the jump table and adjusted EDX. Go. */ \
+ jmp *%ebx
+#else
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define PARMS 4
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+ absolute offsets. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
+ add %ecx, %edx; \
+ jmp *TABLE(,%ecx,4)
+#endif
+
+ .section .text.sse2,"ax",@progbits
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
+ENTRY (__memset_chk_sse2)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_sse2)
+#endif
+ENTRY (__memset_sse2)
+ ENTRANCE
+
+ movl LEN(%esp), %ecx
+#ifdef USE_AS_BZERO
+ xor %eax, %eax
+#else
+ movzbl CHR(%esp), %eax
+ movb %al, %ah
+	/* Spread the fill byte to all four bytes of EAX. */
+ movl %eax, %edx
+ shl $16, %eax
+ or %edx, %eax
+#endif
+ movl DEST(%esp), %edx
+ cmp $32, %ecx
+ jae L(32bytesormore)
+
+L(write_less32bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
+
+
+ .pushsection .rodata.sse2,"a",@progbits
+ ALIGN (2)
+L(table_less_32bytes):
+ .int JMPTBL (L(write_0bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_1bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_2bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_3bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_4bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_5bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_6bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_7bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_8bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_9bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_10bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_11bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_12bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_13bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_14bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_15bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_16bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_17bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_18bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_19bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_20bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_21bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_22bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_23bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_24bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_25bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_26bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_27bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_28bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_29bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_30bytes), L(table_less_32bytes))
+ .int JMPTBL (L(write_31bytes), L(table_less_32bytes))
+ .popsection
+
+ ALIGN (4)
+L(write_28bytes):
+ movl %eax, -28(%edx)
+L(write_24bytes):
+ movl %eax, -24(%edx)
+L(write_20bytes):
+ movl %eax, -20(%edx)
+L(write_16bytes):
+ movl %eax, -16(%edx)
+L(write_12bytes):
+ movl %eax, -12(%edx)
+L(write_8bytes):
+ movl %eax, -8(%edx)
+L(write_4bytes):
+ movl %eax, -4(%edx)
+L(write_0bytes):
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(write_29bytes):
+ movl %eax, -29(%edx)
+L(write_25bytes):
+ movl %eax, -25(%edx)
+L(write_21bytes):
+ movl %eax, -21(%edx)
+L(write_17bytes):
+ movl %eax, -17(%edx)
+L(write_13bytes):
+ movl %eax, -13(%edx)
+L(write_9bytes):
+ movl %eax, -9(%edx)
+L(write_5bytes):
+ movl %eax, -5(%edx)
+L(write_1bytes):
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(write_30bytes):
+ movl %eax, -30(%edx)
+L(write_26bytes):
+ movl %eax, -26(%edx)
+L(write_22bytes):
+ movl %eax, -22(%edx)
+L(write_18bytes):
+ movl %eax, -18(%edx)
+L(write_14bytes):
+ movl %eax, -14(%edx)
+L(write_10bytes):
+ movl %eax, -10(%edx)
+L(write_6bytes):
+ movl %eax, -6(%edx)
+L(write_2bytes):
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(write_31bytes):
+ movl %eax, -31(%edx)
+L(write_27bytes):
+ movl %eax, -27(%edx)
+L(write_23bytes):
+ movl %eax, -23(%edx)
+L(write_19bytes):
+ movl %eax, -19(%edx)
+L(write_15bytes):
+ movl %eax, -15(%edx)
+L(write_11bytes):
+ movl %eax, -11(%edx)
+L(write_7bytes):
+ movl %eax, -7(%edx)
+L(write_3bytes):
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+/* ECX >= 32 and EDX is 4-byte aligned. */
+L(32bytesormore):
+ /* Fill xmm0 with the pattern. */
+#ifdef USE_AS_BZERO
+ pxor %xmm0, %xmm0
+#else
+ movd %eax, %xmm0
+ pshufd $0, %xmm0, %xmm0
+#endif
+ testl $0xf, %edx
+ jz L(aligned_16)
+/* ECX >= 32 and EDX is not 16-byte aligned. */
+L(not_aligned_16):
+ movdqu %xmm0, (%edx)
+ movl %edx, %eax
+ and $-16, %edx
+ add $16, %edx
+ sub %edx, %eax
+ add %eax, %ecx
+ movd %xmm0, %eax
+
+ ALIGN (4)
+L(aligned_16):
+ cmp $128, %ecx
+ jae L(128bytesormore)
+
+L(aligned_16_less128bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+ ALIGN (4)
+L(128bytesormore):
+#ifdef SHARED_CACHE_SIZE
+ PUSH (%ebx)
+ mov $SHARED_CACHE_SIZE, %ebx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
+# else
+ PUSH (%ebx)
+ mov __x86_shared_cache_size, %ebx
+# endif
+#endif
+ cmp %ebx, %ecx
+ jae L(128bytesormore_nt_start)
+
+
+#ifdef DATA_CACHE_SIZE
+ POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+ cmp $DATA_CACHE_SIZE, %ecx
+#else
+# ifdef SHARED
+# define RESTORE_EBX_STATE
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
+# else
+ POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+ cmp __x86_data_cache_size, %ecx
+# endif
+#endif
+
+ jae L(128bytes_L2_normal)
+ subl $128, %ecx
+L(128bytesormore_normal):
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movdqa %xmm0, 0x10(%edx)
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm0, 0x30(%edx)
+ movdqa %xmm0, 0x40(%edx)
+ movdqa %xmm0, 0x50(%edx)
+ movdqa %xmm0, 0x60(%edx)
+ movdqa %xmm0, 0x70(%edx)
+ lea 128(%edx), %edx
+ jb L(128bytesless_normal)
+
+
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movdqa %xmm0, 0x10(%edx)
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm0, 0x30(%edx)
+ movdqa %xmm0, 0x40(%edx)
+ movdqa %xmm0, 0x50(%edx)
+ movdqa %xmm0, 0x60(%edx)
+ movdqa %xmm0, 0x70(%edx)
+ lea 128(%edx), %edx
+ jae L(128bytesormore_normal)
+
+L(128bytesless_normal):
+ add $128, %ecx
+ BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+ ALIGN (4)
+L(128bytes_L2_normal):
+ prefetcht0 0x380(%edx)
+ prefetcht0 0x3c0(%edx)
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm0, 0x10(%edx)
+ movaps %xmm0, 0x20(%edx)
+ movaps %xmm0, 0x30(%edx)
+ movaps %xmm0, 0x40(%edx)
+ movaps %xmm0, 0x50(%edx)
+ movaps %xmm0, 0x60(%edx)
+ movaps %xmm0, 0x70(%edx)
+ add $128, %edx
+ cmp $128, %ecx
+ jae L(128bytes_L2_normal)
+
+L(128bytesless_L2_normal):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+ RESTORE_EBX_STATE
+L(128bytesormore_nt_start):
+ sub %ebx, %ecx
+ ALIGN (4)
+L(128bytesormore_shared_cache_loop):
+ prefetcht0 0x3c0(%edx)
+ prefetcht0 0x380(%edx)
+ sub $0x80, %ebx
+ movdqa %xmm0, (%edx)
+ movdqa %xmm0, 0x10(%edx)
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm0, 0x30(%edx)
+ movdqa %xmm0, 0x40(%edx)
+ movdqa %xmm0, 0x50(%edx)
+ movdqa %xmm0, 0x60(%edx)
+ movdqa %xmm0, 0x70(%edx)
+ add $0x80, %edx
+ cmp $0x80, %ebx
+ jae L(128bytesormore_shared_cache_loop)
+ cmp $0x80, %ecx
+ jb L(shared_cache_loop_end)
+ ALIGN (4)
+L(128bytesormore_nt):
+ sub $0x80, %ecx
+ movntdq %xmm0, (%edx)
+ movntdq %xmm0, 0x10(%edx)
+ movntdq %xmm0, 0x20(%edx)
+ movntdq %xmm0, 0x30(%edx)
+ movntdq %xmm0, 0x40(%edx)
+ movntdq %xmm0, 0x50(%edx)
+ movntdq %xmm0, 0x60(%edx)
+ movntdq %xmm0, 0x70(%edx)
+ add $0x80, %edx
+ cmp $0x80, %ecx
+ jae L(128bytesormore_nt)
+ sfence
+L(shared_cache_loop_end):
+#if defined DATA_CACHE_SIZE || !defined SHARED
+ POP (%ebx)
+#endif
+ BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
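+
[Editor's note: For fills at least as large as the shared cache (__x86_shared_cache_size), the L(128bytesormore_nt) loop above switches to movntdq so the streaming stores bypass the cache, then issues one sfence to order them before returning. The same pattern with intrinsics, as a sketch; the alignment and size preconditions are ours:

#include <emmintrin.h>
#include <stddef.h>

/* DST must be 16-byte aligned and N a multiple of 128 in this sketch;
   the assembly dispatches the remainder through its jump table.  */
static void memset_nt_128 (unsigned char *dst, unsigned char c, size_t n)
{
  __m128i v = _mm_set1_epi8 ((char) c);
  for (; n >= 128; n -= 128, dst += 128)
    for (int i = 0; i < 128; i += 16)
      _mm_stream_si128 ((__m128i *) (dst + i), v);
  _mm_sfence ();
}

Bypassing the cache matters at this size: a cached fill larger than the cache would evict its entire contents in exchange for data that will not be re-read soon.]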
+
+ .pushsection .rodata.sse2,"a",@progbits
+ ALIGN (2)
+L(table_16_128bytes):
+ .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
+ .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
+ .popsection
+
+ ALIGN (4)
+L(aligned_16_112bytes):
+ movdqa %xmm0, -112(%edx)
+L(aligned_16_96bytes):
+ movdqa %xmm0, -96(%edx)
+L(aligned_16_80bytes):
+ movdqa %xmm0, -80(%edx)
+L(aligned_16_64bytes):
+ movdqa %xmm0, -64(%edx)
+L(aligned_16_48bytes):
+ movdqa %xmm0, -48(%edx)
+L(aligned_16_32bytes):
+ movdqa %xmm0, -32(%edx)
+L(aligned_16_16bytes):
+ movdqa %xmm0, -16(%edx)
+L(aligned_16_0bytes):
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_113bytes):
+ movdqa %xmm0, -113(%edx)
+L(aligned_16_97bytes):
+ movdqa %xmm0, -97(%edx)
+L(aligned_16_81bytes):
+ movdqa %xmm0, -81(%edx)
+L(aligned_16_65bytes):
+ movdqa %xmm0, -65(%edx)
+L(aligned_16_49bytes):
+ movdqa %xmm0, -49(%edx)
+L(aligned_16_33bytes):
+ movdqa %xmm0, -33(%edx)
+L(aligned_16_17bytes):
+ movdqa %xmm0, -17(%edx)
+L(aligned_16_1bytes):
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_114bytes):
+ movdqa %xmm0, -114(%edx)
+L(aligned_16_98bytes):
+ movdqa %xmm0, -98(%edx)
+L(aligned_16_82bytes):
+ movdqa %xmm0, -82(%edx)
+L(aligned_16_66bytes):
+ movdqa %xmm0, -66(%edx)
+L(aligned_16_50bytes):
+ movdqa %xmm0, -50(%edx)
+L(aligned_16_34bytes):
+ movdqa %xmm0, -34(%edx)
+L(aligned_16_18bytes):
+ movdqa %xmm0, -18(%edx)
+L(aligned_16_2bytes):
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_115bytes):
+ movdqa %xmm0, -115(%edx)
+L(aligned_16_99bytes):
+ movdqa %xmm0, -99(%edx)
+L(aligned_16_83bytes):
+ movdqa %xmm0, -83(%edx)
+L(aligned_16_67bytes):
+ movdqa %xmm0, -67(%edx)
+L(aligned_16_51bytes):
+ movdqa %xmm0, -51(%edx)
+L(aligned_16_35bytes):
+ movdqa %xmm0, -35(%edx)
+L(aligned_16_19bytes):
+ movdqa %xmm0, -19(%edx)
+L(aligned_16_3bytes):
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_116bytes):
+ movdqa %xmm0, -116(%edx)
+L(aligned_16_100bytes):
+ movdqa %xmm0, -100(%edx)
+L(aligned_16_84bytes):
+ movdqa %xmm0, -84(%edx)
+L(aligned_16_68bytes):
+ movdqa %xmm0, -68(%edx)
+L(aligned_16_52bytes):
+ movdqa %xmm0, -52(%edx)
+L(aligned_16_36bytes):
+ movdqa %xmm0, -36(%edx)
+L(aligned_16_20bytes):
+ movdqa %xmm0, -20(%edx)
+L(aligned_16_4bytes):
+ movl %eax, -4(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_117bytes):
+ movdqa %xmm0, -117(%edx)
+L(aligned_16_101bytes):
+ movdqa %xmm0, -101(%edx)
+L(aligned_16_85bytes):
+ movdqa %xmm0, -85(%edx)
+L(aligned_16_69bytes):
+ movdqa %xmm0, -69(%edx)
+L(aligned_16_53bytes):
+ movdqa %xmm0, -53(%edx)
+L(aligned_16_37bytes):
+ movdqa %xmm0, -37(%edx)
+L(aligned_16_21bytes):
+ movdqa %xmm0, -21(%edx)
+L(aligned_16_5bytes):
+ movl %eax, -5(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_118bytes):
+ movdqa %xmm0, -118(%edx)
+L(aligned_16_102bytes):
+ movdqa %xmm0, -102(%edx)
+L(aligned_16_86bytes):
+ movdqa %xmm0, -86(%edx)
+L(aligned_16_70bytes):
+ movdqa %xmm0, -70(%edx)
+L(aligned_16_54bytes):
+ movdqa %xmm0, -54(%edx)
+L(aligned_16_38bytes):
+ movdqa %xmm0, -38(%edx)
+L(aligned_16_22bytes):
+ movdqa %xmm0, -22(%edx)
+L(aligned_16_6bytes):
+ movl %eax, -6(%edx)
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_119bytes):
+ movdqa %xmm0, -119(%edx)
+L(aligned_16_103bytes):
+ movdqa %xmm0, -103(%edx)
+L(aligned_16_87bytes):
+ movdqa %xmm0, -87(%edx)
+L(aligned_16_71bytes):
+ movdqa %xmm0, -71(%edx)
+L(aligned_16_55bytes):
+ movdqa %xmm0, -55(%edx)
+L(aligned_16_39bytes):
+ movdqa %xmm0, -39(%edx)
+L(aligned_16_23bytes):
+ movdqa %xmm0, -23(%edx)
+L(aligned_16_7bytes):
+ movl %eax, -7(%edx)
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_120bytes):
+ movdqa %xmm0, -120(%edx)
+L(aligned_16_104bytes):
+ movdqa %xmm0, -104(%edx)
+L(aligned_16_88bytes):
+ movdqa %xmm0, -88(%edx)
+L(aligned_16_72bytes):
+ movdqa %xmm0, -72(%edx)
+L(aligned_16_56bytes):
+ movdqa %xmm0, -56(%edx)
+L(aligned_16_40bytes):
+ movdqa %xmm0, -40(%edx)
+L(aligned_16_24bytes):
+ movdqa %xmm0, -24(%edx)
+L(aligned_16_8bytes):
+ movq %xmm0, -8(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_121bytes):
+ movdqa %xmm0, -121(%edx)
+L(aligned_16_105bytes):
+ movdqa %xmm0, -105(%edx)
+L(aligned_16_89bytes):
+ movdqa %xmm0, -89(%edx)
+L(aligned_16_73bytes):
+ movdqa %xmm0, -73(%edx)
+L(aligned_16_57bytes):
+ movdqa %xmm0, -57(%edx)
+L(aligned_16_41bytes):
+ movdqa %xmm0, -41(%edx)
+L(aligned_16_25bytes):
+ movdqa %xmm0, -25(%edx)
+L(aligned_16_9bytes):
+ movq %xmm0, -9(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_122bytes):
+ movdqa %xmm0, -122(%edx)
+L(aligned_16_106bytes):
+ movdqa %xmm0, -106(%edx)
+L(aligned_16_90bytes):
+ movdqa %xmm0, -90(%edx)
+L(aligned_16_74bytes):
+ movdqa %xmm0, -74(%edx)
+L(aligned_16_58bytes):
+ movdqa %xmm0, -58(%edx)
+L(aligned_16_42bytes):
+ movdqa %xmm0, -42(%edx)
+L(aligned_16_26bytes):
+ movdqa %xmm0, -26(%edx)
+L(aligned_16_10bytes):
+ movq %xmm0, -10(%edx)
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_123bytes):
+ movdqa %xmm0, -123(%edx)
+L(aligned_16_107bytes):
+ movdqa %xmm0, -107(%edx)
+L(aligned_16_91bytes):
+ movdqa %xmm0, -91(%edx)
+L(aligned_16_75bytes):
+ movdqa %xmm0, -75(%edx)
+L(aligned_16_59bytes):
+ movdqa %xmm0, -59(%edx)
+L(aligned_16_43bytes):
+ movdqa %xmm0, -43(%edx)
+L(aligned_16_27bytes):
+ movdqa %xmm0, -27(%edx)
+L(aligned_16_11bytes):
+ movq %xmm0, -11(%edx)
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_124bytes):
+ movdqa %xmm0, -124(%edx)
+L(aligned_16_108bytes):
+ movdqa %xmm0, -108(%edx)
+L(aligned_16_92bytes):
+ movdqa %xmm0, -92(%edx)
+L(aligned_16_76bytes):
+ movdqa %xmm0, -76(%edx)
+L(aligned_16_60bytes):
+ movdqa %xmm0, -60(%edx)
+L(aligned_16_44bytes):
+ movdqa %xmm0, -44(%edx)
+L(aligned_16_28bytes):
+ movdqa %xmm0, -28(%edx)
+L(aligned_16_12bytes):
+ movq %xmm0, -12(%edx)
+ movl %eax, -4(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_125bytes):
+ movdqa %xmm0, -125(%edx)
+L(aligned_16_109bytes):
+ movdqa %xmm0, -109(%edx)
+L(aligned_16_93bytes):
+ movdqa %xmm0, -93(%edx)
+L(aligned_16_77bytes):
+ movdqa %xmm0, -77(%edx)
+L(aligned_16_61bytes):
+ movdqa %xmm0, -61(%edx)
+L(aligned_16_45bytes):
+ movdqa %xmm0, -45(%edx)
+L(aligned_16_29bytes):
+ movdqa %xmm0, -29(%edx)
+L(aligned_16_13bytes):
+ movq %xmm0, -13(%edx)
+ movl %eax, -5(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_126bytes):
+ movdqa %xmm0, -126(%edx)
+L(aligned_16_110bytes):
+ movdqa %xmm0, -110(%edx)
+L(aligned_16_94bytes):
+ movdqa %xmm0, -94(%edx)
+L(aligned_16_78bytes):
+ movdqa %xmm0, -78(%edx)
+L(aligned_16_62bytes):
+ movdqa %xmm0, -62(%edx)
+L(aligned_16_46bytes):
+ movdqa %xmm0, -46(%edx)
+L(aligned_16_30bytes):
+ movdqa %xmm0, -30(%edx)
+L(aligned_16_14bytes):
+ movq %xmm0, -14(%edx)
+ movl %eax, -6(%edx)
+ movw %ax, -2(%edx)
+ SETRTNVAL
+ RETURN
+
+ ALIGN (4)
+L(aligned_16_127bytes):
+ movdqa %xmm0, -127(%edx)
+L(aligned_16_111bytes):
+ movdqa %xmm0, -111(%edx)
+L(aligned_16_95bytes):
+ movdqa %xmm0, -95(%edx)
+L(aligned_16_79bytes):
+ movdqa %xmm0, -79(%edx)
+L(aligned_16_63bytes):
+ movdqa %xmm0, -63(%edx)
+L(aligned_16_47bytes):
+ movdqa %xmm0, -47(%edx)
+L(aligned_16_31bytes):
+ movdqa %xmm0, -31(%edx)
+L(aligned_16_15bytes):
+ movq %xmm0, -15(%edx)
+ movl %eax, -7(%edx)
+ movw %ax, -3(%edx)
+ movb %al, -1(%edx)
+ SETRTNVAL
+ RETURN_END
+
+END (__memset_sse2)
+
+#endif
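
The interleaved L(aligned_16_*bytes) labels above implement tail handling by dispatch: the jump table indexes on the residual byte count, each entry enters the shared movdqa chain at the right depth, and the last few bytes are finished with fixed 8/4/2/1-byte stores counted back from the end pointer in %edx. A minimal C sketch of the same idea follows; set_tail and its parameters are hypothetical names, not part of this file.

    #include <string.h>

    /* END points one past the last byte to fill; N is the residual
       count, 0 <= N < 128.  The assembly replaces the loop and the
       if-chain below with one jump-table entry per residue.  */
    static void
    set_tail (unsigned char *end, unsigned char c, unsigned int n)
    {
      while (n >= 16)                  /* the shared movdqa chain */
        {
          memset (end - n, c, 16);
          n -= 16;
        }
      if (n & 8) { memset (end - n, c, 8); n -= 8; }
      if (n & 4) { memset (end - n, c, 4); n -= 4; }
      if (n & 2) { memset (end - n, c, 2); n -= 2; }
      if (n & 1) end[-1] = c;
    }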
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memset.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memset.S
new file mode 100644
index 0000000000..f601663a9f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memset.S
@@ -0,0 +1,75 @@
+/* Multiple versions of memset
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+ .text
+ENTRY(memset)
+ .type memset, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memset_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memset_sse2)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memset_sse2_rep)
+2: ret
+END(memset)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memset_ia32, @function; \
+ .globl __memset_ia32; \
+ .p2align 4; \
+ __memset_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memset_ia32, .-__memset_ia32
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+ .type __memset_chk_ia32, @function; \
+ .globl __memset_chk_ia32; \
+ .p2align 4; \
+ __memset_chk_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+ cfi_endproc; .size __memset_chk_ia32, .-__memset_chk_ia32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC dispatch goes through.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memset; __GI_memset = __memset_ia32
+# endif
+
+# undef strong_alias
+# define strong_alias(original, alias)
+#endif
+
+#include "../memset.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/memset_chk.S b/REORG.TODO/sysdeps/i386/i686/multiarch/memset_chk.S
new file mode 100644
index 0000000000..573cf4208a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/memset_chk.S
@@ -0,0 +1,82 @@
+/* Multiple versions of __memset_chk
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+ .text
+ENTRY(__memset_chk)
+ .type __memset_chk, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__memset_chk_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memset_chk_sse2)
+ HAS_ARCH_FEATURE (Fast_Rep_String)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__memset_chk_sse2_rep)
+2: ret
+END(__memset_chk)
+
+# ifdef SHARED
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+ .section .gnu.warning.__memset_zero_constant_len_parameter
+ .string "memset used with constant zero length parameter; this could be due to transposed parameters"
+# else
+ .text
+ .type __memset_chk_sse2, @function
+ .p2align 4;
+__memset_chk_sse2:
+ cfi_startproc
+ CALL_MCOUNT
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp __memset_sse2
+ cfi_endproc
+ .size __memset_chk_sse2, .-__memset_chk_sse2
+
+ .type __memset_chk_sse2_rep, @function
+ .p2align 4;
+__memset_chk_sse2_rep:
+ cfi_startproc
+ CALL_MCOUNT
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp __memset_sse2_rep
+ cfi_endproc
+ .size __memset_chk_sse2_rep, .-__memset_chk_sse2_rep
+
+ .type __memset_chk_ia32, @function
+ .p2align 4;
+__memset_chk_ia32:
+ cfi_startproc
+ CALL_MCOUNT
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp __memset_ia32
+ cfi_endproc
+ .size __memset_chk_ia32, .-__memset_chk_ia32
+# endif
+#endif
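
Each __memset_chk_* stub above implements the fortify check: the extra fourth argument is the destination object size the compiler derived with __builtin_object_size, and the cmpl/jb pair diverts to __chk_fail before any byte is written when the fill length exceeds it. A C equivalent of one stub (my_memset_chk is a hypothetical name; __chk_fail is the real glibc abort hook):

    #include <stddef.h>
    #include <string.h>

    extern void __chk_fail (void) __attribute__ ((noreturn));

    void *
    my_memset_chk (void *dst, int c, size_t n, size_t dstlen)
    {
      if (n > dstlen)          /* cmpl %eax, 16(%esp); jb __chk_fail */
        __chk_fail ();
      return memset (dst, c, n);
    }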
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S b/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S
new file mode 100644
index 0000000000..88c0e5776c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S
@@ -0,0 +1,3 @@
+#define USE_AS_RAWMEMCHR
+#define MEMCHR __rawmemchr_sse2_bsf
+#include "memchr-sse2-bsf.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S
new file mode 100644
index 0000000000..038c74896b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_RAWMEMCHR
+#define MEMCHR __rawmemchr_sse2
+#include "memchr-sse2.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr.S b/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr.S
new file mode 100644
index 0000000000..0a41d63ee8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/rawmemchr.S
@@ -0,0 +1,65 @@
+/* Multiple versions of rawmemchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(__rawmemchr)
+ .type __rawmemchr, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ HAS_ARCH_FEATURE (Slow_BSF)
+ jz 3f
+
+ LOAD_FUNC_GOT_EAX (__rawmemchr_sse2)
+ ret
+
+2: LOAD_FUNC_GOT_EAX (__rawmemchr_ia32)
+ ret
+
+3: LOAD_FUNC_GOT_EAX (__rawmemchr_sse2_bsf)
+ ret
+END(__rawmemchr)
+
+weak_alias(__rawmemchr, rawmemchr)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __rawmemchr_ia32, @function; \
+ .globl __rawmemchr_ia32; \
+ .p2align 4; \
+ __rawmemchr_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __rawmemchr_ia32, .-__rawmemchr_ia32
+
+# undef libc_hidden_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC dispatch goes through.  */
+# define libc_hidden_def(name) \
+ .globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_ia32
+
+#endif
+#include "../../rawmemchr.S"
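
rawmemchr is memchr without a length bound: the caller guarantees the byte occurs, so the scan never needs a residual-count check, which is what lets the SSE2 variants stride 16 or 64 bytes at a time. Reference behavior in C (my_rawmemchr is a hypothetical name):

    void *
    my_rawmemchr (const void *s, int c)
    {
      const unsigned char *p = s;
      while (*p != (unsigned char) c)
        ++p;
      return (void *) p;
    }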
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/rtld-strnlen.c b/REORG.TODO/sysdeps/i386/i686/multiarch/rtld-strnlen.c
new file mode 100644
index 0000000000..1aa5440644
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/rtld-strnlen.c
@@ -0,0 +1 @@
+#include <string/strnlen.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/s_fma-fma.c b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fma-fma.c
new file mode 100644
index 0000000000..2e9619f97c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fma-fma.c
@@ -0,0 +1,27 @@
+/* FMA version of fma.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+double
+__fma_fma (double x, double y, double z)
+{
+ asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
+ return x;
+}
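
In the constraints above, "=x" puts the result in an SSE register, "0" forces the first input into that same register (the 213 form overwrites its first operand), and "xm" lets z come from a register or memory. The point of the instruction is the single rounding: x * y + z is computed exactly and rounded once, so it can recover the low half of a product that separate multiply and add would discard. A small demonstration, not part of this patch (link with -lm):

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      double x = 1.0 + 0x1p-27;
      /* Exact x*x minus rounded x*x: nonzero only when fma fuses.  */
      double err = fma (x, x, -(x * x));
      printf ("low part of x*x: %a\n", err);
      return 0;
    }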
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/s_fma.c b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fma.c
new file mode 100644
index 0000000000..411ebb2ba9
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fma.c
@@ -0,0 +1,34 @@
+/* Multiple versions of fma.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <math.h>
+#include <init-arch.h>
+
+extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
+extern double __fma_fma (double x, double y, double z) attribute_hidden;
+
+libm_ifunc (__fma,
+ HAS_ARCH_FEATURE (FMA_Usable) ? __fma_fma : __fma_ia32);
+weak_alias (__fma, fma)
+
+#define __fma __fma_ia32
+
+#include <sysdeps/ieee754/ldbl-96/s_fma.c>
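
libm_ifunc binds __fma to one implementation at symbol-resolution time, so the FMA_Usable test is never repeated per call. Outside glibc the same once-only selection can be approximated with a self-rewriting function pointer; a sketch under those assumptions (my_fma* are hypothetical names; the rewrite is idempotent, so a race between first callers is benign):

    #include <math.h>

    static double
    my_fma_hw (double x, double y, double z)
    {
      __asm__ ("vfmadd213sd %3, %2, %0"
               : "=x" (x) : "0" (x), "x" (y), "xm" (z));
      return x;
    }

    static double my_fma_pick (double x, double y, double z);

    /* The first call lands in my_fma_pick, which rewrites the pointer;
       every later call jumps straight to the chosen implementation.  */
    static double (*my_fma_impl) (double, double, double) = my_fma_pick;

    static double
    my_fma_pick (double x, double y, double z)
    {
      my_fma_impl = __builtin_cpu_supports ("fma") ? my_fma_hw : fma;
      return my_fma_impl (x, y, z);
    }

    double
    my_fma (double x, double y, double z)
    {
      return my_fma_impl (x, y, z);
    }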
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
new file mode 100644
index 0000000000..ee57abfda2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
@@ -0,0 +1,27 @@
+/* FMA version of fmaf.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+float
+__fmaf_fma (float x, float y, float z)
+{
+ asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
+ return x;
+}
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf.c b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf.c
new file mode 100644
index 0000000000..00b0fbcfc5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/s_fmaf.c
@@ -0,0 +1,34 @@
+/* Multiple versions of fmaf.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <math.h>
+#include <init-arch.h>
+
+extern float __fmaf_ia32 (float x, float y, float z) attribute_hidden;
+extern float __fmaf_fma (float x, float y, float z) attribute_hidden;
+
+libm_ifunc (__fmaf,
+ HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma : __fmaf_ia32);
+weak_alias (__fmaf, fmaf)
+
+#define __fmaf __fmaf_ia32
+
+#include <sysdeps/ieee754/dbl-64/s_fmaf.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/sched_cpucount.c b/REORG.TODO/sysdeps/i386/i686/multiarch/sched_cpucount.c
new file mode 100644
index 0000000000..7db31b02f8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/sched_cpucount.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/sched_cpucount.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
new file mode 100644
index 0000000000..46ca1b3074
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_STPCPY
+#define STRCPY __stpcpy_sse2
+#include "strcpy-sse2.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
new file mode 100644
index 0000000000..d971c2da38
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
@@ -0,0 +1,3 @@
+#define USE_AS_STPCPY
+#define STRCPY __stpcpy_ssse3
+#include "strcpy-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy.S
new file mode 100644
index 0000000000..ee81ab6ae3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/stpcpy.S
@@ -0,0 +1,9 @@
+/* Multiple versions of stpcpy
+ All versions must be listed in ifunc-impl-list.c. */
+#define USE_AS_STPCPY
+#define STRCPY __stpcpy
+#include "strcpy.S"
+
+weak_alias (__stpcpy, stpcpy)
+libc_hidden_def (__stpcpy)
+libc_hidden_builtin_def (stpcpy)
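
stpcpy copies like strcpy but returns the address of the terminating NUL it wrote, which turns chained concatenation from quadratic (re-scanning the growing prefix) into linear. Reference behavior (my_stpcpy is a hypothetical name):

    #include <string.h>

    char *
    my_stpcpy (char *dst, const char *src)
    {
      size_t len = strlen (src);
      memcpy (dst, src, len + 1);   /* include the NUL */
      return dst + len;             /* point at the NUL, not at DST */
    }

Typical use: p = my_stpcpy (my_stpcpy (buf, dir), name); appends both pieces in one left-to-right pass.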
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
new file mode 100644
index 0000000000..37a703cb76
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
@@ -0,0 +1,4 @@
+#define USE_AS_STPCPY
+#define USE_AS_STRNCPY
+#define STRCPY __stpncpy_sse2
+#include "strcpy-sse2.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
new file mode 100644
index 0000000000..14ed16f6b5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
@@ -0,0 +1,4 @@
+#define USE_AS_STPCPY
+#define USE_AS_STRNCPY
+#define STRCPY __stpncpy_ssse3
+#include "strcpy-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy.S
new file mode 100644
index 0000000000..2698ca6a8c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/stpncpy.S
@@ -0,0 +1,8 @@
+/* Multiple versions of stpncpy
+ All versions must be listed in ifunc-impl-list.c. */
+#define STRCPY __stpncpy
+#define USE_AS_STPCPY
+#define USE_AS_STRNCPY
+#include "strcpy.S"
+
+weak_alias (__stpncpy, stpncpy)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp-c.c
new file mode 100644
index 0000000000..753c6ec84a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp-c.c
@@ -0,0 +1,12 @@
+#include <string.h>
+
+extern __typeof (strcasecmp) __strcasecmp_nonascii;
+
+#define __strcasecmp __strcasecmp_nonascii
+#include <string/strcasecmp.c>
+
+strong_alias (__strcasecmp_nonascii, __strcasecmp_ia32)
+
+/* The needs of strcasecmp within libc are minimal; there is no need to
+   go through the IFUNC.  */
+strong_alias (__strcasecmp_nonascii, __GI___strcasecmp)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp.S
new file mode 100644
index 0000000000..ec59276408
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp.S
@@ -0,0 +1,39 @@
+/* Entry point for multi-version x86 strcasecmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+ .text
+ENTRY(__strcasecmp)
+ .type __strcasecmp, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strcasecmp_ia32)
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strcasecmp_ssse3)
+ HAS_CPU_FEATURE (SSE4_2)
+ jz 2f
+ HAS_ARCH_FEATURE (Slow_SSE4_2)
+ jnz 2f
+ LOAD_FUNC_GOT_EAX (__strcasecmp_sse4_2)
+2: ret
+END(__strcasecmp)
+
+weak_alias (__strcasecmp, strcasecmp)
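
The resolver above encodes a preference order with a veto: SSE4.2 is used only when present and not flagged Slow_SSE4_2; otherwise SSSE3 when present; otherwise the ia32 baseline. The same logic in C, with hypothetical externs and flags standing in for the real variants and the HAS_*_FEATURE tests:

    typedef int (*cmp_fn) (const char *, const char *);

    extern int cmp_ia32 (const char *, const char *);
    extern int cmp_ssse3 (const char *, const char *);
    extern int cmp_sse4_2 (const char *, const char *);

    static cmp_fn
    pick_strcasecmp (int has_ssse3, int has_sse4_2, int slow_sse4_2)
    {
      cmp_fn fn = cmp_ia32;               /* the first LOAD_FUNC_GOT_EAX */
      if (has_ssse3)
        {
          fn = cmp_ssse3;
          if (has_sse4_2 && !slow_sse4_2) /* the jz/jnz pair above */
            fn = cmp_sse4_2;
        }
      return fn;
    }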
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c
new file mode 100644
index 0000000000..d4fcd2b4a1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c
@@ -0,0 +1,13 @@
+#include <string.h>
+
+extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii;
+
+#define __strcasecmp_l __strcasecmp_l_nonascii
+#define USE_IN_EXTENDED_LOCALE_MODEL 1
+#include <string/strcasecmp.c>
+
+strong_alias (__strcasecmp_l_nonascii, __strcasecmp_l_ia32)
+
+/* The needs of strcasecmp_l within libc are minimal; there is no need
+   to go through the IFUNC.  */
+strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
new file mode 100644
index 0000000000..411d4153f2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
@@ -0,0 +1,2 @@
+#define USE_AS_STRCASECMP_L 1
+#include "strcmp-sse4.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S
new file mode 100644
index 0000000000..a22b93c518
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S
@@ -0,0 +1,2 @@
+#define USE_AS_STRCASECMP_L 1
+#include "strcmp-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l.S
new file mode 100644
index 0000000000..711c09b0dc
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcasecmp_l.S
@@ -0,0 +1,7 @@
+/* Multiple versions of strcasecmp_l
+ All versions must be listed in ifunc-impl-list.c. */
+#define STRCMP __strcasecmp_l
+#define USE_AS_STRCASECMP_L
+#include "strcmp.S"
+
+weak_alias (__strcasecmp_l, strcasecmp_l)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcat-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcat-sse2.S
new file mode 100644
index 0000000000..6359c7330c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcat-sse2.S
@@ -0,0 +1,1245 @@
+/* strcat with SSE2
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifdef SHARED
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
+   jump table with relative offsets.  INDEX is a register containing the
+   index into the jump table.  SCALE is the scale of INDEX.  */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into ECX. */ \
+ SETUP_PIC_REG(cx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ecx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ecx,INDEX,SCALE), %ecx; \
+ /* We loaded the jump table and adjusted ECX. Go. */ \
+ jmp *%ecx
+# else
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets.  INDEX is a register containing the index into the
+   jump table.  SCALE is the scale of INDEX.  */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+# endif
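
In the SHARED case the table stores each target relative to the table itself, so it can live in read-only data with no dynamic relocations; the dispatcher recovers the absolute address by adding the table's runtime address back in. GCC's labels-as-values extension expresses the same pattern in C, with offsets taken relative to the first label rather than to the table (a sketch, GNU C only):

    int
    classify (unsigned int i)   /* I must be 0, 1 or 2 */
    {
      static const int table[] = { &&zero - &&zero,
                                   &&one - &&zero,
                                   &&two - &&zero };
      goto *(&&zero + table[i]);
     zero: return 0;
     one:  return 1;
     two:  return 2;
    }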
+
+# ifndef STRCAT
+# define STRCAT __strcat_sse2
+# endif
+
+# define PARMS 4
+# define STR1 PARMS+4
+# define STR2 STR1+4
+
+# ifdef USE_AS_STRNCAT
+# define LEN STR2+8
+# define STR3 STR1+4
+# else
+# define STR3 STR1
+# endif
+
+# define USE_AS_STRCAT
+# ifdef USE_AS_STRNCAT
+# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
+# else
+# define RETURN POP(%esi); ret; CFI_PUSH(%esi);
+# endif
+
+.text
+ENTRY (STRCAT)
+ PUSH (%esi)
+ mov STR1(%esp), %eax
+ mov STR2(%esp), %esi
+# ifdef USE_AS_STRNCAT
+ PUSH (%ebx)
+ movl LEN(%esp), %ebx
+ test %ebx, %ebx
+ jz L(ExitZero)
+# endif
+ cmpb $0, (%esi)
+ mov %esi, %ecx
+ mov %eax, %edx
+ jz L(ExitZero)
+
+ and $63, %ecx
+ and $63, %edx
+ cmp $32, %ecx
+ ja L(StrlenCore7_1)
+ cmp $48, %edx
+ ja L(alignment_prolog)
+
+ pxor %xmm0, %xmm0
+ pxor %xmm4, %xmm4
+ pxor %xmm7, %xmm7
+ movdqu (%eax), %xmm1
+ movdqu (%esi), %xmm5
+ pcmpeqb %xmm1, %xmm0
+ movdqu 16(%esi), %xmm6
+ pmovmskb %xmm0, %ecx
+ pcmpeqb %xmm5, %xmm4
+ pcmpeqb %xmm6, %xmm7
+ test %ecx, %ecx
+ jnz L(exit_less16_)
+ mov %eax, %ecx
+ and $-16, %eax
+ jmp L(loop_prolog)
+
+L(alignment_prolog):
+ pxor %xmm0, %xmm0
+ pxor %xmm4, %xmm4
+ mov %edx, %ecx
+ pxor %xmm7, %xmm7
+ and $15, %ecx
+ and $-16, %eax
+ pcmpeqb (%eax), %xmm0
+ movdqu (%esi), %xmm5
+ movdqu 16(%esi), %xmm6
+ pmovmskb %xmm0, %edx
+ pcmpeqb %xmm5, %xmm4
+ shr %cl, %edx
+ pcmpeqb %xmm6, %xmm7
+ test %edx, %edx
+ jnz L(exit_less16)
+ add %eax, %ecx
+
+ pxor %xmm0, %xmm0
+L(loop_prolog):
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+ .p2align 4
+L(align16_loop):
+ pcmpeqb 16(%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ lea 64(%eax), %eax
+ test %edx, %edx
+ jz L(align16_loop)
+ bsf %edx, %edx
+ add %edx, %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit16):
+ bsf %edx, %edx
+ lea 16(%eax, %edx), %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit32):
+ bsf %edx, %edx
+ lea 32(%eax, %edx), %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit48):
+ bsf %edx, %edx
+ lea 48(%eax, %edx), %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit_less16):
+ bsf %edx, %edx
+ add %ecx, %eax
+ add %edx, %eax
+ jmp L(StartStrcpyPart)
+
+ .p2align 4
+L(exit_less16_):
+ bsf %ecx, %ecx
+ add %ecx, %eax
+
+ .p2align 4
+L(StartStrcpyPart):
+ pmovmskb %xmm4, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail1)
+
+ movdqu %xmm5, (%eax)
+ pmovmskb %xmm7, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $32, %ebx
+ jbe L(CopyFrom1To32Bytes1Case2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes1)
+
+ mov %esi, %ecx
+ and $-16, %esi
+ and $15, %ecx
+ pxor %xmm0, %xmm0
+# ifdef USE_AS_STRNCAT
+ add %ecx, %ebx
+ sbb %edx, %edx
+ or %edx, %ebx
+# endif
+ sub %ecx, %eax
+ jmp L(Unalign16Both)
+
+L(StrlenCore7_1):
+ mov %eax, %ecx
+ pxor %xmm0, %xmm0
+ and $15, %ecx
+ and $-16, %eax
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ shr %cl, %edx
+ test %edx, %edx
+ jnz L(exit_less16_1)
+ add %eax, %ecx
+
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+
+ .p2align 4
+L(align16_loop_1):
+ pcmpeqb 16(%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16_1)
+
+ pcmpeqb 32(%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32_1)
+
+ pcmpeqb 48(%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48_1)
+
+ pcmpeqb 64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ lea 64(%eax), %eax
+ test %edx, %edx
+ jz L(align16_loop_1)
+ bsf %edx, %edx
+ add %edx, %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit16_1):
+ bsf %edx, %edx
+ lea 16(%eax, %edx), %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit32_1):
+ bsf %edx, %edx
+ lea 32(%eax, %edx), %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit48_1):
+ bsf %edx, %edx
+ lea 48(%eax, %edx), %eax
+ jmp L(StartStrcpyPart_1)
+
+ .p2align 4
+L(exit_less16_1):
+ bsf %edx, %edx
+ add %ecx, %eax
+ add %edx, %eax
+
+ .p2align 4
+L(StartStrcpyPart_1):
+ mov %esi, %ecx
+ and $15, %ecx
+ and $-16, %esi
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+
+# ifdef USE_AS_STRNCAT
+ cmp $48, %ebx
+ ja L(BigN)
+# endif
+ pcmpeqb (%esi), %xmm1
+# ifdef USE_AS_STRNCAT
+ add %ecx, %ebx
+# endif
+ pmovmskb %xmm1, %edx
+ shr %cl, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ jbe L(CopyFrom1To16BytesTailCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail)
+
+ pcmpeqb 16(%esi), %xmm0
+ pmovmskb %xmm0, %edx
+# ifdef USE_AS_STRNCAT
+ cmp $32, %ebx
+ jbe L(CopyFrom1To32BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes)
+
+ movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+ movdqu %xmm1, (%eax)
+ sub %ecx, %eax
+
+ .p2align 4
+L(Unalign16Both):
+ mov $16, %ecx
+ movdqa (%esi, %ecx), %xmm1
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%eax, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $48, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+L(Unalign16BothBigN):
+ movaps 16(%esi, %ecx), %xmm3
+ movdqu %xmm2, (%eax, %ecx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm4
+ movdqu %xmm3, (%eax, %ecx)
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm1
+ movdqu %xmm4, (%eax, %ecx)
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%eax, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%esi, %ecx), %xmm3
+ movdqu %xmm2, (%eax, %ecx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+# ifdef USE_AS_STRNCAT
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ movdqu %xmm3, (%eax, %ecx)
+ mov %esi, %edx
+ lea 16(%esi, %ecx), %esi
+ and $-0x40, %esi
+ sub %esi, %edx
+ sub %edx, %eax
+# ifdef USE_AS_STRNCAT
+ lea 128(%ebx, %edx), %ebx
+# endif
+ movaps (%esi), %xmm2
+ movaps %xmm2, %xmm4
+ movaps 16(%esi), %xmm5
+ movaps 32(%esi), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 48(%esi), %xmm7
+ pminub %xmm5, %xmm2
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+# ifdef USE_AS_STRNCAT
+ sub $64, %ebx
+ jbe L(UnalignedLeaveCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(Unaligned64Leave)
+
+ .p2align 4
+L(Unaligned64Loop_start):
+ add $64, %eax
+ add $64, %esi
+ movdqu %xmm4, -64(%eax)
+ movaps (%esi), %xmm2
+ movdqa %xmm2, %xmm4
+ movdqu %xmm5, -48(%eax)
+ movaps 16(%esi), %xmm5
+ pminub %xmm5, %xmm2
+ movaps 32(%esi), %xmm3
+ movdqu %xmm6, -32(%eax)
+ movaps %xmm3, %xmm6
+ movdqu %xmm7, -16(%eax)
+ movaps 48(%esi), %xmm7
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+# ifdef USE_AS_STRNCAT
+ sub $64, %ebx
+ jbe L(UnalignedLeaveCase2OrCase3)
+# endif
+ test %edx, %edx
+ jz L(Unaligned64Loop_start)
+
+L(Unaligned64Leave):
+ pxor %xmm1, %xmm1
+
+ pcmpeqb %xmm4, %xmm0
+ pcmpeqb %xmm5, %xmm1
+ pmovmskb %xmm0, %edx
+ pmovmskb %xmm1, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnaligned_0)
+ test %ecx, %ecx
+ jnz L(CopyFrom1To16BytesUnaligned_16)
+
+ pcmpeqb %xmm6, %xmm0
+ pcmpeqb %xmm7, %xmm1
+ pmovmskb %xmm0, %edx
+ pmovmskb %xmm1, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnaligned_32)
+
+ bsf %ecx, %edx
+ movdqu %xmm4, (%eax)
+ movdqu %xmm5, 16(%eax)
+ movdqu %xmm6, 32(%eax)
+ add $48, %esi
+ add $48, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+# ifdef USE_AS_STRNCAT
+ .p2align 4
+L(BigN):
+ pcmpeqb (%esi), %xmm1
+ pmovmskb %xmm1, %edx
+ shr %cl, %edx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail)
+
+ pcmpeqb 16(%esi), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes)
+
+ movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+ movdqu %xmm1, (%eax)
+ sub %ecx, %eax
+ sub $48, %ebx
+ add %ecx, %ebx
+
+ mov $16, %ecx
+ movdqa (%esi, %ecx), %xmm1
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%eax, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+ jmp L(Unalign16BothBigN)
+# endif
+
+/*------------end of main part-------------------------------*/
+
+/* Case1 */
+ .p2align 4
+L(CopyFrom1To16Bytes):
+ add %ecx, %eax
+ add %ecx, %esi
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesTail):
+ add %ecx, %esi
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes1):
+ add $16, %esi
+ add $16, %eax
+L(CopyFrom1To16BytesTail1):
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes):
+ bsf %edx, %edx
+ add %ecx, %esi
+ add $16, %edx
+ sub %ecx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_0):
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_16):
+ bsf %ecx, %edx
+ movdqu %xmm4, (%eax)
+ add $16, %esi
+ add $16, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_32):
+ bsf %edx, %edx
+ movdqu %xmm4, (%eax)
+ movdqu %xmm5, 16(%eax)
+ add $32, %esi
+ add $32, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+# ifdef USE_AS_STRNCAT
+
+ .p2align 4
+L(CopyFrom1To16BytesExit):
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+/* Case2 */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2):
+ add $16, %ebx
+ add %ecx, %eax
+ add %ecx, %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32BytesCase2):
+ sub %ecx, %ebx
+ add %ecx, %esi
+ bsf %edx, %edx
+ add $16, %edx
+ sub %ecx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTailCase2):
+ sub %ecx, %ebx
+ add %ecx, %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTail1Case2):
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+/* Case2 or Case3, Case3 */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesCase2)
+L(CopyFrom1To16BytesCase3):
+ add $16, %ebx
+ add %ecx, %eax
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32BytesCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To32BytesCase2)
+ sub %ecx, %ebx
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesTailCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTailCase2)
+ sub %ecx, %ebx
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes1Case2OrCase3):
+ add $16, %eax
+ add $16, %esi
+ sub $16, %ebx
+L(CopyFrom1To16BytesTail1Case2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail1Case2)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+
+# endif
+
+# ifdef USE_AS_STRNCAT
+ .p2align 4
+L(StrncatExit0):
+ movb %bh, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+# endif
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit1):
+ movb %bh, 1(%eax)
+# endif
+L(Exit1):
+# ifdef USE_AS_STRNCAT
+ movb (%esi), %dh
+# endif
+ movb %dh, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit2):
+ movb %bh, 2(%eax)
+# endif
+L(Exit2):
+ movw (%esi), %dx
+ movw %dx, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit3):
+ movb %bh, 3(%eax)
+# endif
+L(Exit3):
+ movw (%esi), %cx
+ movw %cx, (%eax)
+# ifdef USE_AS_STRNCAT
+ movb 2(%esi), %dh
+# endif
+ movb %dh, 2(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit4):
+ movb %bh, 4(%eax)
+# endif
+L(Exit4):
+ movl (%esi), %edx
+ movl %edx, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit5):
+ movb %bh, 5(%eax)
+# endif
+L(Exit5):
+ movl (%esi), %ecx
+# ifdef USE_AS_STRNCAT
+ movb 4(%esi), %dh
+# endif
+ movb %dh, 4(%eax)
+ movl %ecx, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit6):
+ movb %bh, 6(%eax)
+# endif
+L(Exit6):
+ movl (%esi), %ecx
+ movw 4(%esi), %dx
+ movl %ecx, (%eax)
+ movw %dx, 4(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit7):
+ movb %bh, 7(%eax)
+# endif
+L(Exit7):
+ movl (%esi), %ecx
+ movl 3(%esi), %edx
+ movl %ecx, (%eax)
+ movl %edx, 3(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit8):
+ movb %bh, 8(%eax)
+# endif
+L(Exit8):
+ movlpd (%esi), %xmm0
+ movlpd %xmm0, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit9):
+ movb %bh, 9(%eax)
+# endif
+L(Exit9):
+ movlpd (%esi), %xmm0
+# ifdef USE_AS_STRNCAT
+ movb 8(%esi), %dh
+# endif
+ movb %dh, 8(%eax)
+ movlpd %xmm0, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit10):
+ movb %bh, 10(%eax)
+# endif
+L(Exit10):
+ movlpd (%esi), %xmm0
+ movw 8(%esi), %dx
+ movlpd %xmm0, (%eax)
+ movw %dx, 8(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit11):
+ movb %bh, 11(%eax)
+# endif
+L(Exit11):
+ movlpd (%esi), %xmm0
+ movl 7(%esi), %edx
+ movlpd %xmm0, (%eax)
+ movl %edx, 7(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit12):
+ movb %bh, 12(%eax)
+# endif
+L(Exit12):
+ movlpd (%esi), %xmm0
+ movl 8(%esi), %edx
+ movlpd %xmm0, (%eax)
+ movl %edx, 8(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit13):
+ movb %bh, 13(%eax)
+# endif
+L(Exit13):
+ movlpd (%esi), %xmm0
+ movlpd 5(%esi), %xmm1
+ movlpd %xmm0, (%eax)
+ movlpd %xmm1, 5(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit14):
+ movb %bh, 14(%eax)
+# endif
+L(Exit14):
+ movlpd (%esi), %xmm0
+ movlpd 6(%esi), %xmm1
+ movlpd %xmm0, (%eax)
+ movlpd %xmm1, 6(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit15):
+ movb %bh, 15(%eax)
+# endif
+L(Exit15):
+ movlpd (%esi), %xmm0
+ movlpd 7(%esi), %xmm1
+ movlpd %xmm0, (%eax)
+ movlpd %xmm1, 7(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit16):
+ movb %bh, 16(%eax)
+# endif
+L(Exit16):
+ movdqu (%esi), %xmm0
+ movdqu %xmm0, (%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit17):
+ movb %bh, 17(%eax)
+# endif
+L(Exit17):
+ movdqu (%esi), %xmm0
+# ifdef USE_AS_STRNCAT
+ movb 16(%esi), %dh
+# endif
+ movdqu %xmm0, (%eax)
+ movb %dh, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit18):
+ movb %bh, 18(%eax)
+# endif
+L(Exit18):
+ movdqu (%esi), %xmm0
+ movw 16(%esi), %cx
+ movdqu %xmm0, (%eax)
+ movw %cx, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit19):
+ movb %bh, 19(%eax)
+# endif
+L(Exit19):
+ movdqu (%esi), %xmm0
+ movl 15(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movl %ecx, 15(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit20):
+ movb %bh, 20(%eax)
+# endif
+L(Exit20):
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movl %ecx, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit21):
+ movb %bh, 21(%eax)
+# endif
+L(Exit21):
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+# ifdef USE_AS_STRNCAT
+ movb 20(%esi), %dh
+# endif
+ movdqu %xmm0, (%eax)
+ movl %ecx, 16(%eax)
+ movb %dh, 20(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit22):
+ movb %bh, 22(%eax)
+# endif
+L(Exit22):
+ movdqu (%esi), %xmm0
+ movlpd 14(%esi), %xmm3
+ movdqu %xmm0, (%eax)
+ movlpd %xmm3, 14(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit23):
+ movb %bh, 23(%eax)
+# endif
+L(Exit23):
+ movdqu (%esi), %xmm0
+ movlpd 15(%esi), %xmm3
+ movdqu %xmm0, (%eax)
+ movlpd %xmm3, 15(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit24):
+ movb %bh, 24(%eax)
+# endif
+L(Exit24):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit25):
+ movb %bh, 25(%eax)
+# endif
+L(Exit25):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+# ifdef USE_AS_STRNCAT
+ movb 24(%esi), %dh
+# endif
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movb %dh, 24(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit26):
+ movb %bh, 26(%eax)
+# endif
+L(Exit26):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movw 24(%esi), %cx
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movw %cx, 24(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit27):
+ movb %bh, 27(%eax)
+# endif
+L(Exit27):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 23(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movl %ecx, 23(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit28):
+ movb %bh, 28(%eax)
+# endif
+L(Exit28):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 24(%esi), %ecx
+ movdqu %xmm0, (%eax)
+ movlpd %xmm2, 16(%eax)
+ movl %ecx, 24(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit29):
+ movb %bh, 29(%eax)
+# endif
+L(Exit29):
+ movdqu (%esi), %xmm0
+ movdqu 13(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 13(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit30):
+ movb %bh, 30(%eax)
+# endif
+L(Exit30):
+ movdqu (%esi), %xmm0
+ movdqu 14(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 14(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit31):
+ movb %bh, 31(%eax)
+# endif
+L(Exit31):
+ movdqu (%esi), %xmm0
+ movdqu 15(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 15(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+# ifdef USE_AS_STRNCAT
+L(StrncatExit32):
+ movb %bh, 32(%eax)
+# endif
+L(Exit32):
+ movdqu (%esi), %xmm0
+ movdqu 16(%esi), %xmm2
+ movdqu %xmm0, (%eax)
+ movdqu %xmm2, 16(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+# ifdef USE_AS_STRNCAT
+
+ .p2align 4
+L(UnalignedLeaveCase2OrCase3):
+ test %edx, %edx
+ jnz L(Unaligned64LeaveCase2)
+L(Unaligned64LeaveCase3):
+ lea 64(%ebx), %ecx
+ and $-16, %ecx
+ add $48, %ebx
+ jl L(CopyFrom1To16BytesCase3)
+ movdqu %xmm4, (%eax)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm5, 16(%eax)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm6, 32(%eax)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm7, 48(%eax)
+ xor %bh, %bh
+ movb %bh, 64(%eax)
+ mov STR3(%esp), %eax
+ RETURN
+
+ .p2align 4
+L(Unaligned64LeaveCase2):
+ xor %ecx, %ecx
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %edx
+ add $48, %ebx
+ jle L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm5, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm4, (%eax)
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm6, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm5, 16(%eax)
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm7, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm6, 32(%eax)
+ lea 16(%eax, %ecx), %eax
+ lea 16(%esi, %ecx), %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
+# endif
+ .p2align 4
+L(ExitZero):
+ RETURN
+
+END (STRCAT)
+
+ .p2align 4
+ .section .rodata
+L(ExitTable):
+ .int JMPTBL(L(Exit1), L(ExitTable))
+ .int JMPTBL(L(Exit2), L(ExitTable))
+ .int JMPTBL(L(Exit3), L(ExitTable))
+ .int JMPTBL(L(Exit4), L(ExitTable))
+ .int JMPTBL(L(Exit5), L(ExitTable))
+ .int JMPTBL(L(Exit6), L(ExitTable))
+ .int JMPTBL(L(Exit7), L(ExitTable))
+ .int JMPTBL(L(Exit8), L(ExitTable))
+ .int JMPTBL(L(Exit9), L(ExitTable))
+ .int JMPTBL(L(Exit10), L(ExitTable))
+ .int JMPTBL(L(Exit11), L(ExitTable))
+ .int JMPTBL(L(Exit12), L(ExitTable))
+ .int JMPTBL(L(Exit13), L(ExitTable))
+ .int JMPTBL(L(Exit14), L(ExitTable))
+ .int JMPTBL(L(Exit15), L(ExitTable))
+ .int JMPTBL(L(Exit16), L(ExitTable))
+ .int JMPTBL(L(Exit17), L(ExitTable))
+ .int JMPTBL(L(Exit18), L(ExitTable))
+ .int JMPTBL(L(Exit19), L(ExitTable))
+ .int JMPTBL(L(Exit20), L(ExitTable))
+ .int JMPTBL(L(Exit21), L(ExitTable))
+ .int JMPTBL(L(Exit22), L(ExitTable))
+ .int JMPTBL(L(Exit23), L(ExitTable))
+ .int JMPTBL(L(Exit24), L(ExitTable))
+ .int JMPTBL(L(Exit25), L(ExitTable))
+ .int JMPTBL(L(Exit26), L(ExitTable))
+ .int JMPTBL(L(Exit27), L(ExitTable))
+ .int JMPTBL(L(Exit28), L(ExitTable))
+ .int JMPTBL(L(Exit29), L(ExitTable))
+ .int JMPTBL(L(Exit30), L(ExitTable))
+ .int JMPTBL(L(Exit31), L(ExitTable))
+ .int JMPTBL(L(Exit32), L(ExitTable))
+# ifdef USE_AS_STRNCAT
+L(ExitStrncatTable):
+ .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
+ .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
+# endif
+#endif
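
The 64-byte loop above (L(Unaligned64Loop_start)) depends on pminub's reduction property: the unsigned byte minimum of several vectors has a zero lane exactly where some input does, so four 16-byte blocks can be scanned for the terminator with one pcmpeqb/pmovmskb at the end. The same trick with SSE2 intrinsics (has_zero_byte_64 is a hypothetical helper):

    #include <emmintrin.h>
    #include <stdint.h>

    static int
    has_zero_byte_64 (const uint8_t *p)   /* P must be 16-byte aligned */
    {
      __m128i a = _mm_load_si128 ((const __m128i *) p);
      __m128i b = _mm_load_si128 ((const __m128i *) (p + 16));
      __m128i c = _mm_load_si128 ((const __m128i *) (p + 32));
      __m128i d = _mm_load_si128 ((const __m128i *) (p + 48));
      __m128i m = _mm_min_epu8 (_mm_min_epu8 (a, b), _mm_min_epu8 (c, d));
      __m128i z = _mm_cmpeq_epi8 (m, _mm_setzero_si128 ());
      return _mm_movemask_epi8 (z) != 0;
    }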
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcat-ssse3.S
new file mode 100644
index 0000000000..59ffbc60a5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcat-ssse3.S
@@ -0,0 +1,572 @@
+/* strcat with SSSE3
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifndef STRCAT
+# define STRCAT __strcat_ssse3
+# endif
+
+# define PARMS 4
+# define STR1 PARMS+4
+# define STR2 STR1+4
+
+# ifdef USE_AS_STRNCAT
+# define LEN STR2+8
+# endif
+
+# define USE_AS_STRCAT
+
+.text
+ENTRY (STRCAT)
+ PUSH (%edi)
+ mov STR1(%esp), %edi
+ mov %edi, %edx
+
+# define RETURN jmp L(StartStrcpyPart)
+# include "strlen-sse2.S"
+
+L(StartStrcpyPart):
+ mov STR2(%esp), %ecx
+ lea (%edi, %eax), %edx
+# ifdef USE_AS_STRNCAT
+ PUSH (%ebx)
+ mov LEN(%esp), %ebx
+ test %ebx, %ebx
+ jz L(StrncatExit0)
+ cmp $8, %ebx
+ jbe L(StrncatExit8Bytes)
+# endif
+ cmpb $0, (%ecx)
+ jz L(Exit1)
+ cmpb $0, 1(%ecx)
+ jz L(Exit2)
+ cmpb $0, 2(%ecx)
+ jz L(Exit3)
+ cmpb $0, 3(%ecx)
+ jz L(Exit4)
+ cmpb $0, 4(%ecx)
+ jz L(Exit5)
+ cmpb $0, 5(%ecx)
+ jz L(Exit6)
+ cmpb $0, 6(%ecx)
+ jz L(Exit7)
+ cmpb $0, 7(%ecx)
+ jz L(Exit8)
+ cmpb $0, 8(%ecx)
+ jz L(Exit9)
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ jb L(StrncatExit15Bytes)
+# endif
+ cmpb $0, 9(%ecx)
+ jz L(Exit10)
+ cmpb $0, 10(%ecx)
+ jz L(Exit11)
+ cmpb $0, 11(%ecx)
+ jz L(Exit12)
+ cmpb $0, 12(%ecx)
+ jz L(Exit13)
+ cmpb $0, 13(%ecx)
+ jz L(Exit14)
+ cmpb $0, 14(%ecx)
+ jz L(Exit15)
+ cmpb $0, 15(%ecx)
+ jz L(Exit16)
+# ifdef USE_AS_STRNCAT
+ cmp $16, %ebx
+ je L(StrncatExit16)
+
+# define RETURN1 \
+ POP (%ebx); \
+ POP (%edi); \
+ ret; \
+ CFI_PUSH (%ebx); \
+ CFI_PUSH (%edi)
+# define USE_AS_STRNCPY
+# else
+# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
+# endif
+# include "strcpy-ssse3.S"
+ .p2align 4
+L(CopyFrom1To16Bytes):
+ add %esi, %edx
+ add %esi, %ecx
+
+ POP (%esi)
+ test %al, %al
+ jz L(ExitHigh)
+ test $0x01, %al
+ jnz L(Exit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(ExitHigh):
+ test $0x01, %ah
+ jnz L(Exit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ test $0x08, %ah
+ jnz L(Exit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ movlpd (%ecx), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit1):
+ movb %bh, 1(%edx)
+L(Exit1):
+ movb (%ecx), %al
+ movb %al, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit2):
+ movb %bh, 2(%edx)
+L(Exit2):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit3):
+ movb %bh, 3(%edx)
+L(Exit3):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movb 2(%ecx), %al
+ movb %al, 2(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit4):
+ movb %bh, 4(%edx)
+L(Exit4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit5):
+ movb %bh, 5(%edx)
+L(Exit5):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movb 4(%ecx), %al
+ movb %al, 4(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit6):
+ movb %bh, 6(%edx)
+L(Exit6):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movw 4(%ecx), %ax
+ movw %ax, 4(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit7):
+ movb %bh, 7(%edx)
+L(Exit7):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 3(%ecx), %eax
+ movl %eax, 3(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit8):
+ movb %bh, 8(%edx)
+L(Exit8):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit9):
+ movb %bh, 9(%edx)
+L(Exit9):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movb 8(%ecx), %al
+ movb %al, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit10):
+ movb %bh, 10(%edx)
+L(Exit10):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movw 8(%ecx), %ax
+ movw %ax, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit11):
+ movb %bh, 11(%edx)
+L(Exit11):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl 7(%ecx), %eax
+ movl %eax, 7(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit12):
+ movb %bh, 12(%edx)
+L(Exit12):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl 8(%ecx), %eax
+ movl %eax, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit13):
+ movb %bh, 13(%edx)
+L(Exit13):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 5(%ecx), %xmm0
+ movlpd %xmm0, 5(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit14):
+ movb %bh, 14(%edx)
+L(Exit14):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 6(%ecx), %xmm0
+ movlpd %xmm0, 6(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit15):
+ movb %bh, 15(%edx)
+L(Exit15):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit16):
+ movb %bh, 16(%edx)
+L(Exit16):
+ movlpd (%ecx), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+# ifdef USE_AS_STRNCPY
+
+ CFI_PUSH(%esi)
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2):
+ add $16, %ebx
+ add %esi, %ecx
+ lea (%esi, %edx), %esi
+ lea -9(%ebx), %edx
+ and $1<<7, %dh
+ or %al, %dh
+ test %dh, %dh
+ lea (%esi), %edx
+ POP (%esi)
+ jz L(ExitHighCase2)
+
+ test $0x01, %al
+ jnz L(Exit1)
+ cmp $1, %ebx
+ je L(StrncatExit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ cmp $2, %ebx
+ je L(StrncatExit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ cmp $3, %ebx
+ je L(StrncatExit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ cmp $4, %ebx
+ je L(StrncatExit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ cmp $5, %ebx
+ je L(StrncatExit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ cmp $6, %ebx
+ je L(StrncatExit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ cmp $7, %ebx
+ je L(StrncatExit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ lea 7(%edx), %eax
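+	/* cmpb sets CF exactly when the byte at (%eax) is 0; sbb $-1
+	   then adds 1 - CF, so %eax advances past the last copied byte
+	   only when that byte was not already a NUL terminator.  */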
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+ xor %cl, %cl
+ movb %cl, (%eax)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(ExitHighCase2):
+ test $0x01, %ah
+ jnz L(Exit9)
+ cmp $9, %ebx
+ je L(StrncatExit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ cmp $10, %ebx
+ je L(StrncatExit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ cmp $11, %ebx
+ je L(StrncatExit11)
+ test $0x8, %ah
+ jnz L(Exit12)
+ cmp $12, %ebx
+ je L(StrncatExit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ cmp $13, %ebx
+ je L(StrncatExit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ cmp $14, %ebx
+ je L(StrncatExit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ cmp $15, %ebx
+ je L(StrncatExit15)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm1, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ CFI_PUSH(%esi)
+
+L(CopyFrom1To16BytesCase2OrCase3):
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+
+ .p2align 4
+L(CopyFrom1To16BytesCase3):
+ add $16, %ebx
+ add %esi, %edx
+ add %esi, %ecx
+
+ POP (%esi)
+
+ cmp $8, %ebx
+ ja L(ExitHighCase3)
+ cmp $1, %ebx
+ je L(StrncatExit1)
+ cmp $2, %ebx
+ je L(StrncatExit2)
+ cmp $3, %ebx
+ je L(StrncatExit3)
+ cmp $4, %ebx
+ je L(StrncatExit4)
+ cmp $5, %ebx
+ je L(StrncatExit5)
+ cmp $6, %ebx
+ je L(StrncatExit6)
+ cmp $7, %ebx
+ je L(StrncatExit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movb %bh, 8(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(ExitHighCase3):
+ cmp $9, %ebx
+ je L(StrncatExit9)
+ cmp $10, %ebx
+ je L(StrncatExit10)
+ cmp $11, %ebx
+ je L(StrncatExit11)
+ cmp $12, %ebx
+ je L(StrncatExit12)
+ cmp $13, %ebx
+ je L(StrncatExit13)
+ cmp $14, %ebx
+ je L(StrncatExit14)
+ cmp $15, %ebx
+ je L(StrncatExit15)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm1, 8(%edx)
+ movb %bh, 16(%edx)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit0):
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit15Bytes):
+ cmp $9, %ebx
+ je L(StrncatExit9)
+ cmpb $0, 9(%ecx)
+ jz L(Exit10)
+ cmp $10, %ebx
+ je L(StrncatExit10)
+ cmpb $0, 10(%ecx)
+ jz L(Exit11)
+ cmp $11, %ebx
+ je L(StrncatExit11)
+ cmpb $0, 11(%ecx)
+ jz L(Exit12)
+ cmp $12, %ebx
+ je L(StrncatExit12)
+ cmpb $0, 12(%ecx)
+ jz L(Exit13)
+ cmp $13, %ebx
+ je L(StrncatExit13)
+ cmpb $0, 13(%ecx)
+ jz L(Exit14)
+ cmp $14, %ebx
+ je L(StrncatExit14)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+ lea 14(%edx), %eax
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+ movb %bh, (%eax)
+ movl %edi, %eax
+ RETURN1
+
+ .p2align 4
+L(StrncatExit8Bytes):
+ cmpb $0, (%ecx)
+ jz L(Exit1)
+ cmp $1, %ebx
+ je L(StrncatExit1)
+ cmpb $0, 1(%ecx)
+ jz L(Exit2)
+ cmp $2, %ebx
+ je L(StrncatExit2)
+ cmpb $0, 2(%ecx)
+ jz L(Exit3)
+ cmp $3, %ebx
+ je L(StrncatExit3)
+ cmpb $0, 3(%ecx)
+ jz L(Exit4)
+ cmp $4, %ebx
+ je L(StrncatExit4)
+ cmpb $0, 4(%ecx)
+ jz L(Exit5)
+ cmp $5, %ebx
+ je L(StrncatExit5)
+ cmpb $0, 5(%ecx)
+ jz L(Exit6)
+ cmp $6, %ebx
+ je L(StrncatExit6)
+ cmpb $0, 6(%ecx)
+ jz L(Exit7)
+ cmp $7, %ebx
+ je L(StrncatExit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ lea 7(%edx), %eax
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+ movb %bh, (%eax)
+ movl %edi, %eax
+ RETURN1
+
+# endif
+END (STRCAT)
+#endif
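
Taken together, the StrncatExitNN cases above implement the usual strncat contract byte-for-byte: append at most LEN bytes of the source and always leave a NUL terminator, returning the original destination. A scalar reference model, for orientation only (ref_strncat is an invented name, not part of the patch):

    #include <stddef.h>
    #include <string.h>

    char *
    ref_strncat (char *dst, const char *src, size_t n)
    {
      /* The included strlen-sse2.S computes this length in %eax.  */
      char *end = dst + strlen (dst);
      size_t i;
      for (i = 0; i < n && src[i] != '\0'; i++)
        end[i] = src[i];
      /* The movb %bh, N(%edx) stores above: %bh is 0 because the
         remaining count in %ebx is small at that point.  */
      end[i] = '\0';
      return dst;   /* movl %edi, %eax restores the original dst.  */
    }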
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcat.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcat.S
new file mode 100644
index 0000000000..8412cb6f23
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcat.S
@@ -0,0 +1,92 @@
+/* Multiple versions of strcat
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef USE_AS_STRNCAT
+# ifndef STRCAT
+# define STRCAT strcat
+# endif
+#endif
+
+#ifdef USE_AS_STRNCAT
+# define STRCAT_SSSE3 __strncat_ssse3
+# define STRCAT_SSE2 __strncat_sse2
+# define STRCAT_IA32 __strncat_ia32
+# define __GI_STRCAT __GI_strncat
+#else
+# define STRCAT_SSSE3 __strcat_ssse3
+# define STRCAT_SSE2 __strcat_sse2
+# define STRCAT_IA32 __strcat_ia32
+# define __GI_STRCAT __GI_strcat
+#endif
+
+
+/* Define multiple versions only for the definition in libc.  Don't
+   define multiple versions for strncat in the static library, since
+   strncat is needed before the IFUNC initialization has happened.  */
+#if IS_IN (libc)
+
+ .text
+ENTRY(STRCAT)
+ .type STRCAT, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (STRCAT_IA32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (STRCAT_SSE2)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (STRCAT_SSSE3)
+2: ret
+END(STRCAT)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type STRCAT_IA32, @function; \
+ .align 16; \
+ .globl STRCAT_IA32; \
+ .hidden STRCAT_IA32; \
+ STRCAT_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strcat calls through a PLT.
+   The speedup we get from using SSSE3 instructions is likely eaten away
+   by the indirect call in the PLT.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
+# undef libc_hidden_def
+# define libc_hidden_def(name) \
+ .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
+
+# endif
+#endif
+
+#ifndef USE_AS_STRNCAT
+# include "../../strcat.S"
+#endif
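
The resolver above runs once, at symbol-binding time, and its jz/jnz chain picks: no SSE2 gives the ia32 version, SSE2 with Fast_Unaligned_Load gives the plain SSE2 version, and otherwise SSSE3 is used when available. A hedged sketch of the same dispatch with GCC's ifunc attribute (function names invented; Fast_Unaligned_Load has no __builtin_cpu_supports equivalent, so it appears as a placeholder):

    char *strcat_ia32 (char *, const char *);
    char *strcat_sse2 (char *, const char *);
    char *strcat_ssse3 (char *, const char *);
    extern int fast_unaligned_load;  /* stand-in for Fast_Unaligned_Load */

    static char *(*resolve_strcat (void)) (char *, const char *)
    {
      __builtin_cpu_init ();
      if (!__builtin_cpu_supports ("sse2"))
        return strcat_ia32;
      if (fast_unaligned_load || !__builtin_cpu_supports ("ssse3"))
        return strcat_sse2;
      return strcat_ssse3;
    }

    char *my_strcat (char *, const char *)
         __attribute__ ((ifunc ("resolve_strcat")));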
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
new file mode 100644
index 0000000000..95fd7c084e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
@@ -0,0 +1,158 @@
+/* strchr with SSE2 with bsf
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 8
+# define ENTRANCE PUSH(%edi)
+# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
+
+# define STR1 PARMS
+# define STR2 STR1+4
+
+ .text
+ENTRY (__strchr_sse2_bsf)
+
+ ENTRANCE
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+
+ pxor %xmm2, %xmm2
+ mov %ecx, %edi
+ punpcklbw %xmm1, %xmm1
+ punpcklbw %xmm1, %xmm1
+ /* ECX has OFFSET. */
+ and $15, %ecx
+ pshufd $0, %xmm1, %xmm1
+ je L(loop)
+
+/* Handle unaligned string. */
+ and $-16, %edi
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ /* Find where NULL is. */
+ pmovmskb %xmm2, %edx
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ /* Remove the leading bytes. */
+ sarl %cl, %edx
+ sarl %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+ /* Check which byte is a match. */
+ bsf %eax, %eax
+ /* Is there a NULL? */
+ test %edx, %edx
+ je L(unaligned_match)
+ bsf %edx, %edx
+ cmpl %edx, %eax
+ /* Return NULL if NULL comes first. */
+ ja L(return_null)
+L(unaligned_match):
+ add %edi, %eax
+ add %ecx, %eax
+ RETURN
+
+ .p2align 4
+L(unaligned_no_match):
+ test %edx, %edx
+ jne L(return_null)
+ pxor %xmm2, %xmm2
+
+ add $16, %edi
+
+ .p2align 4
+/* Loop start on aligned string. */
+L(loop):
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+ jmp L(loop)
+
+L(matches):
+ pmovmskb %xmm2, %edx
+ test %eax, %eax
+ jz L(return_null)
+ bsf %eax, %eax
+ /* There is a match. First find where NULL is. */
+ test %edx, %edx
+ je L(match)
+ bsf %edx, %ecx
+ /* Check if NULL comes first. */
+ cmpl %ecx, %eax
+ ja L(return_null)
+L(match):
+ sub $16, %edi
+ add %edi, %eax
+ RETURN
+
+/* Return NULL. */
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ RETURN
+
+END (__strchr_sse2_bsf)
+#endif
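
One iteration of the aligned loop above is the whole algorithm: load 16 bytes, compare them against NUL and against the target byte, extract both masks, and let bsf pick the first hit while checking whether a NUL comes first. The same step in SSE2 intrinsics, as a readability sketch (assumes p is 16-byte aligned, as the asm arranges, and c is the target byte splatted with _mm_set1_epi8, like the punpcklbw/pshufd sequence):

    #include <emmintrin.h>
    #include <stddef.h>

    /* One 16-byte step; *cont is set when neither a match nor a NUL
       was seen and scanning must continue.  */
    static const char *
    step16 (const char *p, __m128i c, int *cont)
    {
      __m128i v = _mm_load_si128 ((const __m128i *) p);    /* movdqa */
      unsigned nulls =
        _mm_movemask_epi8 (_mm_cmpeq_epi8 (v, _mm_setzero_si128 ()));
      unsigned matches = _mm_movemask_epi8 (_mm_cmpeq_epi8 (v, c));
      *cont = (nulls | matches) == 0;
      if (*cont || matches == 0)
        return NULL;                    /* no match in this chunk */
      unsigned m = __builtin_ctz (matches);             /* the bsf */
      if (nulls != 0 && __builtin_ctz (nulls) < m)
        return NULL;                    /* string ends before the match */
      return p + m;
    }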
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2.S
new file mode 100644
index 0000000000..1f9e875b04
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strchr-sse2.S
@@ -0,0 +1,348 @@
+/* strchr SSE2 without bsf
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 8
+# define ENTRANCE PUSH(%edi)
+# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
+
+# define STR1 PARMS
+# define STR2 STR1+4
+
+ atom_text_section
+ENTRY (__strchr_sse2)
+
+ ENTRANCE
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+
+ pxor %xmm2, %xmm2
+ mov %ecx, %edi
+ punpcklbw %xmm1, %xmm1
+ punpcklbw %xmm1, %xmm1
+ /* ECX has OFFSET. */
+ and $15, %ecx
+ pshufd $0, %xmm1, %xmm1
+ je L(loop)
+
+/* Handle unaligned string. */
+ and $-16, %edi
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ /* Find where NULL is. */
+ pmovmskb %xmm2, %edx
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ /* Remove the leading bytes. */
+ sarl %cl, %edx
+ sarl %cl, %eax
+ test %eax, %eax
+ jz L(unaligned_no_match)
+	/* There is a match; check below whether a NULL comes first.  */
+ add %ecx, %edi
+ test %edx, %edx
+ jz L(match_case1)
+ jmp L(match_case2)
+
+ .p2align 4
+L(unaligned_no_match):
+ test %edx, %edx
+ jne L(return_null)
+
+ pxor %xmm2, %xmm2
+ add $16, %edi
+
+ .p2align 4
+/* Loop start on aligned string. */
+L(loop):
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+ test %edx, %edx
+ jnz L(return_null)
+ add $16, %edi
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+ test %edx, %edx
+ jnz L(return_null)
+ add $16, %edi
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+ test %edx, %edx
+ jnz L(return_null)
+ add $16, %edi
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+ test %edx, %edx
+ jnz L(return_null)
+ add $16, %edi
+ jmp L(loop)
+
+L(matches):
+ /* There is a match. First find where NULL is. */
+ test %edx, %edx
+ jz L(match_case1)
+
+ .p2align 4
+L(match_case2):
+ test %al, %al
+	jz	L(match_high_case2)
+
+ mov %al, %cl
+ and $15, %cl
+ jnz L(match_case2_4)
+
+ mov %dl, %ch
+ and $15, %ch
+ jnz L(return_null)
+
+ test $0x10, %al
+ jnz L(Exit5)
+ test $0x10, %dl
+ jnz L(return_null)
+ test $0x20, %al
+ jnz L(Exit6)
+ test $0x20, %dl
+ jnz L(return_null)
+ test $0x40, %al
+ jnz L(Exit7)
+ test $0x40, %dl
+ jnz L(return_null)
+ lea 7(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_case2_4):
+ test $0x01, %al
+ jnz L(Exit1)
+ test $0x01, %dl
+ jnz L(return_null)
+ test $0x02, %al
+ jnz L(Exit2)
+ test $0x02, %dl
+ jnz L(return_null)
+ test $0x04, %al
+ jnz L(Exit3)
+ test $0x04, %dl
+ jnz L(return_null)
+ lea 3(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_high_case2):
+ test %dl, %dl
+ jnz L(return_null)
+
+ mov %ah, %cl
+ and $15, %cl
+ jnz L(match_case2_12)
+
+ mov %dh, %ch
+ and $15, %ch
+ jnz L(return_null)
+
+ test $0x10, %ah
+ jnz L(Exit13)
+ test $0x10, %dh
+ jnz L(return_null)
+ test $0x20, %ah
+ jnz L(Exit14)
+ test $0x20, %dh
+ jnz L(return_null)
+ test $0x40, %ah
+ jnz L(Exit15)
+ test $0x40, %dh
+ jnz L(return_null)
+ lea 15(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_case2_12):
+ test $0x01, %ah
+ jnz L(Exit9)
+ test $0x01, %dh
+ jnz L(return_null)
+ test $0x02, %ah
+ jnz L(Exit10)
+ test $0x02, %dh
+ jnz L(return_null)
+ test $0x04, %ah
+ jnz L(Exit11)
+ test $0x04, %dh
+ jnz L(return_null)
+ lea 11(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_case1):
+ test %al, %al
+	jz	L(match_high_case1)
+
+ test $0x01, %al
+ jnz L(Exit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ lea 7(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_high_case1):
+ test $0x01, %ah
+ jnz L(Exit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ test $0x08, %ah
+ jnz L(Exit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ lea 15(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit1):
+ lea (%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit2):
+ lea 1(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit3):
+ lea 2(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit4):
+ lea 3(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit5):
+ lea 4(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit6):
+ lea 5(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit7):
+ lea 6(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit9):
+ lea 8(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit10):
+ lea 9(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit11):
+ lea 10(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit12):
+ lea 11(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit13):
+ lea 12(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit14):
+ lea 13(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit15):
+ lea 14(%edi), %eax
+ RETURN
+
+/* Return NULL. */
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ RETURN
+
+END (__strchr_sse2)
+#endif
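
This variant exists for CPUs where HAS_ARCH_FEATURE (Slow_BSF) holds, i.e. where bsf is microcoded and slow; the Exit1..Exit15 ladders above replace it with a byte test, a nibble test, and at most three single-bit tests. The shape of that decode in C (illustrative only):

    /* Lowest set bit of a nonzero 16-bit mask without bsf/ctz,
       narrowing byte -> nibble -> bit as the %al/%ah and "and $15"
       tests above do.  */
    static int
    lowest_bit (unsigned mask)
    {
      int base = 0;
      if ((mask & 0xff) == 0) { mask >>= 8; base = 8; }  /* %al vs %ah */
      if ((mask & 0x0f) == 0) { mask >>= 4; base += 4; } /* and $15 */
      if (mask & 0x1) return base;
      if (mask & 0x2) return base + 1;
      if (mask & 0x4) return base + 2;
      return base + 3;
    }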
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strchr.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strchr.S
new file mode 100644
index 0000000000..5b97b1c767
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strchr.S
@@ -0,0 +1,57 @@
+/* Multiple versions of strchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(strchr)
+ .type strchr, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strchr_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strchr_sse2_bsf)
+ HAS_ARCH_FEATURE (Slow_BSF)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strchr_sse2)
+2: ret
+END(strchr)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __strchr_ia32, @function; \
+ .globl __strchr_ia32; \
+ .p2align 4; \
+ __strchr_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strchr_ia32, .-__strchr_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they are called without setting up EBX, which the PLT (and therefore
+   IFUNC) requires.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strchr; __GI_strchr = __strchr_ia32
+#endif
+
+#include "../../i586/strchr.S"
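
Spelled out, the resolver's jz/jnz chain is a three-way choice; in C (decision logic only, not glibc's init-arch plumbing):

    extern char *__strchr_ia32 (const char *, int);
    extern char *__strchr_sse2 (const char *, int);
    extern char *__strchr_sse2_bsf (const char *, int);

    typedef char *(*strchr_fn) (const char *, int);

    static strchr_fn
    select_strchr (int has_sse2, int slow_bsf)
    {
      if (!has_sse2)
        return __strchr_ia32;      /* baseline */
      if (slow_bsf)
        return __strchr_sse2;      /* decode masks without bsf */
      return __strchr_sse2_bsf;    /* bsf is fast here: use it */
    }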
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-sse4.S
new file mode 100644
index 0000000000..cd26058671
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -0,0 +1,804 @@
+/* strcmp with SSE4.2
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef USE_AS_STRNCMP
+# ifndef STRCMP
+# define STRCMP __strncmp_sse4_2
+# endif
+# define STR1 8
+# define STR2 STR1+4
+# define CNT STR2+4
+# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
+# define REM %ebp
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+# ifndef STRCMP
+# define STRCMP __strcasecmp_l_sse4_2
+# endif
+# ifdef PIC
+# define STR1 12
+# else
+# define STR1 8
+# endif
+# define STR2 STR1+4
+# define LOCALE 12 /* Loaded before the adjustment. */
+# ifdef PIC
+# define RETURN POP (%edi); POP (%ebx); ret; \
+ .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi)
+# else
+# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi)
+# endif
+# define NONASCII __strcasecmp_nonascii
+#elif defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+# ifndef STRCMP
+# define STRCMP __strncasecmp_l_sse4_2
+# endif
+# ifdef PIC
+# define STR1 16
+# else
+# define STR1 12
+# endif
+# define STR2 STR1+4
+# define CNT STR2+4
+# define LOCALE 16 /* Loaded before the adjustment. */
+# ifdef PIC
+# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \
+ .p2align 4; \
+ CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi)
+# else
+# define RETURN POP (%edi); POP (REM); ret; \
+ .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi)
+# endif
+# define REM %ebp
+# define NONASCII __strncasecmp_nonascii
+#else
+# ifndef STRCMP
+# define STRCMP __strcmp_sse4_2
+# endif
+# define STR1 4
+# define STR2 STR1+4
+# define RETURN ret; .p2align 4
+#endif
+
+ .section .text.sse4.2,"ax",@progbits
+
+#ifdef USE_AS_STRCASECMP_L
+ENTRY (__strcasecmp_sse4_2)
+# ifdef PIC
+ PUSH (%ebx)
+ LOAD_PIC_REG(bx)
+ movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ addl %gs:0, %eax
+ movl (%eax), %eax
+# else
+ movl %gs:(%eax), %eax
+# endif
+# else
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl %gs:0, %eax
+ movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
+# else
+ movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
+# endif
+# endif
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+ movl (%eax), %eax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+# ifdef PIC
+ je L(ascii)
+ POP (%ebx)
+ jmp __strcasecmp_nonascii
+# else
+ jne __strcasecmp_nonascii
+ jmp L(ascii)
+# endif
+END (__strcasecmp_sse4_2)
+#endif
+
+#ifdef USE_AS_STRNCASECMP_L
+ENTRY (__strncasecmp_sse4_2)
+# ifdef PIC
+ PUSH (%ebx)
+ LOAD_PIC_REG(bx)
+ movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ addl %gs:0, %eax
+ movl (%eax), %eax
+# else
+ movl %gs:(%eax), %eax
+# endif
+# else
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl %gs:0, %eax
+ movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
+# else
+ movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
+# endif
+# endif
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+ movl (%eax), %eax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+# ifdef PIC
+ je L(ascii)
+ POP (%ebx)
+ jmp __strncasecmp_nonascii
+# else
+ jne __strncasecmp_nonascii
+ jmp L(ascii)
+# endif
+END (__strncasecmp_sse4_2)
+#endif
+
+ ENTRY (STRCMP)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movl LOCALE(%esp), %eax
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+ movl (%eax), %eax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+ jne NONASCII
+
+# ifdef PIC
+ PUSH (%ebx)
+ LOAD_PIC_REG(bx)
+# endif
+L(ascii):
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+.Lbelowupper:
+ .quad 0x4040404040404040
+ .quad 0x4040404040404040
+.Ltopupper:
+ .quad 0x5b5b5b5b5b5b5b5b
+ .quad 0x5b5b5b5b5b5b5b5b
+.Ltouppermask:
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .previous
+
+# ifdef PIC
+# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
+# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
+# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
+# else
+# define UCLOW_reg .Lbelowupper
+# define UCHIGH_reg .Ltopupper
+# define LCQWORD_reg .Ltouppermask
+# endif
+#endif
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ PUSH (REM)
+#endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ PUSH (%edi)
+#endif
+ mov STR1(%esp), %edx
+ mov STR2(%esp), %eax
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ movl CNT(%esp), REM
+ test REM, REM
+ je L(eq)
+#endif
+ mov %dx, %cx
+ and $0xfff, %cx
+ cmp $0xff0, %cx
+ ja L(first4bytes)
+ movdqu (%edx), %xmm2
+ mov %eax, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(first4bytes)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# define TOLOWER(reg1, reg2) \
+ movdqa reg1, %xmm3; \
+ movdqa UCHIGH_reg, %xmm4; \
+ movdqa reg2, %xmm5; \
+ movdqa UCHIGH_reg, %xmm6; \
+ pcmpgtb UCLOW_reg, %xmm3; \
+ pcmpgtb reg1, %xmm4; \
+ pcmpgtb UCLOW_reg, %xmm5; \
+ pcmpgtb reg2, %xmm6; \
+ pand %xmm4, %xmm3; \
+ pand %xmm6, %xmm5; \
+ pand LCQWORD_reg, %xmm3; \
+ pand LCQWORD_reg, %xmm5; \
+ por %xmm3, reg1; \
+ por %xmm5, reg2
+
+ movdqu (%eax), %xmm1
+ TOLOWER (%xmm2, %xmm1)
+ movd %xmm2, %ecx
+ movd %xmm1, %edi
+ movdqa %xmm2, %xmm3
+ movdqa %xmm1, %xmm4
+ cmpl %edi, %ecx
+#else
+# define TOLOWER(reg1, reg2)
+
+ movd %xmm2, %ecx
+ cmp (%eax), %ecx
+#endif
+ jne L(less4bytes)
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+ movdqu (%eax), %xmm1
+#endif
+ pxor %xmm2, %xmm1
+ pxor %xmm0, %xmm0
+ ptest %xmm1, %xmm0
+ jnc L(less16bytes)
+ pcmpeqb %xmm0, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ sub $16, REM
+ jbe L(eq)
+#endif
+ add $16, %edx
+ add $16, %eax
+L(first4bytes):
+ movzbl (%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl (%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, (%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $1, REM
+ je L(eq)
+#endif
+
+ movzbl 1(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 1(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 1(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $2, REM
+ je L(eq)
+#endif
+ movzbl 2(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 2(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 2(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $3, REM
+ je L(eq)
+#endif
+ movzbl 3(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 3(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 3(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $4, REM
+ je L(eq)
+#endif
+ movzbl 4(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 4(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 4(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $5, REM
+ je L(eq)
+#endif
+ movzbl 5(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 5(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 5(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $6, REM
+ je L(eq)
+#endif
+ movzbl 6(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 6(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 6(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $7, REM
+ je L(eq)
+#endif
+ movzbl 7(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 7(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 7(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ sub $8, REM
+ je L(eq)
+#endif
+ add $8, %eax
+ add $8, %edx
+
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+ PUSH (%edi)
+#endif
+ PUSH (%esi)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cfi_remember_state
+#endif
+ mov %edx, %edi
+ mov %eax, %esi
+ xorl %eax, %eax
+L(check_offset):
+ movl %edi, %edx
+ movl %esi, %ecx
+ andl $0xfff, %edx
+ andl $0xfff, %ecx
+ cmpl %edx, %ecx
+ cmovl %edx, %ecx
+ lea -0xff0(%ecx), %edx
+ sub %edx, %edi
+ sub %edx, %esi
+ testl %edx, %edx
+ jg L(crosspage)
+L(loop):
+ movdqu (%esi,%edx), %xmm2
+ movdqu (%edi,%edx), %xmm1
+ TOLOWER (%xmm2, %xmm1)
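+	/* pcmpistri with imm8 0x1a: "equal each" byte compare with
+	   negated result, so %ecx gets the index of the first
+	   difference; CF is set when such a difference exists and ZF
+	   when the %esi chunk contains a NUL, hence jbe exits on
+	   either condition.  */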
+ pcmpistri $0x1a, %xmm2, %xmm1
+ jbe L(end)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ sub $16, REM
+ jbe L(more16byteseq)
+#endif
+
+ add $16, %edx
+ jle L(loop)
+L(crosspage):
+ movzbl (%edi,%edx), %eax
+ movzbl (%esi,%edx), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+# endif
+#endif
+ subl %ecx, %eax
+ jne L(ret)
+ testl %ecx, %ecx
+ je L(ret)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ sub $1, REM
+ jbe L(more16byteseq)
+#endif
+ inc %edx
+ cmp $15, %edx
+ jle L(crosspage)
+ add %edx, %edi
+ add %edx, %esi
+ jmp L(check_offset)
+
+ .p2align 4
+L(end):
+ jnc L(ret)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ sub %ecx, REM
+ jbe L(more16byteseq)
+#endif
+ lea (%ecx,%edx), %ecx
+ movzbl (%edi,%ecx), %eax
+ movzbl (%esi,%ecx), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+# endif
+#endif
+ subl %ecx, %eax
+L(ret):
+ POP (%esi)
+ POP (%edi)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ POP (REM)
+#endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ POP (%ebx)
+# endif
+#endif
+ ret
+
+ .p2align 4
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cfi_restore_state
+L(more16byteseq):
+ POP (%esi)
+# ifdef USE_AS_STRNCMP
+ POP (%edi)
+# endif
+#endif
+L(eq):
+ xorl %eax, %eax
+ RETURN
+
+L(neq):
+ mov $1, %eax
+ ja L(neq_bigger)
+ neg %eax
+L(neq_bigger):
+ RETURN
+
+L(less16bytes):
+ add $0xfefefeff, %ecx
+ jnc L(less4bytes)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movd %xmm3, %edi
+ xor %edi, %ecx
+#else
+ xor (%edx), %ecx
+#endif
+ or $0xfefefeff, %ecx
+ add $1, %ecx
+ jnz L(less4bytes)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $4, REM
+ jbe L(eq)
+#endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ psrldq $4, %xmm3
+ psrldq $4, %xmm4
+ movd %xmm3, %ecx
+ movd %xmm4, %edi
+ cmp %edi, %ecx
+ mov %ecx, %edi
+#else
+ mov 4(%edx), %ecx
+ cmp 4(%eax), %ecx
+#endif
+ jne L(more4bytes)
+ add $0xfefefeff, %ecx
+ jnc L(more4bytes)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ xor %edi, %ecx
+#else
+ xor 4(%edx), %ecx
+#endif
+ or $0xfefefeff, %ecx
+ add $1, %ecx
+ jnz L(more4bytes)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ sub $8, REM
+ jbe L(eq)
+#endif
+
+ add $8, %edx
+ add $8, %eax
+L(less4bytes):
+
+ movzbl (%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl (%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, (%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $1, REM
+ je L(eq)
+#endif
+ movzbl 1(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 1(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 1(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $2, REM
+ je L(eq)
+#endif
+
+ movzbl 2(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 2(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 2(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $3, REM
+ je L(eq)
+#endif
+ movzbl 3(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 3(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 3(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+L(more4bytes):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $4, REM
+ je L(eq)
+#endif
+ movzbl 4(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 4(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 4(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $5, REM
+ je L(eq)
+#endif
+ movzbl 5(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 5(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 5(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $6, REM
+ je L(eq)
+#endif
+ movzbl 6(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 6(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 6(%edx)
+#endif
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $7, REM
+ je L(eq)
+#endif
+ movzbl 7(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movzbl 7(%edx), %edi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+ cmpl %ecx, %edi
+#else
+ cmpb %cl, 7(%edx)
+#endif
+ jne L(neq)
+ jmp L(eq)
+
+END (STRCMP)
+
+#endif
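
The .Lbelowupper/.Ltopupper/.Ltouppermask constants are 16 copies of 0x40 ('@'), 0x5b ('['), and 0x20: bytes strictly between '@' and '[' are exactly the ASCII uppercase letters, and OR-ing 0x20 into those lanes lowercases them without a branch. The TOLOWER macro in intrinsics, as a sketch (ASCII only, which is why non-ASCII locales were routed away above):

    #include <emmintrin.h>

    /* Branch-free ASCII tolower on 16 bytes, one operand of TOLOWER:
       upper = (v > '@') & ('[' > v);  v |= upper & 0x20.  Signed
       pcmpgtb leaves bytes >= 0x80 untouched, as intended.  */
    static __m128i
    tolower16 (__m128i v)
    {
      __m128i gt_at  = _mm_cmpgt_epi8 (v, _mm_set1_epi8 (0x40)); /* UCLOW  */
      __m128i lt_brk = _mm_cmpgt_epi8 (_mm_set1_epi8 (0x5b), v); /* UCHIGH */
      __m128i upper  = _mm_and_si128 (gt_at, lt_brk);
      return _mm_or_si128 (v, _mm_and_si128 (upper, _mm_set1_epi8 (0x20)));
    }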
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
new file mode 100644
index 0000000000..b25cc3e068
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
@@ -0,0 +1,2810 @@
+/* strcmp with SSSE3
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef USE_AS_STRNCMP
+# ifndef STRCMP
+# define STRCMP __strncmp_ssse3
+# endif
+# define STR1 8
+# define STR2 STR1+4
+# define CNT STR2+4
+# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
+# define UPDATE_STRNCMP_COUNTER \
+	/* Calculate the number of bytes left to compare.  */ \
+ mov $16, %esi; \
+ sub %ecx, %esi; \
+ cmp %esi, REM; \
+ jbe L(more8byteseq); \
+ sub %esi, REM
+# define FLAGS %ebx
+# define REM %ebp
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+# ifndef STRCMP
+# define STRCMP __strcasecmp_l_ssse3
+# endif
+# ifdef PIC
+# define STR1 8
+# else
+# define STR1 4
+# endif
+# define STR2 STR1+4
+# define LOCALE 12 /* Loaded before the adjustment. */
+# ifdef PIC
+# define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx)
+# else
+# define RETURN ret; .p2align 4
+# endif
+# define UPDATE_STRNCMP_COUNTER
+# define FLAGS (%esp)
+# define NONASCII __strcasecmp_nonascii
+#elif defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+# ifndef STRCMP
+# define STRCMP __strncasecmp_l_ssse3
+# endif
+# ifdef PIC
+# define STR1 12
+# else
+# define STR1 8
+# endif
+# define STR2 STR1+4
+# define CNT STR2+4
+# define LOCALE 16 /* Loaded before the adjustment. */
+# ifdef PIC
+# define RETURN POP (REM); POP (%ebx); ret; \
+ .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM)
+# else
+# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
+# endif
+# define UPDATE_STRNCMP_COUNTER \
+	/* Calculate the number of bytes left to compare.  */ \
+ mov $16, %esi; \
+ sub %ecx, %esi; \
+ cmp %esi, REM; \
+ jbe L(more8byteseq); \
+ sub %esi, REM
+# define FLAGS (%esp)
+# define REM %ebp
+# define NONASCII __strncasecmp_nonascii
+#else
+# ifndef STRCMP
+# define STRCMP __strcmp_ssse3
+# endif
+# define STR1 4
+# define STR2 STR1+4
+# define RETURN ret; .p2align 4
+# define UPDATE_STRNCMP_COUNTER
+# define FLAGS %ebx
+#endif
+
+ .section .text.ssse3,"ax",@progbits
+
+#ifdef USE_AS_STRCASECMP_L
+ENTRY (__strcasecmp_ssse3)
+# ifdef PIC
+ PUSH (%ebx)
+ LOAD_PIC_REG(bx)
+ movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ addl %gs:0, %eax
+ movl (%eax), %eax
+# else
+ movl %gs:(%eax), %eax
+# endif
+# else
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl %gs:0, %eax
+ movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
+# else
+ movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
+# endif
+# endif
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+ movl (%eax), %eax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+# ifdef PIC
+ je L(ascii)
+ POP (%ebx)
+ jmp __strcasecmp_nonascii
+# else
+ jne __strcasecmp_nonascii
+ jmp L(ascii)
+# endif
+END (__strcasecmp_ssse3)
+#endif
+
+#ifdef USE_AS_STRNCASECMP_L
+ENTRY (__strncasecmp_ssse3)
+# ifdef PIC
+ PUSH (%ebx)
+ LOAD_PIC_REG(bx)
+ movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ addl %gs:0, %eax
+ movl (%eax), %eax
+# else
+ movl %gs:(%eax), %eax
+# endif
+# else
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl %gs:0, %eax
+ movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
+# else
+ movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
+# endif
+# endif
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+ movl (%eax), %eax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+# ifdef PIC
+ je L(ascii)
+ POP (%ebx)
+ jmp __strncasecmp_nonascii
+# else
+ jne __strncasecmp_nonascii
+ jmp L(ascii)
+# endif
+END (__strncasecmp_ssse3)
+#endif
+
+ENTRY (STRCMP)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movl LOCALE(%esp), %eax
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+ movl (%eax), %eax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+ jne NONASCII
+
+# ifdef PIC
+ PUSH (%ebx)
+ LOAD_PIC_REG(bx)
+# endif
+L(ascii):
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+.Lbelowupper:
+ .quad 0x4040404040404040
+ .quad 0x4040404040404040
+.Ltopupper:
+ .quad 0x5b5b5b5b5b5b5b5b
+ .quad 0x5b5b5b5b5b5b5b5b
+.Ltouppermask:
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .previous
+
+# ifdef PIC
+# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
+# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
+# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
+# else
+# define UCLOW_reg .Lbelowupper
+# define UCHIGH_reg .Ltopupper
+# define LCQWORD_reg .Ltouppermask
+# endif
+#endif
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ PUSH (REM)
+#endif
+
+ movl STR1(%esp), %edx
+ movl STR2(%esp), %eax
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ movl CNT(%esp), REM
+ cmp $16, REM
+ jb L(less16bytes_sncmp)
+#elif !defined USE_AS_STRCASECMP_L
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ add $8, %edx
+ add $8, %eax
+#endif
+ movl %edx, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(crosspage)
+ mov %eax, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(crosspage)
+ pxor %xmm0, %xmm0
+ movlpd (%eax), %xmm1
+ movlpd (%edx), %xmm2
+ movhpd 8(%eax), %xmm1
+ movhpd 8(%edx), %xmm2
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# define TOLOWER(reg1, reg2) \
+ movdqa reg1, %xmm5; \
+ movdqa reg2, %xmm7; \
+ movdqa UCHIGH_reg, %xmm6; \
+ pcmpgtb UCLOW_reg, %xmm5; \
+ pcmpgtb UCLOW_reg, %xmm7; \
+ pcmpgtb reg1, %xmm6; \
+ pand %xmm6, %xmm5; \
+ movdqa UCHIGH_reg, %xmm6; \
+ pcmpgtb reg2, %xmm6; \
+ pand %xmm6, %xmm7; \
+ pand LCQWORD_reg, %xmm5; \
+ por %xmm5, reg1; \
+ pand LCQWORD_reg, %xmm7; \
+ por %xmm7, reg2
+ TOLOWER (%xmm1, %xmm2)
+#else
+# define TOLOWER(reg1, reg2)
+#endif
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %ecx
+ sub $0xffff, %ecx
+ jnz L(less16bytes)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(eq)
+#endif
+ add $16, %eax
+ add $16, %edx
+
+L(crosspage):
+
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+ PUSH (FLAGS)
+#endif
+ PUSH (%edi)
+ PUSH (%esi)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+#endif
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cfi_remember_state
+#endif
+
+ movl %edx, %edi
+ movl %eax, %ecx
+ and $0xf, %ecx
+ and $0xf, %edi
+ xor %ecx, %eax
+ xor %edi, %edx
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+ xor FLAGS, FLAGS
+#endif
+ cmp %edi, %ecx
+ je L(ashr_0)
+ ja L(bigger)
+ orl $0x20, FLAGS
+ xchg %edx, %eax
+ xchg %ecx, %edi
+L(bigger):
+ lea 15(%edi), %edi
+ sub %ecx, %edi
+ cmp $8, %edi
+ jle L(ashr_less_8)
+ cmp $14, %edi
+ je L(ashr_15)
+ cmp $13, %edi
+ je L(ashr_14)
+ cmp $12, %edi
+ je L(ashr_13)
+ cmp $11, %edi
+ je L(ashr_12)
+ cmp $10, %edi
+ je L(ashr_11)
+ cmp $9, %edi
+ je L(ashr_10)
+L(ashr_less_8):
+ je L(ashr_9)
+ cmp $7, %edi
+ je L(ashr_8)
+ cmp $6, %edi
+ je L(ashr_7)
+ cmp $5, %edi
+ je L(ashr_6)
+ cmp $4, %edi
+ je L(ashr_5)
+ cmp $3, %edi
+ je L(ashr_4)
+ cmp $2, %edi
+ je L(ashr_3)
+ cmp $1, %edi
+ je L(ashr_2)
+ cmp $0, %edi
+ je L(ashr_1)
+
+/*
+ * The following cases will be handled by ashr_0
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(0~15) n(0~15) 15(15+ n-n) ashr_0
+ */
+ .p2align 4
+L(ashr_0):
+ mov $0xffff, %esi
+ movdqa (%eax), %xmm1
+ pxor %xmm0, %xmm0
+ pcmpeqb %xmm1, %xmm0
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movdqa (%edx), %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm2, %xmm1
+#else
+ pcmpeqb (%edx), %xmm1
+#endif
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ mov %ecx, %edi
+ jne L(less32bytes)
+ UPDATE_STRNCMP_COUNTER
+ movl $0x10, FLAGS
+ mov $0x10, %ecx
+ pxor %xmm0, %xmm0
+ .p2align 4
+L(loop_ashr_0):
+ movdqa (%eax, %ecx), %xmm1
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ movdqa (%edx, %ecx), %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+#else
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb (%edx, %ecx), %xmm1
+#endif
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ jmp L(loop_ashr_0)
+
+/*
+ * The following cases will be handled by ashr_1
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(15) n -15 0(15 +(n-15) - n) ashr_1
+ */
+ .p2align 4
+L(ashr_1):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $15, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -15(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $1, FLAGS
+ lea 1(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_1):
+ add $16, %edi
+ jg L(nibble_ashr_1)
+
+L(gobble_ashr_1):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $1, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_1)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $1, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_1)
+
+ .p2align 4
+L(nibble_ashr_1):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfffe, %esi
+ jnz L(ashr_1_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $15, REM
+ jbe L(ashr_1_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_1)
+
+ .p2align 4
+L(ashr_1_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $1, %xmm0
+ psrldq $1, %xmm3
+ jmp L(aftertail)
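+
+/* Each ashr_N body below repeats this pattern for one relative
+   misalignment N: the string that sits N bytes behind is read with
+   aligned loads, palignr $N stitches the previous and current chunks
+   into the 16 bytes that line up with the other string, and psubb
+   folds NUL detection into the equality mask.  In intrinsics terms
+   (illustrative, with N = 1 as above), one gobble step computes
+
+     window = _mm_alignr_epi8 (cur, prev, 1);              // palignr $1
+     nuls   = _mm_cmpeq_epi8 (aligned, zero);
+     eq     = _mm_cmpeq_epi8 (window, aligned);            // pcmpeqb
+     mask   = _mm_movemask_epi8 (_mm_sub_epi8 (eq, nuls)); // psubb
+
+   and the loop continues only while mask == 0xffff, i.e. while every
+   byte pair matches and none is a NUL.  */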
+
+/*
+ * The following cases will be handled by ashr_2
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
+ */
+ .p2align 4
+L(ashr_2):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $14, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -14(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $2, FLAGS
+ lea 2(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_2):
+ add $16, %edi
+ jg L(nibble_ashr_2)
+
+L(gobble_ashr_2):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $2, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_2)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $2, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_2)
+
+ .p2align 4
+L(nibble_ashr_2):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfffc, %esi
+ jnz L(ashr_2_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $14, REM
+ jbe L(ashr_2_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_2)
+
+ .p2align 4
+L(ashr_2_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $2, %xmm0
+ psrldq $2, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_3
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
+ */
+ .p2align 4
+L(ashr_3):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $13, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -13(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $3, FLAGS
+ lea 3(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_3):
+ add $16, %edi
+ jg L(nibble_ashr_3)
+
+L(gobble_ashr_3):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $3, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_3)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $3, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_3)
+
+ .p2align 4
+L(nibble_ashr_3):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfff8, %esi
+ jnz L(ashr_3_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $13, REM
+ jbe L(ashr_3_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_3)
+
+ .p2align 4
+L(ashr_3_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $3, %xmm0
+ psrldq $3, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_4
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(12~15)             n - 12          3(15+(n-12)-n)          ashr_4
+ */
+ .p2align 4
+L(ashr_4):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $12, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -12(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $4, FLAGS
+ lea 4(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_4):
+ add $16, %edi
+ jg L(nibble_ashr_4)
+
+L(gobble_ashr_4):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $4, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_4)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $4, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_4)
+
+ .p2align 4
+L(nibble_ashr_4):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfff0, %esi
+ jnz L(ashr_4_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $12, REM
+ jbe L(ashr_4_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_4)
+
+ .p2align 4
+L(ashr_4_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $4, %xmm0
+ psrldq $4, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_5
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(11~15)             n - 11          4(15+(n-11)-n)          ashr_5
+ */
+ .p2align 4
+L(ashr_5):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $11, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -11(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $5, FLAGS
+ lea 5(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_5):
+ add $16, %edi
+ jg L(nibble_ashr_5)
+
+L(gobble_ashr_5):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $5, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_5)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $5, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_5)
+
+ .p2align 4
+L(nibble_ashr_5):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xffe0, %esi
+ jnz L(ashr_5_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $11, REM
+ jbe L(ashr_5_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_5)
+
+ .p2align 4
+L(ashr_5_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $5, %xmm0
+ psrldq $5, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_6
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(10~15)             n - 10          5(15+(n-10)-n)          ashr_6
+ */
+
+ .p2align 4
+L(ashr_6):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $10, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -10(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $6, FLAGS
+ lea 6(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_6):
+ add $16, %edi
+ jg L(nibble_ashr_6)
+
+L(gobble_ashr_6):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $6, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_6)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $6, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_6)
+
+ .p2align 4
+L(nibble_ashr_6):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xffc0, %esi
+ jnz L(ashr_6_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $10, REM
+ jbe L(ashr_6_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_6)
+
+ .p2align 4
+L(ashr_6_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $6, %xmm0
+ psrldq $6, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_7
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(9~15)              n - 9           6(15+(n-9)-n)           ashr_7
+ */
+
+ .p2align 4
+L(ashr_7):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $9, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -9(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $7, FLAGS
+ lea 7(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_7):
+ add $16, %edi
+ jg L(nibble_ashr_7)
+
+L(gobble_ashr_7):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $7, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_7)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $7, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_7)
+
+ .p2align 4
+L(nibble_ashr_7):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xff80, %esi
+ jnz L(ashr_7_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $9, REM
+ jbe L(ashr_7_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_7)
+
+ .p2align 4
+L(ashr_7_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $7, %xmm0
+ psrldq $7, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_8
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(8~15)              n - 8           7(15+(n-8)-n)           ashr_8
+ */
+ .p2align 4
+L(ashr_8):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $8, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -8(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $8, FLAGS
+ lea 8(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_8):
+ add $16, %edi
+ jg L(nibble_ashr_8)
+
+L(gobble_ashr_8):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $8, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_8)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $8, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_8)
+
+ .p2align 4
+L(nibble_ashr_8):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xff00, %esi
+ jnz L(ashr_8_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $8, REM
+ jbe L(ashr_8_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_8)
+
+ .p2align 4
+L(ashr_8_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $8, %xmm0
+ psrldq $8, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_9
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(7~15)              n - 7           8(15+(n-7)-n)           ashr_9
+ */
+ .p2align 4
+L(ashr_9):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $7, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -7(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $9, FLAGS
+ lea 9(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_9):
+ add $16, %edi
+ jg L(nibble_ashr_9)
+
+L(gobble_ashr_9):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $9, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_9)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $9, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_9)
+
+ .p2align 4
+L(nibble_ashr_9):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfe00, %esi
+ jnz L(ashr_9_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $7, REM
+ jbe L(ashr_9_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_9)
+
+ .p2align 4
+L(ashr_9_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $9, %xmm0
+ psrldq $9, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_10
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(6~15)              n - 6           9(15+(n-6)-n)           ashr_10
+ */
+ .p2align 4
+L(ashr_10):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $6, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -6(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $10, FLAGS
+ lea 10(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_10):
+ add $16, %edi
+ jg L(nibble_ashr_10)
+
+L(gobble_ashr_10):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $10, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_10)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $10, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_10)
+
+ .p2align 4
+L(nibble_ashr_10):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfc00, %esi
+ jnz L(ashr_10_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $6, REM
+ jbe L(ashr_10_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_10)
+
+ .p2align 4
+L(ashr_10_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $10, %xmm0
+ psrldq $10, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_11
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(5~15)              n - 5           10(15+(n-5)-n)          ashr_11
+ */
+ .p2align 4
+L(ashr_11):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $5, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -5(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $11, FLAGS
+ lea 11(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_11):
+ add $16, %edi
+ jg L(nibble_ashr_11)
+
+L(gobble_ashr_11):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $11, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_11)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $11, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_11)
+
+ .p2align 4
+L(nibble_ashr_11):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xf800, %esi
+ jnz L(ashr_11_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $5, REM
+ jbe L(ashr_11_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_11)
+
+ .p2align 4
+L(ashr_11_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $11, %xmm0
+ psrldq $11, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_12
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(4~15)              n - 4           11(15+(n-4)-n)          ashr_12
+ */
+ .p2align 4
+L(ashr_12):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $4, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -4(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $12, FLAGS
+ lea 12(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_12):
+ add $16, %edi
+ jg L(nibble_ashr_12)
+
+L(gobble_ashr_12):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $12, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_12)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $12, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_12)
+
+ .p2align 4
+L(nibble_ashr_12):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xf000, %esi
+ jnz L(ashr_12_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $4, REM
+ jbe L(ashr_12_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_12)
+
+ .p2align 4
+L(ashr_12_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $12, %xmm0
+ psrldq $12, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_13
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(3~15)              n - 3           12(15+(n-3)-n)          ashr_13
+ */
+ .p2align 4
+L(ashr_13):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -3(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $13, FLAGS
+ lea 13(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_13):
+ add $16, %edi
+ jg L(nibble_ashr_13)
+
+L(gobble_ashr_13):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $13, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_13)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $13, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_13)
+
+ .p2align 4
+L(nibble_ashr_13):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xe000, %esi
+ jnz L(ashr_13_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $3, REM
+ jbe L(ashr_13_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_13)
+
+ .p2align 4
+L(ashr_13_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $13, %xmm0
+ psrldq $13, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_14
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(2~15)              n - 2           13(15+(n-2)-n)          ashr_14
+ */
+ .p2align 4
+L(ashr_14):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $2, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -2(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $14, FLAGS
+ lea 14(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_14):
+ add $16, %edi
+ jg L(nibble_ashr_14)
+
+L(gobble_ashr_14):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $14, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_14)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $14, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_14)
+
+ .p2align 4
+L(nibble_ashr_14):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xc000, %esi
+ jnz L(ashr_14_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $2, REM
+ jbe L(ashr_14_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_14)
+
+ .p2align 4
+L(ashr_14_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $14, %xmm0
+ psrldq $14, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_15
+ * ecx(offset of esi)   eax(offset of edi)   relative offset    corresponding case
+ *      n(1~15)              n - 1           14(15+(n-1)-n)          ashr_15
+ */
+
+ .p2align 4
+L(ashr_15):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $1, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -1(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ orl $15, FLAGS
+ lea 15(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_15):
+ add $16, %edi
+ jg L(nibble_ashr_15)
+
+L(gobble_ashr_15):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $15, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_15)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $15, %xmm3, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $16, REM
+ lea -16(REM), REM
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_15)
+
+ .p2align 4
+L(nibble_ashr_15):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0x8000, %esi
+ jnz L(ashr_15_exittail)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $1, REM
+ jbe L(ashr_15_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_15)
+
+ .p2align 4
+L(ashr_15_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $15, %xmm0
+ psrldq $15, %xmm3
+ jmp L(aftertail)
+
+ .p2align 4
+L(aftertail):
+ TOLOWER (%xmm1, %xmm3)
+ pcmpeqb %xmm3, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ not %esi
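+/*
+ * For the ashr cases above, FLAGS carries two pieces of state: its low
+ * five bits hold the alignment shift N recorded with "orl $N, FLAGS",
+ * and bit 0x20 records that the two string pointers were swapped on
+ * entry.  L(exit) converts the loop index in ecx into the second
+ * string's byte offset (ecx + N - 16), and L(less32bytes) swaps the
+ * pointers back before the byte-wise tail compare.
+ */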
+L(exit):
+ mov FLAGS, %edi
+ and $0x1f, %edi
+ lea -16(%edi, %ecx), %edi
+L(less32bytes):
+ add %edi, %edx
+ add %ecx, %eax
+ testl $0x20, FLAGS
+ jz L(ret2)
+ xchg %eax, %edx
+
+ .p2align 4
+L(ret2):
+ mov %esi, %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+#endif
+ POP (%esi)
+ POP (%edi)
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+ POP (FLAGS)
+#endif
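+/*
+ * On entry ecx holds the compare mask built above: its lowest set bit
+ * is the position of the first byte where the strings differ or both
+ * terminate.  The bits are tested one at a time instead of with bsf,
+ * first bits 0-7 in %cl, then bits 8-15 in %ch via L(2next_8_bytes).
+ */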
+L(less16bytes):
+ test %cl, %cl
+ jz L(2next_8_bytes)
+
+ test $0x01, %cl
+ jnz L(Byte0)
+
+ test $0x02, %cl
+ jnz L(Byte1)
+
+ test $0x04, %cl
+ jnz L(Byte2)
+
+ test $0x08, %cl
+ jnz L(Byte3)
+
+ test $0x10, %cl
+ jnz L(Byte4)
+
+ test $0x20, %cl
+ jnz L(Byte5)
+
+ test $0x40, %cl
+ jnz L(Byte6)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $7, REM
+ jbe L(eq)
+#endif
+
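+/*
+ * _nl_C_LC_CTYPE_tolower is a table of 32-bit entries indexed from
+ * -128 (to cover EOF and signed chars), so the constant 128*4 rebases
+ * the symbol to element 0.  A rough C equivalent of the lookups below:
+ *
+ *   const int32_t *tab = _nl_C_LC_CTYPE_tolower + 128;
+ *   c = tab[c];            // c is in 0..255 here, from the movzx
+ */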
+ movzx 7(%eax), %ecx
+ movzx 7(%edx), %eax
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte0):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $0, REM
+ jbe L(eq)
+#endif
+ movzx (%eax), %ecx
+ movzx (%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte1):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $1, REM
+ jbe L(eq)
+#endif
+ movzx 1(%eax), %ecx
+ movzx 1(%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte2):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $2, REM
+ jbe L(eq)
+#endif
+ movzx 2(%eax), %ecx
+ movzx 2(%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte3):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $3, REM
+ jbe L(eq)
+#endif
+ movzx 3(%eax), %ecx
+ movzx 3(%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte4):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $4, REM
+ jbe L(eq)
+#endif
+ movzx 4(%eax), %ecx
+ movzx 4(%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte5):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $5, REM
+ jbe L(eq)
+#endif
+ movzx 5(%eax), %ecx
+ movzx 5(%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte6):
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $6, REM
+ jbe L(eq)
+#endif
+ movzx 6(%eax), %ecx
+ movzx 6(%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
+L(2next_8_bytes):
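+/*
+ * Bits 0-7 of the mask were all clear, so the first difference is in
+ * bytes 8-15: advance both pointers by 8 and rerun the same dispatch
+ * on %ch, which holds mask bits 8-15.
+ */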
+ add $8, %eax
+ add $8, %edx
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $8, REM
+ lea -8(REM), REM
+ jbe L(eq)
+#endif
+
+ test $0x01, %ch
+ jnz L(Byte0)
+
+ test $0x02, %ch
+ jnz L(Byte1)
+
+ test $0x04, %ch
+ jnz L(Byte2)
+
+ test $0x08, %ch
+ jnz L(Byte3)
+
+ test $0x10, %ch
+ jnz L(Byte4)
+
+ test $0x20, %ch
+ jnz L(Byte5)
+
+ test $0x40, %ch
+ jnz L(Byte6)
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp $7, REM
+ jbe L(eq)
+#endif
+ movzx 7(%eax), %ecx
+ movzx 7(%edx), %eax
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+# endif
+#endif
+
+ sub %ecx, %eax
+ RETURN
+
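+/*
+ * The condition flags set by the byte comparison at the jump site are
+ * still live here: "ja" keeps the +1 in eax, otherwise it is negated
+ * to -1, so the sign of the result comes straight from that unsigned
+ * byte compare.
+ */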
+#ifdef USE_AS_STRNCMP
+L(neq_sncmp):
+#endif
+L(neq):
+ mov $1, %eax
+ ja L(neq_bigger)
+ neg %eax
+L(neq_bigger):
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+#endif
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ POP (REM)
+#endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ POP (%ebx)
+# endif
+#endif
+ ret
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ .p2align 4
+ cfi_restore_state
+L(more8byteseq):
+
+# ifdef USE_AS_STRNCASECMP_L
+ addl $4, %esp
+ cfi_adjust_cfa_offset (-4)
+# endif
+ POP (%esi)
+ POP (%edi)
+# ifdef USE_AS_STRNCMP
+ POP (FLAGS)
+# endif
+#endif
+
+#ifdef USE_AS_STRNCMP
+L(eq_sncmp):
+#endif
+L(eq):
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ POP (REM)
+#endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+ POP (%ebx)
+# endif
+#endif
+ xorl %eax, %eax
+ ret
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ .p2align 4
+# if defined USE_AS_STRNCASECMP_L && defined PIC
+ CFI_PUSH (%ebx)
+# endif
+ CFI_PUSH (REM)
+L(less16bytes_sncmp):
+# ifdef USE_AS_STRNCASECMP_L
+ PUSH (%esi)
+# endif
+ test REM, REM
+ jz L(eq_sncmp)
+
+ movzbl (%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl (%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, (%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $1, REM
+ je L(eq_sncmp)
+
+ movzbl 1(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 1(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 1(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $2, REM
+ je L(eq_sncmp)
+
+ movzbl 2(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 2(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 2(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $3, REM
+ je L(eq_sncmp)
+
+ movzbl 3(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 3(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 3(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $4, REM
+ je L(eq_sncmp)
+
+ movzbl 4(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 4(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 4(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $5, REM
+ je L(eq_sncmp)
+
+ movzbl 5(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 5(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 5(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $6, REM
+ je L(eq_sncmp)
+
+ movzbl 6(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 6(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 6(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $7, REM
+ je L(eq_sncmp)
+
+ movzbl 7(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 7(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 7(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+
+ cmp $8, REM
+ je L(eq_sncmp)
+
+ movzbl 8(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 8(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 8(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $9, REM
+ je L(eq_sncmp)
+
+ movzbl 9(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 9(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 9(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $10, REM
+ je L(eq_sncmp)
+
+ movzbl 10(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 10(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 10(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $11, REM
+ je L(eq_sncmp)
+
+ movzbl 11(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 11(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 11(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+
+ cmp $12, REM
+ je L(eq_sncmp)
+
+ movzbl 12(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 12(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 12(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $13, REM
+ je L(eq_sncmp)
+
+ movzbl 13(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 13(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 13(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $14, REM
+ je L(eq_sncmp)
+
+ movzbl 14(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 14(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 14(%edx)
+# endif
+ jne L(neq_sncmp)
+ test %cl, %cl
+ je L(eq_sncmp)
+
+ cmp $15, REM
+ je L(eq_sncmp)
+
+ movzbl 15(%eax), %ecx
+# ifdef USE_AS_STRNCASECMP_L
+ movzbl 15(%edx), %esi
+# ifdef PIC
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
+# else
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+ movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
+# endif
+ cmpl %ecx, %esi
+# else
+ cmpb %cl, 15(%edx)
+# endif
+ jne L(neq_sncmp)
+
+# ifdef USE_AS_STRNCASECMP_L
+L(eq_sncmp):
+ POP (%esi)
+# endif
+ POP (REM)
+# if defined USE_AS_STRNCASECMP_L && defined PIC
+ POP (%ebx)
+# endif
+ xor %eax, %eax
+ ret
+
+# ifdef USE_AS_STRNCASECMP_L
+ .p2align 4
+# ifdef PIC
+ CFI_PUSH (%ebx)
+# endif
+ CFI_PUSH (REM)
+ CFI_PUSH (%esi)
+L(neq_sncmp):
+ mov $1, %eax
+ mov $-1, %edx
+ cmovna %edx, %eax
+ POP (%esi)
+ POP (REM)
+# ifdef PIC
+ POP (%ebx)
+# endif
+ ret
+# endif
+#endif
+
+END (STRCMP)
+
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp.S
new file mode 100644
index 0000000000..56de25a4b7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcmp.S
@@ -0,0 +1,95 @@
+/* Multiple versions of strcmp
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifdef USE_AS_STRNCMP
+# define STRCMP strncmp
+# define __GI_STRCMP __GI_strncmp
+# define __STRCMP_IA32 __strncmp_ia32
+# define __STRCMP_SSSE3 __strncmp_ssse3
+# define __STRCMP_SSE4_2 __strncmp_sse4_2
+#elif defined USE_AS_STRCASECMP_L
+# define STRCMP __strcasecmp_l
+# define __GI_STRCMP __GI_strcasecmp_l
+# define __STRCMP_IA32 __strcasecmp_l_ia32
+# define __STRCMP_SSSE3 __strcasecmp_l_ssse3
+# define __STRCMP_SSE4_2 __strcasecmp_l_sse4_2
+#elif defined USE_AS_STRNCASECMP_L
+# define STRCMP __strncasecmp_l
+# define __GI_STRCMP __GI_strncasecmp_l
+# define __STRCMP_IA32 __strncasecmp_l_ia32
+# define __STRCMP_SSSE3 __strncasecmp_l_ssse3
+# define __STRCMP_SSE4_2 __strncasecmp_l_sse4_2
+#else
+# define STRCMP strcmp
+# define __GI_STRCMP __GI_strcmp
+# define __STRCMP_IA32 __strcmp_ia32
+# define __STRCMP_SSSE3 __strcmp_ssse3
+# define __STRCMP_SSE4_2 __strcmp_sse4_2
+#endif
+
+/* Define multiple versions only for the definition in libc.  Don't
+ define multiple versions for strncmp in the static library, since
+ strncmp is needed before initialization has happened. */
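+/*
+ * The ENTRY block below is the IFUNC selector: the dynamic linker
+ * calls it once at relocation time, and the address it returns in %eax
+ * becomes the implementation of STRCMP.  A rough C equivalent
+ * (illustrative only; the feature-test names here are made up, the
+ * real checks are the HAS_CPU_FEATURE/HAS_ARCH_FEATURE macros):
+ *
+ *   static void *strcmp_selector (void)
+ *   {
+ *     if (!has_ssse3 ())
+ *       return __STRCMP_IA32;
+ *     if (has_sse4_2 () && !slow_sse4_2 ())
+ *       return __STRCMP_SSE4_2;
+ *     return __STRCMP_SSSE3;
+ *   }
+ */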
+#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
+ .text
+ENTRY(STRCMP)
+ .type STRCMP, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__STRCMP_IA32)
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__STRCMP_SSSE3)
+ HAS_CPU_FEATURE (SSE4_2)
+ jz 2f
+ HAS_ARCH_FEATURE (Slow_SSE4_2)
+ jnz 2f
+ LOAD_FUNC_GOT_EAX (__STRCMP_SSE4_2)
+2: ret
+END(STRCMP)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __STRCMP_IA32, @function; \
+ .p2align 4; \
+ .globl __STRCMP_IA32; \
+ .hidden __STRCMP_IA32; \
+ __STRCMP_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in a shared library,
+ since they would be called without setting up the EBX register
+ needed for the PLT, which IFUNC relies on. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32
+# endif
+#endif
+
+#if !defined USE_AS_STRNCMP && !defined USE_AS_STRCASECMP_L \
+ && !defined USE_AS_STRNCASECMP_L
+# include "../strcmp.S"
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-sse2.S
new file mode 100644
index 0000000000..ed627a5f62
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-sse2.S
@@ -0,0 +1,2250 @@
+/* strcpy with SSE2 and unaligned load
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifndef STRCPY
+# define STRCPY __strcpy_sse2
+# endif
+
+# define STR1 PARMS
+# define STR2 STR1+4
+# define LEN STR2+4
+
+# ifdef USE_AS_STRNCPY
+# define PARMS 16
+# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
+# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
+ CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
+
+# ifdef SHARED
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into ECX and branch to it. TABLE is a
+ jump table with relative offsets.
+ INDEX is a register that contains the index into the jump table.
+ SCALE is the scale of INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into ECX. */ \
+ SETUP_PIC_REG(cx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ecx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ecx,INDEX,SCALE), %ecx; \
+ /* We loaded the jump table and adjusted ECX. Go. */ \
+ jmp *%ecx
+# else
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+ absolute offsets.  INDEX is a register that contains the index into
+ the jump table.  SCALE is the scale of INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+# endif
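+
+/*
+ * With JMPTBL(I, B) = I - B, the PIC table stores label-minus-table
+ * deltas and so contains no absolute addresses.  A sketch of the same
+ * idea in GNU C computed gotos (illustrative only; the macro anchors
+ * on the table's own address rather than a label):
+ *
+ *   static const long tbl[] = { &&c0 - &&c0, &&c1 - &&c0 };
+ *   goto *(&&c0 + tbl[idx]);
+ */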
+
+.text
+ENTRY (STRCPY)
+ ENTRANCE
+ mov STR1(%esp), %edi
+ mov STR2(%esp), %esi
+ movl LEN(%esp), %ebx
+ test %ebx, %ebx
+ jz L(ExitZero)
+
+ mov %esi, %ecx
+# ifndef USE_AS_STPCPY
+ mov %edi, %eax /* save result */
+# endif
+ and $15, %ecx
+ jz L(SourceStringAlignmentZero)
+
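+/*
+ * The source is rounded down to a 16-byte boundary and over-read with
+ * an aligned load; "shr %cl, %edx" below throws away the mask bits for
+ * the bytes before the real start.  An aligned 16-byte load cannot
+ * cross a page boundary, so the over-read is safe.
+ */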
+ and $-16, %esi
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+
+ pcmpeqb (%esi), %xmm1
+ add %ecx, %ebx
+ pmovmskb %xmm1, %edx
+ shr %cl, %edx
+# ifdef USE_AS_STPCPY
+ cmp $16, %ebx
+ jbe L(CopyFrom1To16BytesTailCase2OrCase3)
+# else
+ cmp $17, %ebx
+ jbe L(CopyFrom1To16BytesTailCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail)
+
+ pcmpeqb 16(%esi), %xmm0
+ pmovmskb %xmm0, %edx
+# ifdef USE_AS_STPCPY
+ cmp $32, %ebx
+ jbe L(CopyFrom1To32BytesCase2OrCase3)
+# else
+ cmp $33, %ebx
+ jbe L(CopyFrom1To32BytesCase2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes)
+
+ movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+ movdqu %xmm1, (%edi)
+
+ sub %ecx, %edi
+
+/* If source address alignment != destination address alignment */
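+/* The loop keeps one chunk in flight: each step stores the 16 bytes
+   validated in the previous step (movdqu) while pcmpeqb scans the next
+   aligned chunk for the terminating NUL, rotating through xmm1-xmm4
+   until it can enter the 64-byte loop below.  */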
+ .p2align 4
+L(Unalign16Both):
+ mov $16, %ecx
+ movdqa (%esi, %ecx), %xmm1
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%edi, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ sub $48, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm2)
+
+ movaps 16(%esi, %ecx), %xmm3
+ movdqu %xmm2, (%edi, %ecx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm3)
+
+ movaps 16(%esi, %ecx), %xmm4
+ movdqu %xmm3, (%edi, %ecx)
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm4)
+
+ movaps 16(%esi, %ecx), %xmm1
+ movdqu %xmm4, (%edi, %ecx)
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm1)
+
+ movaps 16(%esi, %ecx), %xmm2
+ movdqu %xmm1, (%edi, %ecx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm2)
+
+ movaps 16(%esi, %ecx), %xmm3
+ movdqu %xmm2, (%edi, %ecx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm3)
+
+ movdqu %xmm3, (%edi, %ecx)
+ mov %esi, %edx
+ lea 16(%esi, %ecx), %esi
+ and $-0x40, %esi
+ sub %esi, %edx
+ sub %edx, %edi
+ lea 128(%ebx, %edx), %ebx
+
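+/*
+ * 64-byte NUL scan: pminub folds the four 16-byte chunks into one
+ * vector whose lane j is the minimum of the four bytes at offset j, so
+ * the folded vector has a zero lane iff any of the 64 bytes is zero,
+ * and a single pcmpeqb/pmovmskb covers the whole block.  A scalar
+ * model (illustrative):
+ *
+ *   unsigned mask = 0;
+ *   for (int j = 0; j < 16; j++) {
+ *     unsigned char m = s[j];
+ *     for (int c = 1; c < 4; c++)
+ *       if (s[c * 16 + j] < m)
+ *         m = s[c * 16 + j];
+ *     mask |= (unsigned) (m == 0) << j;
+ *   }
+ */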
+L(Unaligned64Loop):
+ movaps (%esi), %xmm2
+ movaps %xmm2, %xmm4
+ movaps 16(%esi), %xmm5
+ movaps 32(%esi), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 48(%esi), %xmm7
+ pminub %xmm5, %xmm2
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+ sub $64, %ebx
+ jbe L(UnalignedLeaveCase2OrCase3)
+ test %edx, %edx
+ jnz L(Unaligned64Leave)
+L(Unaligned64Loop_start):
+ add $64, %edi
+ add $64, %esi
+ movdqu %xmm4, -64(%edi)
+ movaps (%esi), %xmm2
+ movdqa %xmm2, %xmm4
+ movdqu %xmm5, -48(%edi)
+ movaps 16(%esi), %xmm5
+ pminub %xmm5, %xmm2
+ movaps 32(%esi), %xmm3
+ movdqu %xmm6, -32(%edi)
+ movaps %xmm3, %xmm6
+ movdqu %xmm7, -16(%edi)
+ movaps 48(%esi), %xmm7
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+ sub $64, %ebx
+ jbe L(UnalignedLeaveCase2OrCase3)
+ test %edx, %edx
+ jz L(Unaligned64Loop_start)
+L(Unaligned64Leave):
+ pxor %xmm1, %xmm1
+
+ pcmpeqb %xmm4, %xmm0
+ pcmpeqb %xmm5, %xmm1
+ pmovmskb %xmm0, %edx
+ pmovmskb %xmm1, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnaligned_0)
+ test %ecx, %ecx
+ jnz L(CopyFrom1To16BytesUnaligned_16)
+
+ pcmpeqb %xmm6, %xmm0
+ pcmpeqb %xmm7, %xmm1
+ pmovmskb %xmm0, %edx
+ pmovmskb %xmm1, %ecx
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnaligned_32)
+
+ bsf %ecx, %edx
+ movdqu %xmm4, (%edi)
+ movdqu %xmm5, 16(%edi)
+ movdqu %xmm6, 32(%edi)
+# ifdef USE_AS_STPCPY
+ lea 48(%edi, %edx), %eax
+# endif
+ movdqu %xmm7, 48(%edi)
+ add $15, %ebx
+ sub %edx, %ebx
+ lea 49(%edi, %edx), %edi
+ jmp L(StrncpyFillTailWithZero)
+
+/* If source address alignment == destination address alignment */
+
+L(SourceStringAlignmentZero):
+ pxor %xmm0, %xmm0
+ movdqa (%esi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+# ifdef USE_AS_STPCPY
+ cmp $16, %ebx
+ jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
+# else
+ cmp $17, %ebx
+ jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail1)
+
+ pcmpeqb 16(%esi), %xmm0
+ movdqu %xmm1, (%edi)
+ pmovmskb %xmm0, %edx
+# ifdef USE_AS_STPCPY
+ cmp $32, %ebx
+ jbe L(CopyFrom1To32Bytes1Case2OrCase3)
+# else
+ cmp $33, %ebx
+ jbe L(CopyFrom1To32Bytes1Case2OrCase3)
+# endif
+ test %edx, %edx
+ jnz L(CopyFrom1To32Bytes1)
+
+ jmp L(Unalign16Both)
+
+/*-----------------End of main part---------------------------*/
+
+/* Case1 */
+ .p2align 4
+L(CopyFrom1To16BytesTail):
+ sub %ecx, %ebx
+ add %ecx, %esi
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes1):
+ add $16, %esi
+ add $16, %edi
+ sub $16, %ebx
+L(CopyFrom1To16BytesTail1):
+ bsf %edx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes):
+ sub %ecx, %ebx
+ bsf %edx, %edx
+ add %ecx, %esi
+ add $16, %edx
+ sub %ecx, %edx
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_0):
+ bsf %edx, %edx
+# ifdef USE_AS_STPCPY
+ lea (%edi, %edx), %eax
+# endif
+ movdqu %xmm4, (%edi)
+ add $63, %ebx
+ sub %edx, %ebx
+ lea 1(%edi, %edx), %edi
+ jmp L(StrncpyFillTailWithZero)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_16):
+ bsf %ecx, %edx
+ movdqu %xmm4, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 16(%edi, %edx), %eax
+# endif
+ movdqu %xmm5, 16(%edi)
+ add $47, %ebx
+ sub %edx, %ebx
+ lea 17(%edi, %edx), %edi
+ jmp L(StrncpyFillTailWithZero)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnaligned_32):
+ bsf %edx, %edx
+ movdqu %xmm4, (%edi)
+ movdqu %xmm5, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 32(%edi, %edx), %eax
+# endif
+ movdqu %xmm6, 32(%edi)
+ add $31, %ebx
+ sub %edx, %ebx
+ lea 33(%edi, %edx), %edi
+ jmp L(StrncpyFillTailWithZero)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnalignedXmm6):
+ movdqu %xmm6, (%edi, %ecx)
+ jmp L(CopyFrom1To16BytesXmmExit)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnalignedXmm5):
+ movdqu %xmm5, (%edi, %ecx)
+ jmp L(CopyFrom1To16BytesXmmExit)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnalignedXmm4):
+ movdqu %xmm4, (%edi, %ecx)
+ jmp L(CopyFrom1To16BytesXmmExit)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnalignedXmm3):
+ movdqu %xmm3, (%edi, %ecx)
+ jmp L(CopyFrom1To16BytesXmmExit)
+
+ .p2align 4
+L(CopyFrom1To16BytesUnalignedXmm1):
+ movdqu %xmm1, (%edi, %ecx)
+ jmp L(CopyFrom1To16BytesXmmExit)
+
+ .p2align 4
+L(CopyFrom1To16BytesExit):
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
+
+/* Case2 */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2):
+ add $16, %ebx
+ add %ecx, %edi
+ add %ecx, %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32BytesCase2):
+ sub %ecx, %ebx
+ add %ecx, %esi
+ bsf %edx, %edx
+ add $16, %edx
+ sub %ecx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTailCase2):
+ sub %ecx, %ebx
+ add %ecx, %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+L(CopyFrom1To16BytesTail1Case2):
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+/* Case2 or Case3, Case3 */
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesCase2)
+L(CopyFrom1To16BytesCase3):
+ add $16, %ebx
+ add %ecx, %edi
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32BytesCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To32BytesCase2)
+ sub %ecx, %ebx
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To16BytesTailCase2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTailCase2)
+ sub %ecx, %ebx
+ add %ecx, %esi
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+ .p2align 4
+L(CopyFrom1To32Bytes1Case2OrCase3):
+ add $16, %edi
+ add $16, %esi
+ sub $16, %ebx
+L(CopyFrom1To16BytesTail1Case2OrCase3):
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesTail1Case2)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+ .p2align 4
+L(Exit0):
+# ifdef USE_AS_STPCPY
+ mov %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit1):
+ movb %dh, (%edi)
+# ifdef USE_AS_STPCPY
+ lea (%edi), %eax
+# endif
+ sub $1, %ebx
+ lea 1(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit2):
+ movw (%esi), %dx
+ movw %dx, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 1(%edi), %eax
+# endif
+ sub $2, %ebx
+ lea 2(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit3):
+ movw (%esi), %cx
+ movw %cx, (%edi)
+ movb %dh, 2(%edi)
+# ifdef USE_AS_STPCPY
+ lea 2(%edi), %eax
+# endif
+ sub $3, %ebx
+ lea 3(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit4):
+ movl (%esi), %edx
+ movl %edx, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 3(%edi), %eax
+# endif
+ sub $4, %ebx
+ lea 4(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit5):
+ movl (%esi), %ecx
+ movb %dh, 4(%edi)
+ movl %ecx, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 4(%edi), %eax
+# endif
+ sub $5, %ebx
+ lea 5(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit6):
+ movl (%esi), %ecx
+ movw 4(%esi), %dx
+ movl %ecx, (%edi)
+ movw %dx, 4(%edi)
+# ifdef USE_AS_STPCPY
+ lea 5(%edi), %eax
+# endif
+ sub $6, %ebx
+ lea 6(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit7):
+ movl (%esi), %ecx
+ movl 3(%esi), %edx
+ movl %ecx, (%edi)
+ movl %edx, 3(%edi)
+# ifdef USE_AS_STPCPY
+ lea 6(%edi), %eax
+# endif
+ sub $7, %ebx
+ lea 7(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit8):
+ movlpd (%esi), %xmm0
+ movlpd %xmm0, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 7(%edi), %eax
+# endif
+ sub $8, %ebx
+ lea 8(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit9):
+ movlpd (%esi), %xmm0
+ movb %dh, 8(%edi)
+ movlpd %xmm0, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 8(%edi), %eax
+# endif
+ sub $9, %ebx
+ lea 9(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit10):
+ movlpd (%esi), %xmm0
+ movw 8(%esi), %dx
+ movlpd %xmm0, (%edi)
+ movw %dx, 8(%edi)
+# ifdef USE_AS_STPCPY
+ lea 9(%edi), %eax
+# endif
+ sub $10, %ebx
+ lea 10(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit11):
+ movlpd (%esi), %xmm0
+ movl 7(%esi), %edx
+ movlpd %xmm0, (%edi)
+ movl %edx, 7(%edi)
+# ifdef USE_AS_STPCPY
+ lea 10(%edi), %eax
+# endif
+ sub $11, %ebx
+ lea 11(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit12):
+ movlpd (%esi), %xmm0
+ movl 8(%esi), %edx
+ movlpd %xmm0, (%edi)
+ movl %edx, 8(%edi)
+# ifdef USE_AS_STPCPY
+ lea 11(%edi), %eax
+# endif
+ sub $12, %ebx
+ lea 12(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit13):
+ movlpd (%esi), %xmm0
+ movlpd 5(%esi), %xmm1
+ movlpd %xmm0, (%edi)
+ movlpd %xmm1, 5(%edi)
+# ifdef USE_AS_STPCPY
+ lea 12(%edi), %eax
+# endif
+ sub $13, %ebx
+ lea 13(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit14):
+ movlpd (%esi), %xmm0
+ movlpd 6(%esi), %xmm1
+ movlpd %xmm0, (%edi)
+ movlpd %xmm1, 6(%edi)
+# ifdef USE_AS_STPCPY
+ lea 13(%edi), %eax
+# endif
+ sub $14, %ebx
+ lea 14(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit15):
+ movlpd (%esi), %xmm0
+ movlpd 7(%esi), %xmm1
+ movlpd %xmm0, (%edi)
+ movlpd %xmm1, 7(%edi)
+# ifdef USE_AS_STPCPY
+ lea 14(%edi), %eax
+# endif
+ sub $15, %ebx
+ lea 15(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit16):
+ movdqu (%esi), %xmm0
+ movdqu %xmm0, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 15(%edi), %eax
+# endif
+ sub $16, %ebx
+ lea 16(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit17):
+ movdqu (%esi), %xmm0
+ movdqu %xmm0, (%edi)
+ movb %dh, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 16(%edi), %eax
+# endif
+ sub $17, %ebx
+ lea 17(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit18):
+ movdqu (%esi), %xmm0
+ movw 16(%esi), %cx
+ movdqu %xmm0, (%edi)
+ movw %cx, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 17(%edi), %eax
+# endif
+ sub $18, %ebx
+ lea 18(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit19):
+ movdqu (%esi), %xmm0
+ movl 15(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movl %ecx, 15(%edi)
+# ifdef USE_AS_STPCPY
+ lea 18(%edi), %eax
+# endif
+ sub $19, %ebx
+ lea 19(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit20):
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movl %ecx, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 19(%edi), %eax
+# endif
+ sub $20, %ebx
+ lea 20(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit21):
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movl %ecx, 16(%edi)
+ movb %dh, 20(%edi)
+# ifdef USE_AS_STPCPY
+ lea 20(%edi), %eax
+# endif
+ sub $21, %ebx
+ lea 21(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit22):
+ movdqu (%esi), %xmm0
+ movlpd 14(%esi), %xmm3
+ movdqu %xmm0, (%edi)
+ movlpd %xmm3, 14(%edi)
+# ifdef USE_AS_STPCPY
+ lea 21(%edi), %eax
+# endif
+ sub $22, %ebx
+ lea 22(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit23):
+ movdqu (%esi), %xmm0
+ movlpd 15(%esi), %xmm3
+ movdqu %xmm0, (%edi)
+ movlpd %xmm3, 15(%edi)
+# ifdef USE_AS_STPCPY
+ lea 22(%edi), %eax
+# endif
+ sub $23, %ebx
+ lea 23(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit24):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 23(%edi), %eax
+# endif
+ sub $24, %ebx
+ lea 24(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit25):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movb %dh, 24(%edi)
+# ifdef USE_AS_STPCPY
+ lea 24(%edi), %eax
+# endif
+ sub $25, %ebx
+ lea 25(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit26):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movw 24(%esi), %cx
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movw %cx, 24(%edi)
+# ifdef USE_AS_STPCPY
+ lea 25(%edi), %eax
+# endif
+ sub $26, %ebx
+ lea 26(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit27):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 23(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movl %ecx, 23(%edi)
+# ifdef USE_AS_STPCPY
+ lea 26(%edi), %eax
+# endif
+ sub $27, %ebx
+ lea 27(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit28):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 24(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movl %ecx, 24(%edi)
+# ifdef USE_AS_STPCPY
+ lea 27(%edi), %eax
+# endif
+ sub $28, %ebx
+ lea 28(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit29):
+ movdqu (%esi), %xmm0
+ movdqu 13(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 13(%edi)
+# ifdef USE_AS_STPCPY
+ lea 28(%edi), %eax
+# endif
+ sub $29, %ebx
+ lea 29(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit30):
+ movdqu (%esi), %xmm0
+ movdqu 14(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 14(%edi)
+# ifdef USE_AS_STPCPY
+ lea 29(%edi), %eax
+# endif
+ sub $30, %ebx
+ lea 30(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+
+ .p2align 4
+L(Exit31):
+ movdqu (%esi), %xmm0
+ movdqu 15(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 15(%edi)
+# ifdef USE_AS_STPCPY
+ lea 30(%edi), %eax
+# endif
+ sub $31, %ebx
+ lea 31(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(Exit32):
+ movdqu (%esi), %xmm0
+ movdqu 16(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 31(%edi), %eax
+# endif
+ sub $32, %ebx
+ lea 32(%edi), %edi
+ jnz L(StrncpyFillTailWithZero)
+ RETURN
+
+ .p2align 4
+L(StrncpyExit1):
+ movb (%esi), %dl
+ movb %dl, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 1(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit2):
+ movw (%esi), %dx
+ movw %dx, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 2(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit3):
+ movw (%esi), %cx
+ movb 2(%esi), %dl
+ movw %cx, (%edi)
+ movb %dl, 2(%edi)
+# ifdef USE_AS_STPCPY
+ lea 3(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit4):
+ movl (%esi), %edx
+ movl %edx, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 4(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit5):
+ movl (%esi), %ecx
+ movb 4(%esi), %dl
+ movl %ecx, (%edi)
+ movb %dl, 4(%edi)
+# ifdef USE_AS_STPCPY
+ lea 5(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit6):
+ movl (%esi), %ecx
+ movw 4(%esi), %dx
+ movl %ecx, (%edi)
+ movw %dx, 4(%edi)
+# ifdef USE_AS_STPCPY
+ lea 6(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit7):
+ movl (%esi), %ecx
+ movl 3(%esi), %edx
+ movl %ecx, (%edi)
+ movl %edx, 3(%edi)
+# ifdef USE_AS_STPCPY
+ lea 7(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit8):
+ movlpd (%esi), %xmm0
+ movlpd %xmm0, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 8(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit9):
+ movlpd (%esi), %xmm0
+ movb 8(%esi), %dl
+ movlpd %xmm0, (%edi)
+ movb %dl, 8(%edi)
+# ifdef USE_AS_STPCPY
+ lea 9(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit10):
+ movlpd (%esi), %xmm0
+ movw 8(%esi), %dx
+ movlpd %xmm0, (%edi)
+ movw %dx, 8(%edi)
+# ifdef USE_AS_STPCPY
+ lea 10(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit11):
+ movlpd (%esi), %xmm0
+ movl 7(%esi), %edx
+ movlpd %xmm0, (%edi)
+ movl %edx, 7(%edi)
+# ifdef USE_AS_STPCPY
+ lea 11(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit12):
+ movlpd (%esi), %xmm0
+ movl 8(%esi), %edx
+ movlpd %xmm0, (%edi)
+ movl %edx, 8(%edi)
+# ifdef USE_AS_STPCPY
+ lea 12(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit13):
+ movlpd (%esi), %xmm0
+ movlpd 5(%esi), %xmm1
+ movlpd %xmm0, (%edi)
+ movlpd %xmm1, 5(%edi)
+# ifdef USE_AS_STPCPY
+ lea 13(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit14):
+ movlpd (%esi), %xmm0
+ movlpd 6(%esi), %xmm1
+ movlpd %xmm0, (%edi)
+ movlpd %xmm1, 6(%edi)
+# ifdef USE_AS_STPCPY
+ lea 14(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit15):
+ movlpd (%esi), %xmm0
+ movlpd 7(%esi), %xmm1
+ movlpd %xmm0, (%edi)
+ movlpd %xmm1, 7(%edi)
+# ifdef USE_AS_STPCPY
+ lea 15(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit16):
+ movdqu (%esi), %xmm0
+ movdqu %xmm0, (%edi)
+# ifdef USE_AS_STPCPY
+ lea 16(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit17):
+ movdqu (%esi), %xmm0
+ movb 16(%esi), %cl
+ movdqu %xmm0, (%edi)
+ movb %cl, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 17(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit18):
+ movdqu (%esi), %xmm0
+ movw 16(%esi), %cx
+ movdqu %xmm0, (%edi)
+ movw %cx, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 18(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit19):
+ movdqu (%esi), %xmm0
+ movl 15(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movl %ecx, 15(%edi)
+# ifdef USE_AS_STPCPY
+ lea 19(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit20):
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movl %ecx, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 20(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit21):
+ movdqu (%esi), %xmm0
+ movl 16(%esi), %ecx
+ movb 20(%esi), %dl
+ movdqu %xmm0, (%edi)
+ movl %ecx, 16(%edi)
+ movb %dl, 20(%edi)
+# ifdef USE_AS_STPCPY
+ lea 21(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit22):
+ movdqu (%esi), %xmm0
+ movlpd 14(%esi), %xmm3
+ movdqu %xmm0, (%edi)
+ movlpd %xmm3, 14(%edi)
+# ifdef USE_AS_STPCPY
+ lea 22(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit23):
+ movdqu (%esi), %xmm0
+ movlpd 15(%esi), %xmm3
+ movdqu %xmm0, (%edi)
+ movlpd %xmm3, 15(%edi)
+# ifdef USE_AS_STPCPY
+ lea 23(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit24):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 24(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit25):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movb 24(%esi), %cl
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movb %cl, 24(%edi)
+# ifdef USE_AS_STPCPY
+ lea 25(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit26):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movw 24(%esi), %cx
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movw %cx, 24(%edi)
+# ifdef USE_AS_STPCPY
+ lea 26(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit27):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 23(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movl %ecx, 23(%edi)
+# ifdef USE_AS_STPCPY
+ lea 27(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit28):
+ movdqu (%esi), %xmm0
+ movlpd 16(%esi), %xmm2
+ movl 24(%esi), %ecx
+ movdqu %xmm0, (%edi)
+ movlpd %xmm2, 16(%edi)
+ movl %ecx, 24(%edi)
+# ifdef USE_AS_STPCPY
+ lea 28(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit29):
+ movdqu (%esi), %xmm0
+ movdqu 13(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 13(%edi)
+# ifdef USE_AS_STPCPY
+ lea 29(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit30):
+ movdqu (%esi), %xmm0
+ movdqu 14(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 14(%edi)
+# ifdef USE_AS_STPCPY
+ lea 30(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit31):
+ movdqu (%esi), %xmm0
+ movdqu 15(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 15(%edi)
+# ifdef USE_AS_STPCPY
+ lea 31(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit32):
+ movdqu (%esi), %xmm0
+ movdqu 16(%esi), %xmm2
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 16(%edi)
+# ifdef USE_AS_STPCPY
+ lea 32(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit33):
+ movdqu (%esi), %xmm0
+ movdqu 16(%esi), %xmm2
+ movb 32(%esi), %cl
+ movdqu %xmm0, (%edi)
+ movdqu %xmm2, 16(%edi)
+ movb %cl, 32(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill0):
+ RETURN
+
+ .p2align 4
+L(Fill1):
+ movb %dl, (%edi)
+ RETURN
+
+ .p2align 4
+L(Fill2):
+ movw %dx, (%edi)
+ RETURN
+
+ .p2align 4
+L(Fill3):
+ movl %edx, -1(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill4):
+ movl %edx, (%edi)
+ RETURN
+
+ .p2align 4
+L(Fill5):
+ movl %edx, (%edi)
+ movb %dl, 4(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill6):
+ movl %edx, (%edi)
+ movw %dx, 4(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill7):
+ movlpd %xmm0, -1(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill8):
+ movlpd %xmm0, (%edi)
+ RETURN
+
+ .p2align 4
+L(Fill9):
+ movlpd %xmm0, (%edi)
+ movb %dl, 8(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill10):
+ movlpd %xmm0, (%edi)
+ movw %dx, 8(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill11):
+ movlpd %xmm0, (%edi)
+ movl %edx, 7(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill12):
+ movlpd %xmm0, (%edi)
+ movl %edx, 8(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill13):
+ movlpd %xmm0, (%edi)
+ movlpd %xmm0, 5(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill14):
+ movlpd %xmm0, (%edi)
+ movlpd %xmm0, 6(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill15):
+ movdqu %xmm0, -1(%edi)
+ RETURN
+
+ .p2align 4
+L(Fill16):
+ movdqu %xmm0, (%edi)
+ RETURN
+
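+/* Store the pending 16-byte block, then locate the null byte and switch
+   to zero-filling the remainder of the destination.  */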
+ .p2align 4
+L(CopyFrom1To16BytesUnalignedXmm2):
+ movdqu %xmm2, (%edi, %ecx)
+
+ .p2align 4
+L(CopyFrom1To16BytesXmmExit):
+ bsf %edx, %edx
+ add $15, %ebx
+ add %ecx, %edi
+# ifdef USE_AS_STPCPY
+ lea (%edi, %edx), %eax
+# endif
+ sub %edx, %ebx
+ lea 1(%edi, %edx), %edi
+
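+/* Zero-fill the remaining %ebx bytes of the destination: align %edi to
+   16 bytes, store zeros 64 bytes at a time, and finish via FillTable.  */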
+ .p2align 4
+L(StrncpyFillTailWithZero):
+ pxor %xmm0, %xmm0
+ xor %edx, %edx
+ sub $16, %ebx
+ jbe L(StrncpyFillExit)
+
+ movdqu %xmm0, (%edi)
+ add $16, %edi
+
+ mov %edi, %esi
+ and $0xf, %esi
+ sub %esi, %edi
+ add %esi, %ebx
+ sub $64, %ebx
+ jb L(StrncpyFillLess64)
+
+L(StrncpyFillLoopMovdqa):
+ movdqa %xmm0, (%edi)
+ movdqa %xmm0, 16(%edi)
+ movdqa %xmm0, 32(%edi)
+ movdqa %xmm0, 48(%edi)
+ add $64, %edi
+ sub $64, %ebx
+ jae L(StrncpyFillLoopMovdqa)
+
+L(StrncpyFillLess64):
+ add $32, %ebx
+ jl L(StrncpyFillLess32)
+ movdqa %xmm0, (%edi)
+ movdqa %xmm0, 16(%edi)
+ add $32, %edi
+ sub $16, %ebx
+ jl L(StrncpyFillExit)
+ movdqa %xmm0, (%edi)
+ add $16, %edi
+ BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
+
+L(StrncpyFillLess32):
+ add $16, %ebx
+ jl L(StrncpyFillExit)
+ movdqa %xmm0, (%edi)
+ add $16, %edi
+ BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
+
+L(StrncpyFillExit):
+ add $16, %ebx
+ BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
+
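+/* Leave the unaligned 64-byte loop: either the byte count is exhausted
+   (Case 3) or a null byte was seen in the last block (Case 2).  */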
+ .p2align 4
+L(UnalignedLeaveCase2OrCase3):
+ test %edx, %edx
+ jnz L(Unaligned64LeaveCase2)
+L(Unaligned64LeaveCase3):
+ lea 64(%ebx), %ecx
+ and $-16, %ecx
+ add $48, %ebx
+ jl L(CopyFrom1To16BytesCase3)
+ movdqu %xmm4, (%edi)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm5, 16(%edi)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm6, 32(%edi)
+ sub $16, %ebx
+ jb L(CopyFrom1To16BytesCase3)
+ movdqu %xmm7, 48(%edi)
+# ifdef USE_AS_STPCPY
+ lea 64(%edi), %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Unaligned64LeaveCase2):
+ xor %ecx, %ecx
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %edx
+ add $48, %ebx
+ jle L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm4)
+
+ pcmpeqb %xmm5, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm4, (%edi)
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm5)
+
+ pcmpeqb %xmm6, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm5, 16(%edi)
+ add $16, %ecx
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %edx, %edx
+ jnz L(CopyFrom1To16BytesUnalignedXmm6)
+
+ pcmpeqb %xmm7, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqu %xmm6, 32(%edi)
+ lea 16(%edi, %ecx), %edi
+ lea 16(%esi, %ecx), %esi
+ bsf %edx, %edx
+ cmp %ebx, %edx
+ jb L(CopyFrom1To16BytesExit)
+ BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
+
+ .p2align 4
+L(ExitZero):
+ movl %edi, %eax
+ RETURN
+
+END (STRCPY)
+
+ .p2align 4
+ .section .rodata
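+/* ExitTable is indexed by the offset of the null byte; L(ExitN) copies
+   N bytes including the terminator and, if the count is not yet
+   exhausted, zero-fills the rest of the buffer.  */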
+L(ExitTable):
+ .int JMPTBL(L(Exit1), L(ExitTable))
+ .int JMPTBL(L(Exit2), L(ExitTable))
+ .int JMPTBL(L(Exit3), L(ExitTable))
+ .int JMPTBL(L(Exit4), L(ExitTable))
+ .int JMPTBL(L(Exit5), L(ExitTable))
+ .int JMPTBL(L(Exit6), L(ExitTable))
+ .int JMPTBL(L(Exit7), L(ExitTable))
+ .int JMPTBL(L(Exit8), L(ExitTable))
+ .int JMPTBL(L(Exit9), L(ExitTable))
+ .int JMPTBL(L(Exit10), L(ExitTable))
+ .int JMPTBL(L(Exit11), L(ExitTable))
+ .int JMPTBL(L(Exit12), L(ExitTable))
+ .int JMPTBL(L(Exit13), L(ExitTable))
+ .int JMPTBL(L(Exit14), L(ExitTable))
+ .int JMPTBL(L(Exit15), L(ExitTable))
+ .int JMPTBL(L(Exit16), L(ExitTable))
+ .int JMPTBL(L(Exit17), L(ExitTable))
+ .int JMPTBL(L(Exit18), L(ExitTable))
+ .int JMPTBL(L(Exit19), L(ExitTable))
+ .int JMPTBL(L(Exit20), L(ExitTable))
+ .int JMPTBL(L(Exit21), L(ExitTable))
+ .int JMPTBL(L(Exit22), L(ExitTable))
+ .int JMPTBL(L(Exit23), L(ExitTable))
+ .int JMPTBL(L(Exit24), L(ExitTable))
+ .int JMPTBL(L(Exit25), L(ExitTable))
+ .int JMPTBL(L(Exit26), L(ExitTable))
+ .int JMPTBL(L(Exit27), L(ExitTable))
+ .int JMPTBL(L(Exit28), L(ExitTable))
+ .int JMPTBL(L(Exit29), L(ExitTable))
+ .int JMPTBL(L(Exit30), L(ExitTable))
+ .int JMPTBL(L(Exit31), L(ExitTable))
+ .int JMPTBL(L(Exit32), L(ExitTable))
+
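+/* ExitStrncpyTable is indexed by the remaining byte count;
+   L(StrncpyExitN) copies exactly N bytes and does not append a
+   terminator.  */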
+L(ExitStrncpyTable):
+ .int JMPTBL(L(Exit0), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
+ .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
+
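+/* FillTable is indexed by the number of bytes left to clear; L(FillN)
+   stores N zero bytes.  */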
+ .p2align 4
+L(FillTable):
+ .int JMPTBL(L(Fill0), L(FillTable))
+ .int JMPTBL(L(Fill1), L(FillTable))
+ .int JMPTBL(L(Fill2), L(FillTable))
+ .int JMPTBL(L(Fill3), L(FillTable))
+ .int JMPTBL(L(Fill4), L(FillTable))
+ .int JMPTBL(L(Fill5), L(FillTable))
+ .int JMPTBL(L(Fill6), L(FillTable))
+ .int JMPTBL(L(Fill7), L(FillTable))
+ .int JMPTBL(L(Fill8), L(FillTable))
+ .int JMPTBL(L(Fill9), L(FillTable))
+ .int JMPTBL(L(Fill10), L(FillTable))
+ .int JMPTBL(L(Fill11), L(FillTable))
+ .int JMPTBL(L(Fill12), L(FillTable))
+ .int JMPTBL(L(Fill13), L(FillTable))
+ .int JMPTBL(L(Fill14), L(FillTable))
+ .int JMPTBL(L(Fill15), L(FillTable))
+ .int JMPTBL(L(Fill16), L(FillTable))
+# else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
+# define RETURN1 ret
+
+ .text
+ENTRY (STRCPY)
+ ENTRANCE
+ mov STR1(%esp), %edx
+ mov STR2(%esp), %ecx
+
+ cmpb $0, (%ecx)
+ jz L(ExitTail1)
+ cmpb $0, 1(%ecx)
+ jz L(ExitTail2)
+ cmpb $0, 2(%ecx)
+ jz L(ExitTail3)
+ cmpb $0, 3(%ecx)
+ jz L(ExitTail4)
+ cmpb $0, 4(%ecx)
+ jz L(ExitTail5)
+ cmpb $0, 5(%ecx)
+ jz L(ExitTail6)
+ cmpb $0, 6(%ecx)
+ jz L(ExitTail7)
+ cmpb $0, 7(%ecx)
+ jz L(ExitTail8)
+ cmpb $0, 8(%ecx)
+ jz L(ExitTail9)
+ cmpb $0, 9(%ecx)
+ jz L(ExitTail10)
+ cmpb $0, 10(%ecx)
+ jz L(ExitTail11)
+ cmpb $0, 11(%ecx)
+ jz L(ExitTail12)
+ cmpb $0, 12(%ecx)
+ jz L(ExitTail13)
+ cmpb $0, 13(%ecx)
+ jz L(ExitTail14)
+ cmpb $0, 14(%ecx)
+ jz L(ExitTail15)
+ cmpb $0, 15(%ecx)
+ jz L(ExitTail16)
+
+ PUSH (%edi)
+ PUSH (%ebx)
+
+ mov %edx, %edi
+ lea 16(%ecx), %ebx
+ and $-16, %ebx
+ pxor %xmm0, %xmm0
+ movdqu (%ecx), %xmm1
+ movdqu %xmm1, (%edx)
+ pcmpeqb (%ebx), %xmm0
+ pmovmskb %xmm0, %eax
+ sub %ecx, %ebx
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ mov %ecx, %eax
+ lea 16(%ecx), %ecx
+ and $-16, %ecx
+ sub %ecx, %eax
+ sub %eax, %edx
+ xor %ebx, %ebx
+
+ .p2align 4
+ movdqa (%ecx), %xmm1
+ movaps 16(%ecx), %xmm2
+ movdqu %xmm1, (%edx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ add $16, %ebx
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %ebx), %xmm3
+ movdqu %xmm2, (%edx, %ebx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %eax
+ add $16, %ebx
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %ebx), %xmm4
+ movdqu %xmm3, (%edx, %ebx)
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ add $16, %ebx
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %ebx), %xmm1
+ movdqu %xmm4, (%edx, %ebx)
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ add $16, %ebx
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %ebx), %xmm2
+ movdqu %xmm1, (%edx, %ebx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ add $16, %ebx
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %ebx), %xmm3
+ movdqu %xmm2, (%edx, %ebx)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %eax
+ add $16, %ebx
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movdqu %xmm3, (%edx, %ebx)
+ mov %ecx, %eax
+ lea 16(%ecx, %ebx), %ecx
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ sub %eax, %edx
+
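+/* Main loop: the source is now 64-byte aligned.  Read four 16-byte
+   blocks per iteration and use pminub/pcmpeqb to detect a null byte in
+   any of them before storing.  */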
+L(Aligned64Loop):
+ movaps (%ecx), %xmm2
+ movaps %xmm2, %xmm4
+ movaps 16(%ecx), %xmm5
+ movaps 32(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 48(%ecx), %xmm7
+ pminub %xmm5, %xmm2
+ add $64, %ecx
+ pminub %xmm7, %xmm3
+ add $64, %edx
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(Aligned64Leave)
+L(Aligned64Loop_start):
+ movdqu %xmm4, -64(%edx)
+ movaps (%ecx), %xmm2
+ movdqa %xmm2, %xmm4
+ movdqu %xmm5, -48(%edx)
+ movaps 16(%ecx), %xmm5
+ pminub %xmm5, %xmm2
+ movaps 32(%ecx), %xmm3
+ movdqu %xmm6, -32(%edx)
+ movaps %xmm3, %xmm6
+ movdqu %xmm7, -16(%edx)
+ movaps 48(%ecx), %xmm7
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %eax
+ add $64, %edx
+ add $64, %ecx
+ test %eax, %eax
+ jz L(Aligned64Loop_start)
+L(Aligned64Leave):
+ sub $0xa0, %ebx
+ pxor %xmm0, %xmm0
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm5, %xmm0
+ pmovmskb %xmm0, %eax
+ movdqu %xmm4, -64(%edx)
+ test %eax, %eax
+ lea 16(%ebx), %ebx
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm6, %xmm0
+ pmovmskb %xmm0, %eax
+ movdqu %xmm5, -48(%edx)
+ test %eax, %eax
+ lea 16(%ebx), %ebx
+ jnz L(CopyFrom1To16Bytes)
+
+ movdqu %xmm6, -32(%edx)
+ pcmpeqb %xmm7, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%ebx), %ebx
+
+/*-----------------End of main part---------------------------*/
+
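+/* A null byte was found in the last 16-byte block; %eax holds the
+   pcmpeqb mask, so test it bit by bit to pick the exact exit.  */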
+ .p2align 4
+L(CopyFrom1To16Bytes):
+ add %ebx, %edx
+ add %ebx, %ecx
+
+ POP (%ebx)
+ test %al, %al
+ jz L(ExitHigh)
+ test $0x01, %al
+ jnz L(Exit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ /* Exit 8 */
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+# ifdef USE_AS_STPCPY
+ lea 7(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitHigh):
+ test $0x01, %ah
+ jnz L(Exit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ test $0x08, %ah
+ jnz L(Exit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ /* Exit 16 */
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 8(%ecx), %xmm0
+ movlpd %xmm0, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 15(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit1):
+ movb (%ecx), %al
+ movb %al, (%edx)
+# ifdef USE_AS_STPCPY
+ lea (%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit2):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+# ifdef USE_AS_STPCPY
+ lea 1(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit3):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movb 2(%ecx), %al
+ movb %al, 2(%edx)
+# ifdef USE_AS_STPCPY
+ lea 2(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+# ifdef USE_AS_STPCPY
+ lea 3(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit5):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movb 4(%ecx), %al
+ movb %al, 4(%edx)
+# ifdef USE_AS_STPCPY
+ lea 4(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit6):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movw 4(%ecx), %ax
+ movw %ax, 4(%edx)
+# ifdef USE_AS_STPCPY
+ lea 5(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit7):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 3(%ecx), %eax
+ movl %eax, 3(%edx)
+# ifdef USE_AS_STPCPY
+ lea 6(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit9):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movb 8(%ecx), %al
+ movb %al, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 8(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit10):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movw 8(%ecx), %ax
+ movw %ax, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 9(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit11):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movl 7(%ecx), %eax
+ movl %eax, 7(%edx)
+# ifdef USE_AS_STPCPY
+ lea 10(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit12):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movl 8(%ecx), %eax
+ movl %eax, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 11(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit13):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 5(%ecx), %xmm0
+ movlpd %xmm0, 5(%edx)
+# ifdef USE_AS_STPCPY
+ lea 12(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit14):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 6(%ecx), %xmm0
+ movlpd %xmm0, 6(%edx)
+# ifdef USE_AS_STPCPY
+ lea 13(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(Exit15):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+# ifdef USE_AS_STPCPY
+ lea 14(%edx), %eax
+# else
+ movl %edi, %eax
+# endif
+ RETURN
+
+CFI_POP (%edi)
+
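+/* ExitTailN: the terminator was found within the first 16 bytes, before
+   any registers were pushed; copy N bytes and return directly.  */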
+ .p2align 4
+L(ExitTail1):
+ movb (%ecx), %al
+ movb %al, (%edx)
+ movl %edx, %eax
+ RETURN1
+
+ .p2align 4
+L(ExitTail2):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+# ifdef USE_AS_STPCPY
+ lea 1(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail3):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movb 2(%ecx), %al
+ movb %al, 2(%edx)
+# ifdef USE_AS_STPCPY
+ lea 2(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+# ifdef USE_AS_STPCPY
+ lea 3(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail5):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movb 4(%ecx), %al
+ movb %al, 4(%edx)
+# ifdef USE_AS_STPCPY
+ lea 4(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail6):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movw 4(%ecx), %ax
+ movw %ax, 4(%edx)
+# ifdef USE_AS_STPCPY
+ lea 5(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail7):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 3(%ecx), %eax
+ movl %eax, 3(%edx)
+# ifdef USE_AS_STPCPY
+ lea 6(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail8):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+# ifdef USE_AS_STPCPY
+ lea 7(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail9):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movb 8(%ecx), %al
+ movb %al, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 8(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail10):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movw 8(%ecx), %ax
+ movw %ax, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 9(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail11):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movl 7(%ecx), %eax
+ movl %eax, 7(%edx)
+# ifdef USE_AS_STPCPY
+ lea 10(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail12):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 4(%ecx), %eax
+ movl %eax, 4(%edx)
+ movl 8(%ecx), %eax
+ movl %eax, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 11(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail13):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 5(%ecx), %xmm0
+ movlpd %xmm0, 5(%edx)
+# ifdef USE_AS_STPCPY
+ lea 12(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail14):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 6(%ecx), %xmm0
+ movlpd %xmm0, 6(%edx)
+# ifdef USE_AS_STPCPY
+ lea 13(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail15):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+# ifdef USE_AS_STPCPY
+ lea 14(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitTail16):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 8(%ecx), %xmm0
+ movlpd %xmm0, 8(%edx)
+# ifdef USE_AS_STPCPY
+ lea 15(%edx), %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN1
+
+END (STRCPY)
+# endif
+
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
new file mode 100644
index 0000000000..effd85da94
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
@@ -0,0 +1,3901 @@
+/* strcpy with SSSE3
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+
+# ifndef USE_AS_STRCAT
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# ifndef STRCPY
+# define STRCPY __strcpy_ssse3
+# endif
+
+# ifdef USE_AS_STRNCPY
+# define PARMS 8
+# define ENTRANCE PUSH (%ebx)
+# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
+# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
+# else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN ret
+# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
+# endif
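+/* For strncpy, ENTRANCE pushes %ebx to hold the length, so the
+   parameters sit 8 bytes up the stack and every return path must pop
+   it.  */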
+
+# ifdef USE_AS_STPCPY
+# define SAVE_RESULT(n) lea n(%edx), %eax
+# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
+# else
+# define SAVE_RESULT(n) movl %edi, %eax
+# define SAVE_RESULT_TAIL(n) movl %edx, %eax
+# endif
+
+# define STR1 PARMS
+# define STR2 STR1+4
+# define LEN STR2+4
+
+/* In this code the following instructions are used for copying:
+	movb - 1 byte
+	movw - 2 bytes
+	movl - 4 bytes
+	movlpd - 8 bytes
+	movaps - 16 bytes - requires 16-byte alignment
+	of source and destination addresses.
+*/
+
+.text
+ENTRY (STRCPY)
+ ENTRANCE
+ mov STR1(%esp), %edx
+ mov STR2(%esp), %ecx
+# ifdef USE_AS_STRNCPY
+ movl LEN(%esp), %ebx
+ cmp $8, %ebx
+ jbe L(StrncpyExit8Bytes)
+# endif
+ cmpb $0, (%ecx)
+ jz L(ExitTail1)
+ cmpb $0, 1(%ecx)
+ jz L(ExitTail2)
+ cmpb $0, 2(%ecx)
+ jz L(ExitTail3)
+ cmpb $0, 3(%ecx)
+ jz L(ExitTail4)
+ cmpb $0, 4(%ecx)
+ jz L(ExitTail5)
+ cmpb $0, 5(%ecx)
+ jz L(ExitTail6)
+ cmpb $0, 6(%ecx)
+ jz L(ExitTail7)
+ cmpb $0, 7(%ecx)
+ jz L(ExitTail8)
+# ifdef USE_AS_STRNCPY
+ cmp $16, %ebx
+ jb L(StrncpyExit15Bytes)
+# endif
+ cmpb $0, 8(%ecx)
+ jz L(ExitTail9)
+ cmpb $0, 9(%ecx)
+ jz L(ExitTail10)
+ cmpb $0, 10(%ecx)
+ jz L(ExitTail11)
+ cmpb $0, 11(%ecx)
+ jz L(ExitTail12)
+ cmpb $0, 12(%ecx)
+ jz L(ExitTail13)
+ cmpb $0, 13(%ecx)
+ jz L(ExitTail14)
+ cmpb $0, 14(%ecx)
+ jz L(ExitTail15)
+# ifdef USE_AS_STRNCPY
+ cmp $16, %ebx
+ je L(ExitTail16)
+# endif
+ cmpb $0, 15(%ecx)
+ jz L(ExitTail16)
+
+ PUSH (%edi)
+ mov %edx, %edi
+# endif
+ PUSH (%esi)
+# ifdef USE_AS_STRNCPY
+ mov %ecx, %esi
+ sub $16, %ebx
+ and $0xf, %esi
+
+/* Add the source alignment offset (%ecx & 15) to the byte count in %ebx.  */
+
+ add %esi, %ebx
+# endif
+ lea 16(%ecx), %esi
+ and $-16, %esi
+ pxor %xmm0, %xmm0
+ movlpd (%ecx), %xmm1
+ movlpd %xmm1, (%edx)
+
+ pcmpeqb (%esi), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm1, 8(%edx)
+
+ pmovmskb %xmm0, %eax
+ sub %ecx, %esi
+
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ mov %edx, %eax
+ lea 16(%edx), %edx
+ and $-16, %edx
+ sub %edx, %eax
+
+# ifdef USE_AS_STRNCPY
+ add %eax, %esi
+ lea -1(%esi), %esi
+ and $1<<31, %esi
+ test %esi, %esi
+ jnz L(ContinueCopy)
+ lea 16(%ebx), %ebx
+
+L(ContinueCopy):
+# endif
+ sub %eax, %ecx
+ mov %ecx, %eax
+ and $0xf, %eax
+ mov $0, %esi
+
+/* If source and destination share the same 16-byte alignment offset,
+   both can be copied with aligned accesses.  */
+
+ jz L(Align16Both)
+
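+/* Otherwise the source is misaligned by %eax (1..15) bytes relative to
+   the 16-byte-aligned destination; dispatch to the matching L(ShlN)
+   realignment loop.  */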
+ cmp $8, %eax
+ jae L(ShlHigh8)
+ cmp $1, %eax
+ je L(Shl1)
+ cmp $2, %eax
+ je L(Shl2)
+ cmp $3, %eax
+ je L(Shl3)
+ cmp $4, %eax
+ je L(Shl4)
+ cmp $5, %eax
+ je L(Shl5)
+ cmp $6, %eax
+ je L(Shl6)
+ jmp L(Shl7)
+
+L(ShlHigh8):
+ je L(Shl8)
+ cmp $9, %eax
+ je L(Shl9)
+ cmp $10, %eax
+ je L(Shl10)
+ cmp $11, %eax
+ je L(Shl11)
+ cmp $12, %eax
+ je L(Shl12)
+ cmp $13, %eax
+ je L(Shl13)
+ cmp $14, %eax
+ je L(Shl14)
+ jmp L(Shl15)
+
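+/* Source and destination now share 16-byte alignment; copy with aligned
+   movaps, checking each block for a null byte.  */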
+L(Align16Both):
+ movaps (%ecx), %xmm1
+ movaps 16(%ecx), %xmm2
+ movaps %xmm1, (%edx)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm3
+ movaps %xmm2, (%edx, %esi)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm4
+ movaps %xmm3, (%edx, %esi)
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm1
+ movaps %xmm4, (%edx, %esi)
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm2
+ movaps %xmm1, (%edx, %esi)
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm3
+ movaps %xmm2, (%edx, %esi)
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps %xmm3, (%edx, %esi)
+ mov %ecx, %eax
+ lea 16(%ecx, %esi), %ecx
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ lea 112(%ebx, %eax), %ebx
+# endif
+ mov $-0x40, %esi
+
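+/* Both pointers share alignment and the source is 64-byte aligned: scan
+   four 16-byte blocks per iteration with pminub/pcmpeqb before storing
+   them.  */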
+L(Aligned64Loop):
+ movaps (%ecx), %xmm2
+ movaps 32(%ecx), %xmm3
+ movaps %xmm2, %xmm4
+ movaps 16(%ecx), %xmm5
+ movaps %xmm3, %xmm6
+ movaps 48(%ecx), %xmm7
+ pminub %xmm5, %xmm2
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ lea 64(%edx), %edx
+ pcmpeqb %xmm0, %xmm3
+ lea 64(%ecx), %ecx
+ pmovmskb %xmm3, %eax
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeaveCase2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Aligned64Leave)
+ movaps %xmm4, -64(%edx)
+ movaps %xmm5, -48(%edx)
+ movaps %xmm6, -32(%edx)
+ movaps %xmm7, -16(%edx)
+ jmp L(Aligned64Loop)
+
+L(Aligned64Leave):
+# ifdef USE_AS_STRNCPY
+ lea 48(%ebx), %ebx
+# endif
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm5, %xmm0
+# ifdef USE_AS_STRNCPY
+ lea -16(%ebx), %ebx
+# endif
+ pmovmskb %xmm0, %eax
+ movaps %xmm4, -64(%edx)
+ test %eax, %eax
+ lea 16(%esi), %esi
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm6, %xmm0
+# ifdef USE_AS_STRNCPY
+ lea -16(%ebx), %ebx
+# endif
+ pmovmskb %xmm0, %eax
+ movaps %xmm5, -48(%edx)
+ test %eax, %eax
+ lea 16(%esi), %esi
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps %xmm6, -32(%edx)
+ pcmpeqb %xmm7, %xmm0
+# ifdef USE_AS_STRNCPY
+ lea -16(%ebx), %ebx
+# endif
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+ jmp L(CopyFrom1To16Bytes)
+
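+/* L(ShlN), N = 1..15: the source lags the aligned destination by N
+   bytes.  Blocks are loaded aligned and merged with palignr $N before
+   each aligned store; L(ShlNLoopExit) finishes the last partial block.  */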
+ .p2align 4
+L(Shl1):
+ movaps -1(%ecx), %xmm1
+ movaps 15(%ecx), %xmm2
+L(Shl1Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit1Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl1LoopExit)
+
+ palignr $1, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 31(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit1Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl1LoopExit)
+
+ palignr $1, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 31(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit1Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl1LoopExit)
+
+ palignr $1, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 31(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit1Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl1LoopExit)
+
+ palignr $1, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 31(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -15(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -1(%ecx), %xmm1
+
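+/* 64-byte inner loop for shift 1: four aligned loads, a combined null
+   check via pminub, then palignr $1 merges before the stores.  */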
+L(Shl1LoopStart):
+ movaps 15(%ecx), %xmm2
+ movaps 31(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 47(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 63(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $1, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $1, %xmm3, %xmm4
+ jnz L(Shl1Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave1)
+# endif
+ palignr $1, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $1, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl1LoopStart)
+
+L(Shl1LoopExit):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
+ mov $15, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl2):
+ movaps -2(%ecx), %xmm1
+ movaps 14(%ecx), %xmm2
+L(Shl2Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit2Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl2LoopExit)
+
+ palignr $2, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 30(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit2Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl2LoopExit)
+
+ palignr $2, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 30(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit2Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl2LoopExit)
+
+ palignr $2, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 30(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit2Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl2LoopExit)
+
+ palignr $2, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 30(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -14(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -2(%ecx), %xmm1
+
+L(Shl2LoopStart):
+ movaps 14(%ecx), %xmm2
+ movaps 30(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 46(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 62(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $2, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $2, %xmm3, %xmm4
+ jnz L(Shl2Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave2)
+# endif
+ palignr $2, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $2, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl2LoopStart)
+
+L(Shl2LoopExit):
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
+ mov $14, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl3):
+ movaps -3(%ecx), %xmm1
+ movaps 13(%ecx), %xmm2
+L(Shl3Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit3Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl3LoopExit)
+
+ palignr $3, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 29(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit3Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl3LoopExit)
+
+ palignr $3, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 29(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit3Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl3LoopExit)
+
+ palignr $3, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 29(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit3Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl3LoopExit)
+
+ palignr $3, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 29(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -13(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -3(%ecx), %xmm1
+
+L(Shl3LoopStart):
+ movaps 13(%ecx), %xmm2
+ movaps 29(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 45(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 61(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $3, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $3, %xmm3, %xmm4
+ jnz L(Shl3Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave3)
+# endif
+ palignr $3, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $3, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl3LoopStart)
+
+L(Shl3LoopExit):
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
+ mov $13, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl4):
+ movaps -4(%ecx), %xmm1
+ movaps 12(%ecx), %xmm2
+L(Shl4Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit4Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 28(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit4Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 28(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit4Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 28(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit4Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 28(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -12(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -4(%ecx), %xmm1
+
+L(Shl4LoopStart):
+ movaps 12(%ecx), %xmm2
+ movaps 28(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 44(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 60(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $4, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $4, %xmm3, %xmm4
+ jnz L(Shl4Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave4)
+# endif
+ palignr $4, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl4LoopStart)
+
+L(Shl4LoopExit):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
+ mov $12, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl5):
+ movaps -5(%ecx), %xmm1
+ movaps 11(%ecx), %xmm2
+L(Shl5Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit5Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl5LoopExit)
+
+ palignr $5, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 27(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit5Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl5LoopExit)
+
+ palignr $5, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 27(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit5Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl5LoopExit)
+
+ palignr $5, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 27(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit5Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl5LoopExit)
+
+ palignr $5, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 27(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -11(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -5(%ecx), %xmm1
+
+L(Shl5LoopStart):
+ movaps 11(%ecx), %xmm2
+ movaps 27(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 43(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 59(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $5, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $5, %xmm3, %xmm4
+ jnz L(Shl5Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave5)
+# endif
+ palignr $5, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $5, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl5LoopStart)
+
+L(Shl5LoopExit):
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
+ mov $11, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl6):
+ movaps -6(%ecx), %xmm1
+ movaps 10(%ecx), %xmm2
+L(Shl6Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit6Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl6LoopExit)
+
+ palignr $6, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 26(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit6Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl6LoopExit)
+
+ palignr $6, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 26(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit6Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl6LoopExit)
+
+ palignr $6, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 26(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit6Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl6LoopExit)
+
+ palignr $6, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 26(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -10(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -6(%ecx), %xmm1
+
+L(Shl6LoopStart):
+ movaps 10(%ecx), %xmm2
+ movaps 26(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 42(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 58(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $6, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $6, %xmm3, %xmm4
+ jnz L(Shl6Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave6)
+# endif
+ palignr $6, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $6, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl6LoopStart)
+
+L(Shl6LoopExit):
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
+ mov $10, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl7):
+ movaps -7(%ecx), %xmm1
+ movaps 9(%ecx), %xmm2
+L(Shl7Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit7Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl7LoopExit)
+
+ palignr $7, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 25(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit7Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl7LoopExit)
+
+ palignr $7, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 25(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit7Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl7LoopExit)
+
+ palignr $7, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 25(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit7Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl7LoopExit)
+
+ palignr $7, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 25(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -9(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -7(%ecx), %xmm1
+
+L(Shl7LoopStart):
+ movaps 9(%ecx), %xmm2
+ movaps 25(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 41(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 57(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $7, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $7, %xmm3, %xmm4
+ jnz L(Shl7Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave7)
+# endif
+ palignr $7, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $7, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl7LoopStart)
+
+L(Shl7LoopExit):
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
+ mov $9, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl8):
+ movaps -8(%ecx), %xmm1
+ movaps 8(%ecx), %xmm2
+L(Shl8Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit8Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 24(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit8Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 24(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit8Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 24(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit8Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 24(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -8(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -8(%ecx), %xmm1
+
+L(Shl8LoopStart):
+ movaps 8(%ecx), %xmm2
+ movaps 24(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 40(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 56(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $8, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $8, %xmm3, %xmm4
+ jnz L(Shl8Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave8)
+# endif
+ palignr $8, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl8LoopStart)
+
+L(Shl8LoopExit):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ mov $8, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl9):
+ movaps -9(%ecx), %xmm1
+ movaps 7(%ecx), %xmm2
+L(Shl9Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit9Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl9LoopExit)
+
+ palignr $9, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 23(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit9Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl9LoopExit)
+
+ palignr $9, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 23(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit9Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl9LoopExit)
+
+ palignr $9, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 23(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit9Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl9LoopExit)
+
+ palignr $9, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 23(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -7(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -9(%ecx), %xmm1
+
+L(Shl9LoopStart):
+ movaps 7(%ecx), %xmm2
+ movaps 23(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 39(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 55(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $9, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $9, %xmm3, %xmm4
+ jnz L(Shl9Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave9)
+# endif
+ palignr $9, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $9, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl9LoopStart)
+
+L(Shl9LoopExit):
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
+ mov $7, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl10):
+ movaps -10(%ecx), %xmm1
+ movaps 6(%ecx), %xmm2
+L(Shl10Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit10Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl10LoopExit)
+
+ palignr $10, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 22(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit10Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl10LoopExit)
+
+ palignr $10, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 22(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit10Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl10LoopExit)
+
+ palignr $10, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 22(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit10Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl10LoopExit)
+
+ palignr $10, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 22(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -6(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -10(%ecx), %xmm1
+
+L(Shl10LoopStart):
+ movaps 6(%ecx), %xmm2
+ movaps 22(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 38(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 54(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $10, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $10, %xmm3, %xmm4
+ jnz L(Shl10Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave10)
+# endif
+ palignr $10, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $10, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl10LoopStart)
+
+L(Shl10LoopExit):
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
+ mov $6, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl11):
+ movaps -11(%ecx), %xmm1
+ movaps 5(%ecx), %xmm2
+L(Shl11Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit11Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl11LoopExit)
+
+ palignr $11, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 21(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit11Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl11LoopExit)
+
+ palignr $11, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 21(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit11Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl11LoopExit)
+
+ palignr $11, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 21(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit11Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl11LoopExit)
+
+ palignr $11, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 21(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -5(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -11(%ecx), %xmm1
+
+L(Shl11LoopStart):
+ movaps 5(%ecx), %xmm2
+ movaps 21(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 37(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 53(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $11, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $11, %xmm3, %xmm4
+ jnz L(Shl11Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave11)
+# endif
+ palignr $11, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $11, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl11LoopStart)
+
+L(Shl11LoopExit):
+ movlpd -3(%ecx), %xmm0
+ movlpd %xmm0, -3(%edx)
+ mov $5, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl12):
+ movaps -12(%ecx), %xmm1
+ movaps 4(%ecx), %xmm2
+L(Shl12Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit12Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 20(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit12Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 20(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit12Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 20(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit12Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 20(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -4(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -12(%ecx), %xmm1
+
+L(Shl12LoopStart):
+ movaps 4(%ecx), %xmm2
+ movaps 20(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 36(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 52(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $12, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $12, %xmm3, %xmm4
+ jnz L(Shl12Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave12)
+# endif
+ palignr $12, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl12LoopStart)
+
+L(Shl12LoopExit):
+ movl (%ecx), %esi
+ movl %esi, (%edx)
+ mov $4, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl13):
+ movaps -13(%ecx), %xmm1
+ movaps 3(%ecx), %xmm2
+L(Shl13Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit13Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl13LoopExit)
+
+ palignr $13, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 19(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit13Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl13LoopExit)
+
+ palignr $13, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 19(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit13Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl13LoopExit)
+
+ palignr $13, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 19(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit13Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl13LoopExit)
+
+ palignr $13, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 19(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -3(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -13(%ecx), %xmm1
+
+L(Shl13LoopStart):
+ movaps 3(%ecx), %xmm2
+ movaps 19(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 35(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 51(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $13, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $13, %xmm3, %xmm4
+ jnz L(Shl13Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave13)
+# endif
+ palignr $13, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $13, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl13LoopStart)
+
+L(Shl13LoopExit):
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
+ mov $3, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl14):
+ movaps -14(%ecx), %xmm1
+ movaps 2(%ecx), %xmm2
+L(Shl14Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit14Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl14LoopExit)
+
+ palignr $14, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 18(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit14Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl14LoopExit)
+
+ palignr $14, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 18(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit14Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl14LoopExit)
+
+ palignr $14, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 18(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit14Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl14LoopExit)
+
+ palignr $14, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 18(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -2(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -14(%ecx), %xmm1
+
+L(Shl14LoopStart):
+ movaps 2(%ecx), %xmm2
+ movaps 18(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 34(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 50(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $14, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $14, %xmm3, %xmm4
+ jnz L(Shl14Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave14)
+# endif
+ palignr $14, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $14, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl14LoopStart)
+
+L(Shl14LoopExit):
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
+ mov $2, %esi
+ jmp L(CopyFrom1To16Bytes)
+
+ .p2align 4
+L(Shl15):
+ movaps -15(%ecx), %xmm1
+ movaps 1(%ecx), %xmm2
+L(Shl15Start):
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit15Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl15LoopExit)
+
+ palignr $15, %xmm1, %xmm2
+ movaps %xmm3, %xmm1
+ movaps %xmm2, (%edx)
+ movaps 17(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit15Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl15LoopExit)
+
+ palignr $15, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 17(%ecx), %xmm2
+ movaps %xmm3, %xmm1
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit15Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl15LoopExit)
+
+ palignr $15, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 17(%ecx), %xmm2
+
+ pcmpeqb %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ jbe L(StrncpyExit15Case2OrCase3)
+# endif
+ test %eax, %eax
+ jnz L(Shl15LoopExit)
+
+ palignr $15, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 17(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -1(%ecx), %ecx
+ sub %eax, %edx
+# ifdef USE_AS_STRNCPY
+ add %eax, %ebx
+# endif
+ movaps -15(%ecx), %xmm1
+
+L(Shl15LoopStart):
+ movaps 1(%ecx), %xmm2
+ movaps 17(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 33(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 49(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqb %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $15, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $15, %xmm3, %xmm4
+ jnz L(Shl15Start)
+# ifdef USE_AS_STRNCPY
+ sub $64, %ebx
+ jbe L(StrncpyLeave15)
+# endif
+ palignr $15, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $15, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl15LoopStart)
+
+L(Shl15LoopExit):
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
+ mov $1, %esi
+# ifdef USE_AS_STRCAT
+ jmp L(CopyFrom1To16Bytes)
+# endif
+
+
+# ifndef USE_AS_STRCAT
+
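+/* Copy the final 1..16 bytes.  On entry %esi holds the offset of the
+   16-byte window that ends the copy, and %al/%ah carry the pmovmskb
+   mask whose lowest set bit marks the terminating null byte; the bit
+   tests below dispatch to the L(ExitN) label that copies exactly N
+   bytes.  */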
+ .p2align 4
+L(CopyFrom1To16Bytes):
+# ifdef USE_AS_STRNCPY
+ add $16, %ebx
+# endif
+ add %esi, %edx
+ add %esi, %ecx
+
+ POP (%esi)
+ test %al, %al
+ jz L(ExitHigh8)
+
+L(CopyFrom1To16BytesLess8):
+ mov %al, %ah
+ and $15, %ah
+ jz L(ExitHigh4)
+
+ test $0x01, %al
+ jnz L(Exit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ test $0x04, %al
+ jnz L(Exit3)
+
+ .p2align 4
+L(Exit4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (3)
+# ifdef USE_AS_STRNCPY
+ sub $4, %ebx
+ lea 4(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
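+	/* Adjust the stpcpy return value: cmpb sets CF iff the byte at
+	   (%eax) is zero, and sbb $-1 adds 1 - CF, so %eax moves one
+	   past the last byte copied unless it already points at the
+	   terminating null.  */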
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4):
+ test $0x10, %al
+ jnz L(Exit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ test $0x40, %al
+ jnz L(Exit7)
+
+ .p2align 4
+L(Exit8):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ SAVE_RESULT (7)
+# ifdef USE_AS_STRNCPY
+ sub $8, %ebx
+ lea 8(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh8):
+ mov %ah, %al
+ and $15, %al
+ jz L(ExitHigh12)
+
+ test $0x01, %ah
+ jnz L(Exit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+
+ .p2align 4
+L(Exit12):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (11)
+# ifdef USE_AS_STRNCPY
+ sub $12, %ebx
+ lea 12(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh12):
+ test $0x10, %ah
+ jnz L(Exit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+
+ .p2align 4
+L(Exit16):
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT (15)
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ lea 16(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+# ifdef USE_AS_STRNCPY
+
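+/* strncpy-only exits.  Case2: a null byte was found within the
+   remaining count, so the copy ends at the null.  Case3: the count
+   ran out first, so exactly %ebx more bytes are copied with no
+   terminating null.  */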
+ CFI_PUSH(%esi)
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2):
+ add $16, %ebx
+ add %esi, %ecx
+ add %esi, %edx
+
+ POP (%esi)
+
+ test %al, %al
+ jz L(ExitHighCase2)
+
+ cmp $8, %ebx
+ ja L(CopyFrom1To16BytesLess8)
+
+ test $0x01, %al
+ jnz L(Exit1)
+ cmp $1, %ebx
+ je L(Exit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ cmp $2, %ebx
+ je L(Exit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ cmp $3, %ebx
+ je L(Exit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ cmp $4, %ebx
+ je L(Exit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ cmp $5, %ebx
+ je L(Exit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ cmp $6, %ebx
+ je L(Exit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ cmp $7, %ebx
+ je L(Exit7)
+ jmp L(Exit8)
+
+ .p2align 4
+L(ExitHighCase2):
+ cmp $8, %ebx
+ jbe L(CopyFrom1To16BytesLess8Case3)
+
+ test $0x01, %ah
+ jnz L(Exit9)
+ cmp $9, %ebx
+ je L(Exit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ cmp $10, %ebx
+ je L(Exit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ cmp $11, %ebx
+ je L(Exit11)
+	test $0x08, %ah
+ jnz L(Exit12)
+ cmp $12, %ebx
+ je L(Exit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ cmp $13, %ebx
+ je L(Exit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ cmp $14, %ebx
+ je L(Exit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ cmp $15, %ebx
+ je L(Exit15)
+ jmp L(Exit16)
+
+ CFI_PUSH(%esi)
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+
+ .p2align 4
+L(CopyFrom1To16BytesCase3):
+ add $16, %ebx
+ add %esi, %edx
+ add %esi, %ecx
+
+ POP (%esi)
+
+ cmp $8, %ebx
+ ja L(ExitHigh8Case3)
+
+L(CopyFrom1To16BytesLess8Case3):
+ cmp $4, %ebx
+ ja L(ExitHigh4Case3)
+
+ cmp $1, %ebx
+ je L(Exit1)
+ cmp $2, %ebx
+ je L(Exit2)
+ cmp $3, %ebx
+ je L(Exit3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (4)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4Case3):
+ cmp $5, %ebx
+ je L(Exit5)
+ cmp $6, %ebx
+ je L(Exit6)
+ cmp $7, %ebx
+ je L(Exit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ SAVE_RESULT (8)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh8Case3):
+ cmp $12, %ebx
+ ja L(ExitHigh12Case3)
+
+ cmp $9, %ebx
+ je L(Exit9)
+ cmp $10, %ebx
+ je L(Exit10)
+ cmp $11, %ebx
+ je L(Exit11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (12)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh12Case3):
+ cmp $13, %ebx
+ je L(Exit13)
+ cmp $14, %ebx
+ je L(Exit14)
+ cmp $15, %ebx
+ je L(Exit15)
+ movlpd (%ecx), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ SAVE_RESULT (16)
+ RETURN1
+
+# endif
+
+ .p2align 4
+L(Exit1):
+ movb (%ecx), %al
+ movb %al, (%edx)
+ SAVE_RESULT (0)
+# ifdef USE_AS_STRNCPY
+ sub $1, %ebx
+ lea 1(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit2):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ SAVE_RESULT (1)
+# ifdef USE_AS_STRNCPY
+ sub $2, %ebx
+ lea 2(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit3):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movb 2(%ecx), %al
+ movb %al, 2(%edx)
+ SAVE_RESULT (2)
+# ifdef USE_AS_STRNCPY
+ sub $3, %ebx
+ lea 3(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit5):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movb 4(%ecx), %al
+ movb %al, 4(%edx)
+ SAVE_RESULT (4)
+# ifdef USE_AS_STRNCPY
+ sub $5, %ebx
+ lea 5(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit6):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movw 4(%ecx), %ax
+ movw %ax, 4(%edx)
+ SAVE_RESULT (5)
+# ifdef USE_AS_STRNCPY
+ sub $6, %ebx
+ lea 6(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit7):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 3(%ecx), %eax
+ movl %eax, 3(%edx)
+ SAVE_RESULT (6)
+# ifdef USE_AS_STRNCPY
+ sub $7, %ebx
+ lea 7(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit9):
+ movlpd (%ecx), %xmm0
+ movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
+ movb %al, 8(%edx)
+ SAVE_RESULT (8)
+# ifdef USE_AS_STRNCPY
+ sub $9, %ebx
+ lea 9(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit10):
+ movlpd (%ecx), %xmm0
+ movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
+ movw %ax, 8(%edx)
+ SAVE_RESULT (9)
+# ifdef USE_AS_STRNCPY
+ sub $10, %ebx
+ lea 10(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit11):
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 7(%edx)
+ SAVE_RESULT (10)
+# ifdef USE_AS_STRNCPY
+ sub $11, %ebx
+ lea 11(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit13):
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT (12)
+# ifdef USE_AS_STRNCPY
+ sub $13, %ebx
+ lea 13(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit14):
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT (13)
+# ifdef USE_AS_STRNCPY
+ sub $14, %ebx
+ lea 14(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(Exit15):
+ movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT (14)
+# ifdef USE_AS_STRNCPY
+ sub $15, %ebx
+ lea 15(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+CFI_POP (%edi)
+
+# ifdef USE_AS_STRNCPY
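+/* Zero-fill tails for strncpy: L(FillN) stores N zero bytes at
+   (%ecx), using the zeroed %edx and %xmm0 as source patterns.  */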
+ .p2align 4
+L(Fill0):
+ RETURN
+
+ .p2align 4
+L(Fill1):
+ movb %dl, (%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill2):
+ movw %dx, (%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill3):
+ movw %dx, (%ecx)
+ movb %dl, 2(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill4):
+ movl %edx, (%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill5):
+ movl %edx, (%ecx)
+ movb %dl, 4(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill6):
+ movl %edx, (%ecx)
+ movw %dx, 4(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill7):
+ movl %edx, (%ecx)
+ movl %edx, 3(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill8):
+ movlpd %xmm0, (%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill9):
+ movlpd %xmm0, (%ecx)
+ movb %dl, 8(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill10):
+ movlpd %xmm0, (%ecx)
+ movw %dx, 8(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill11):
+ movlpd %xmm0, (%ecx)
+ movl %edx, 7(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill12):
+ movlpd %xmm0, (%ecx)
+ movl %edx, 8(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill13):
+ movlpd %xmm0, (%ecx)
+ movlpd %xmm0, 5(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill14):
+ movlpd %xmm0, (%ecx)
+ movlpd %xmm0, 6(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill15):
+ movlpd %xmm0, (%ecx)
+ movlpd %xmm0, 7(%ecx)
+ RETURN
+
+ .p2align 4
+L(Fill16):
+ movlpd %xmm0, (%ecx)
+ movlpd %xmm0, 8(%ecx)
+ RETURN
+
+ .p2align 4
+L(StrncpyFillExit1):
+ lea 16(%ebx), %ebx
+L(FillFrom1To16Bytes):
+ test %ebx, %ebx
+ jz L(Fill0)
+ cmp $16, %ebx
+ je L(Fill16)
+ cmp $8, %ebx
+ je L(Fill8)
+ jg L(FillMore8)
+ cmp $4, %ebx
+ je L(Fill4)
+ jg L(FillMore4)
+ cmp $2, %ebx
+ jl L(Fill1)
+ je L(Fill2)
+ jg L(Fill3)
+L(FillMore8): /* but less than 16 */
+ cmp $12, %ebx
+ je L(Fill12)
+ jl L(FillLess12)
+ cmp $14, %ebx
+ jl L(Fill13)
+ je L(Fill14)
+ jg L(Fill15)
+L(FillMore4): /* but less than 8 */
+ cmp $6, %ebx
+ jl L(Fill5)
+ je L(Fill6)
+ jg L(Fill7)
+L(FillLess12): /* but more than 8 */
+ cmp $10, %ebx
+ jl L(Fill9)
+ je L(Fill10)
+ jmp L(Fill11)
+
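+/* Pad the rest of the destination with zeros, as strncpy requires:
+   store a 16-byte head, round %ecx down to a 16-byte boundary, clear
+   64 bytes per movdqa iteration, and finish the remainder through
+   L(FillFrom1To16Bytes).  */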
+ CFI_PUSH(%edi)
+
+ .p2align 4
+L(StrncpyFillTailWithZero1):
+ POP (%edi)
+L(StrncpyFillTailWithZero):
+ pxor %xmm0, %xmm0
+ xor %edx, %edx
+ sub $16, %ebx
+ jbe L(StrncpyFillExit1)
+
+ movlpd %xmm0, (%ecx)
+ movlpd %xmm0, 8(%ecx)
+
+ lea 16(%ecx), %ecx
+
+ mov %ecx, %edx
+ and $0xf, %edx
+ sub %edx, %ecx
+ add %edx, %ebx
+ xor %edx, %edx
+ sub $64, %ebx
+ jb L(StrncpyFillLess64)
+
+L(StrncpyFillLoopMovdqa):
+ movdqa %xmm0, (%ecx)
+ movdqa %xmm0, 16(%ecx)
+ movdqa %xmm0, 32(%ecx)
+ movdqa %xmm0, 48(%ecx)
+ lea 64(%ecx), %ecx
+ sub $64, %ebx
+ jae L(StrncpyFillLoopMovdqa)
+
+L(StrncpyFillLess64):
+ add $32, %ebx
+ jl L(StrncpyFillLess32)
+ movdqa %xmm0, (%ecx)
+ movdqa %xmm0, 16(%ecx)
+ lea 32(%ecx), %ecx
+ sub $16, %ebx
+ jl L(StrncpyFillExit1)
+ movdqa %xmm0, (%ecx)
+ lea 16(%ecx), %ecx
+ jmp L(FillFrom1To16Bytes)
+
+L(StrncpyFillLess32):
+ add $16, %ebx
+ jl L(StrncpyFillExit1)
+ movdqa %xmm0, (%ecx)
+ lea 16(%ecx), %ecx
+ jmp L(FillFrom1To16Bytes)
+# endif
+
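+/* L(ExitTailN): copy the first N bytes and return; used on the paths
+   where the end of the string, or the strncpy count limit, falls
+   within the first 16 bytes of the source.  */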
+ .p2align 4
+L(ExitTail1):
+ movb (%ecx), %al
+ movb %al, (%edx)
+ SAVE_RESULT_TAIL (0)
+# ifdef USE_AS_STRNCPY
+ sub $1, %ebx
+ lea 1(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail2):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ SAVE_RESULT_TAIL (1)
+# ifdef USE_AS_STRNCPY
+ sub $2, %ebx
+ lea 2(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail3):
+ movw (%ecx), %ax
+ movw %ax, (%edx)
+ movb 2(%ecx), %al
+ movb %al, 2(%edx)
+ SAVE_RESULT_TAIL (2)
+# ifdef USE_AS_STRNCPY
+ sub $3, %ebx
+ lea 3(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT_TAIL (3)
+# ifdef USE_AS_STRNCPY
+ sub $4, %ebx
+ lea 4(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail5):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movb 4(%ecx), %al
+ movb %al, 4(%edx)
+ SAVE_RESULT_TAIL (4)
+# ifdef USE_AS_STRNCPY
+ sub $5, %ebx
+ lea 5(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail6):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movw 4(%ecx), %ax
+ movw %ax, 4(%edx)
+ SAVE_RESULT_TAIL (5)
+# ifdef USE_AS_STRNCPY
+ sub $6, %ebx
+ lea 6(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail7):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl 3(%ecx), %eax
+ movl %eax, 3(%edx)
+ SAVE_RESULT_TAIL (6)
+# ifdef USE_AS_STRNCPY
+ sub $7, %ebx
+ lea 7(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail8):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ SAVE_RESULT_TAIL (7)
+# ifdef USE_AS_STRNCPY
+ sub $8, %ebx
+ lea 8(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail9):
+ movlpd (%ecx), %xmm0
+ movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
+ movb %al, 8(%edx)
+ SAVE_RESULT_TAIL (8)
+# ifdef USE_AS_STRNCPY
+ sub $9, %ebx
+ lea 9(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail10):
+ movlpd (%ecx), %xmm0
+ movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
+ movw %ax, 8(%edx)
+ SAVE_RESULT_TAIL (9)
+# ifdef USE_AS_STRNCPY
+ sub $10, %ebx
+ lea 10(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail11):
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 7(%edx)
+ SAVE_RESULT_TAIL (10)
+# ifdef USE_AS_STRNCPY
+ sub $11, %ebx
+ lea 11(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail12):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT_TAIL (11)
+# ifdef USE_AS_STRNCPY
+ sub $12, %ebx
+ lea 12(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail13):
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT_TAIL (12)
+# ifdef USE_AS_STRNCPY
+ sub $13, %ebx
+ lea 13(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail14):
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT_TAIL (13)
+# ifdef USE_AS_STRNCPY
+ sub $14, %ebx
+ lea 14(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail15):
+ movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT_TAIL (14)
+# ifdef USE_AS_STRNCPY
+ sub $15, %ebx
+ lea 15(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# endif
+ RETURN
+
+ .p2align 4
+L(ExitTail16):
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT_TAIL (15)
+# ifdef USE_AS_STRNCPY
+ sub $16, %ebx
+ lea 16(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN
+# endif
+
+# ifdef USE_AS_STRNCPY
+# ifndef USE_AS_STRCAT
+ CFI_PUSH (%esi)
+ CFI_PUSH (%edi)
+# endif
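+/* The count dropped to or below zero inside an aligned 64-byte
+   iteration: add $48 back (a net -16 for the first chunk) and flush
+   the chunks that still fit, the Case2 variant also checking each
+   one for a null byte.  */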
+ .p2align 4
+L(StrncpyLeaveCase2OrCase3):
+ test %eax, %eax
+ jnz L(Aligned64LeaveCase2)
+
+L(Aligned64LeaveCase3):
+ add $48, %ebx
+ jle L(CopyFrom1To16BytesCase3)
+ movaps %xmm4, -64(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase3)
+ movaps %xmm5, -48(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase3)
+ movaps %xmm6, -32(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(Aligned64LeaveCase2):
+ pcmpeqb %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ add $48, %ebx
+ jle L(CopyFrom1To16BytesCase2OrCase3)
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm5, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm4, -64(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm6, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm5, -48(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(CopyFrom1To16BytesCase2OrCase3)
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqb %xmm7, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm6, -32(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+ jmp L(CopyFrom1To16BytesCase2)
+
+/*--------------------------------------------------*/
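+/* L(StrncpyExitNCase2OrCase3): the count ran out while handling the
+   N-byte shift; copy the partial window preceding the chunk just
+   tested, record its length in %esi, then branch on whether a null
+   byte was also seen (%eax nonzero).  */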
+ .p2align 4
+L(StrncpyExit1Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 7(%edx)
+ mov $15, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit2Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
+ mov $14, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit3Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
+ mov $13, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit4Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
+ mov $12, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit5Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
+ mov $11, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit6Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
+ mov $10, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit7Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
+ mov $9, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit8Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ mov $8, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit9Case2OrCase3):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ mov $7, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit10Case2OrCase3):
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
+ mov $6, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit11Case2OrCase3):
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
+ mov $5, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit12Case2OrCase3):
+ movl (%ecx), %esi
+ movl %esi, (%edx)
+ mov $4, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit13Case2OrCase3):
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
+ mov $3, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit14Case2OrCase3):
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
+ mov $2, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
+ .p2align 4
+L(StrncpyExit15Case2OrCase3):
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
+ mov $1, %esi
+ test %eax, %eax
+ jnz L(CopyFrom1To16BytesCase2)
+ jmp L(CopyFrom1To16BytesCase3)
+
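+/* L(StrncpyLeaveN): the count ran out inside the shifted 64-byte
+   loop; store the already-merged chunks while the count lasts,
+   accumulating the extra source offset in %esi, then L(StrncpyExitN)
+   copies the last 16 bytes ending at the shift boundary and finishes
+   via Case3.  */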
+L(StrncpyLeave1):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit1)
+ palignr $1, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 31(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit1)
+ palignr $1, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit1)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit1)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit1):
+ lea 15(%edx, %esi), %edx
+ lea 15(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave2):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit2)
+ palignr $2, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 30(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit2)
+ palignr $2, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit2)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit2)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit2):
+ lea 14(%edx, %esi), %edx
+ lea 14(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave3):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit3)
+ palignr $3, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 29(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit3)
+ palignr $3, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit3)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit3)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit3):
+ lea 13(%edx, %esi), %edx
+ lea 13(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave4):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit4)
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 28(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit4)
+ palignr $4, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit4)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit4)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit4):
+ lea 12(%edx, %esi), %edx
+ lea 12(%ecx, %esi), %ecx
+ movlpd -12(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -12(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave5):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit5)
+ palignr $5, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 27(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit5)
+ palignr $5, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit5)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit5)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit5):
+ lea 11(%edx, %esi), %edx
+ lea 11(%ecx, %esi), %ecx
+ movlpd -11(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -11(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave6):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit6)
+ palignr $6, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 26(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit6)
+ palignr $6, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit6)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit6)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit6):
+ lea 10(%edx, %esi), %edx
+ lea 10(%ecx, %esi), %ecx
+
+ movlpd -10(%ecx), %xmm0
+ movw -2(%ecx), %ax
+ movlpd %xmm0, -10(%edx)
+ movw %ax, -2(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave7):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit7)
+ palignr $7, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 25(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit7)
+ palignr $7, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit7)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit7)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit7):
+ lea 9(%edx, %esi), %edx
+ lea 9(%ecx, %esi), %ecx
+
+ movlpd -9(%ecx), %xmm0
+ movb -1(%ecx), %ah
+ movlpd %xmm0, -9(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave8):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit8)
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 24(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit8)
+ palignr $8, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit8)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit8)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit8):
+ lea 8(%edx, %esi), %edx
+ lea 8(%ecx, %esi), %ecx
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave9):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit9)
+ palignr $9, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 23(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit9)
+ palignr $9, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit9)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit9)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit9):
+ lea 7(%edx, %esi), %edx
+ lea 7(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave10):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit10)
+ palignr $10, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 22(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit10)
+ palignr $10, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit10)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit10)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit10):
+ lea 6(%edx, %esi), %edx
+ lea 6(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave11):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit11)
+ palignr $11, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 21(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit11)
+ palignr $11, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit11)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit11)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit11):
+ lea 5(%edx, %esi), %edx
+ lea 5(%ecx, %esi), %ecx
+ movl -5(%ecx), %esi
+ movb -1(%ecx), %ah
+ movl %esi, -5(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave12):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit12)
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 20(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit12)
+ palignr $12, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit12)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit12)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit12):
+ lea 4(%edx, %esi), %edx
+ lea 4(%ecx, %esi), %ecx
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave13):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit13)
+ palignr $13, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 19(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit13)
+ palignr $13, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit13)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit13)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit13):
+ lea 3(%edx, %esi), %edx
+ lea 3(%ecx, %esi), %ecx
+
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave14):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit14)
+ palignr $14, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 18(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit14)
+ palignr $14, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit14)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit14)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit14):
+ lea 2(%edx, %esi), %edx
+ lea 2(%ecx, %esi), %ecx
+ movw -2(%ecx), %ax
+ movw %ax, -2(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave15):
+ movaps %xmm2, %xmm3
+ add $48, %ebx
+ jle L(StrncpyExit15)
+ palignr $15, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 17(%ecx), %xmm2
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit15)
+ palignr $15, %xmm3, %xmm2
+ movaps %xmm2, 16(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit15)
+ movaps %xmm4, 32(%edx)
+ lea 16(%esi), %esi
+ sub $16, %ebx
+ jbe L(StrncpyExit15)
+ movaps %xmm5, 48(%edx)
+ lea 16(%esi), %esi
+ lea -16(%ebx), %ebx
+L(StrncpyExit15):
+ lea 1(%edx, %esi), %edx
+ lea 1(%ecx, %esi), %ecx
+ movb -1(%ecx), %ah
+ movb %ah, -1(%edx)
+ xor %esi, %esi
+ jmp L(CopyFrom1To16BytesCase3)
+# endif
+
+# ifndef USE_AS_STRCAT
+# ifdef USE_AS_STRNCPY
+ CFI_POP (%esi)
+ CFI_POP (%edi)
+
+ .p2align 4
+L(ExitTail0):
+ movl %edx, %eax
+ RETURN
+
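+/* Small-count strncpy entries: test the leading source bytes one at
+   a time, branching to the matching L(ExitTailN) as soon as a null
+   byte or the count limit is hit.  */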
+ .p2align 4
+L(StrncpyExit15Bytes):
+ cmp $12, %ebx
+ jbe L(StrncpyExit12Bytes)
+ cmpb $0, 8(%ecx)
+ jz L(ExitTail9)
+ cmpb $0, 9(%ecx)
+ jz L(ExitTail10)
+ cmpb $0, 10(%ecx)
+ jz L(ExitTail11)
+ cmpb $0, 11(%ecx)
+ jz L(ExitTail12)
+ cmp $13, %ebx
+ je L(ExitTail13)
+ cmpb $0, 12(%ecx)
+ jz L(ExitTail13)
+ cmp $14, %ebx
+ je L(ExitTail14)
+ cmpb $0, 13(%ecx)
+ jz L(ExitTail14)
+ movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 7(%edx)
+# ifdef USE_AS_STPCPY
+ lea 14(%edx), %eax
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit12Bytes):
+ cmp $9, %ebx
+ je L(ExitTail9)
+ cmpb $0, 8(%ecx)
+ jz L(ExitTail9)
+ cmp $10, %ebx
+ je L(ExitTail10)
+ cmpb $0, 9(%ecx)
+ jz L(ExitTail10)
+ cmp $11, %ebx
+ je L(ExitTail11)
+ cmpb $0, 10(%ecx)
+ jz L(ExitTail11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT_TAIL (11)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit8Bytes):
+ cmp $4, %ebx
+ jbe L(StrncpyExit4Bytes)
+ cmpb $0, (%ecx)
+ jz L(ExitTail1)
+ cmpb $0, 1(%ecx)
+ jz L(ExitTail2)
+ cmpb $0, 2(%ecx)
+ jz L(ExitTail3)
+ cmpb $0, 3(%ecx)
+ jz L(ExitTail4)
+
+ cmp $5, %ebx
+ je L(ExitTail5)
+ cmpb $0, 4(%ecx)
+ jz L(ExitTail5)
+ cmp $6, %ebx
+ je L(ExitTail6)
+ cmpb $0, 5(%ecx)
+ jz L(ExitTail6)
+ cmp $7, %ebx
+ je L(ExitTail7)
+ cmpb $0, 6(%ecx)
+ jz L(ExitTail7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+# ifdef USE_AS_STPCPY
+ lea 7(%edx), %eax
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# else
+ movl %edx, %eax
+# endif
+ RETURN
+
+ .p2align 4
+L(StrncpyExit4Bytes):
+ test %ebx, %ebx
+ jz L(ExitTail0)
+ cmp $1, %ebx
+ je L(ExitTail1)
+ cmpb $0, (%ecx)
+ jz L(ExitTail1)
+ cmp $2, %ebx
+ je L(ExitTail2)
+ cmpb $0, 1(%ecx)
+ jz L(ExitTail2)
+ cmp $3, %ebx
+ je L(ExitTail3)
+ cmpb $0, 2(%ecx)
+ jz L(ExitTail3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT_TAIL (3)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+# endif
+
+END (STRCPY)
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy.S
new file mode 100644
index 0000000000..ffbc03c6d5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcpy.S
@@ -0,0 +1,116 @@
+/* Multiple versions of strcpy
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY)
+# ifndef STRCPY
+# define STRCPY strcpy
+# endif
+#endif
+
+#ifdef USE_AS_STPCPY
+# ifdef USE_AS_STRNCPY
+# define STRCPY_SSSE3 __stpncpy_ssse3
+# define STRCPY_SSE2 __stpncpy_sse2
+# define STRCPY_IA32 __stpncpy_ia32
+# define __GI_STRCPY __GI_stpncpy
+# define __GI___STRCPY __GI___stpncpy
+# else
+# define STRCPY_SSSE3 __stpcpy_ssse3
+# define STRCPY_SSE2 __stpcpy_sse2
+# define STRCPY_IA32 __stpcpy_ia32
+# define __GI_STRCPY __GI_stpcpy
+# define __GI___STRCPY __GI___stpcpy
+# endif
+#else
+# ifdef USE_AS_STRNCPY
+# define STRCPY_SSSE3 __strncpy_ssse3
+# define STRCPY_SSE2 __strncpy_sse2
+# define STRCPY_IA32 __strncpy_ia32
+# define __GI_STRCPY __GI_strncpy
+# else
+# define STRCPY_SSSE3 __strcpy_ssse3
+# define STRCPY_SSE2 __strcpy_sse2
+# define STRCPY_IA32 __strcpy_ia32
+# define __GI_STRCPY __GI_strcpy
+# endif
+#endif
+
+
+/* Define multiple versions only for the definition in libc. Don't
+   define multiple versions for strncpy in the static library, since
+   strncpy is needed before initialization has happened.  */
+#if IS_IN (libc)
+
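+/* Resolver order: default to the ia32 version; with SSE2, prefer the
+   SSE2 version; use the SSSE3 version only when unaligned loads are
+   slow (no Fast_Unaligned_Load) and SSSE3 is available.  */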
+ .text
+ENTRY(STRCPY)
+ .type STRCPY, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (STRCPY_IA32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (STRCPY_SSE2)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (STRCPY_SSSE3)
+2: ret
+END(STRCPY)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type STRCPY_IA32, @function; \
+ .align 16; \
+ .globl STRCPY_IA32; \
+ .hidden STRCPY_IA32; \
+ STRCPY_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size STRCPY_IA32, .-STRCPY_IA32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strcpy calls through a PLT.
+   The speedup we get from using SSSE3 instructions is likely eaten away
+ by the indirect call in the PLT. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCPY; __GI_STRCPY = STRCPY_IA32
+# undef libc_hidden_def
+# define libc_hidden_def(name) \
+ .globl __GI___STRCPY; __GI___STRCPY = STRCPY_IA32
+
+# endif
+#endif
+
+#ifdef USE_AS_STPCPY
+# ifdef USE_AS_STRNCPY
+# include "../../stpncpy.S"
+# else
+# include "../../i586/stpcpy.S"
+# endif
+#else
+# ifndef USE_AS_STRNCPY
+# include "../../i586/strcpy.S"
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcspn-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strcspn-c.c
new file mode 100644
index 0000000000..6d61e190a8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcspn-c.c
@@ -0,0 +1,2 @@
+#define __strcspn_sse2 __strcspn_ia32
+#include <sysdeps/x86_64/multiarch/strcspn-c.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strcspn.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strcspn.S
new file mode 100644
index 0000000000..21e5093924
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strcspn.S
@@ -0,0 +1,75 @@
+/* Multiple versions of strcspn
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifdef USE_AS_STRPBRK
+#define STRCSPN_SSE42 __strpbrk_sse42
+#define STRCSPN_IA32 __strpbrk_ia32
+#define __GI_STRCSPN __GI_strpbrk
+#else
+#ifndef STRCSPN
+#define STRCSPN strcspn
+#define STRCSPN_SSE42 __strcspn_sse42
+#define STRCSPN_IA32 __strcspn_ia32
+#define __GI_STRCSPN __GI_strcspn
+#endif
+#endif
+
+/* Define multiple versions only for the definition in libc. Don't
+   define multiple versions for strpbrk in the static library, since
+   strpbrk is needed before initialization has happened.  */
+#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc)
+ .text
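+/* Resolver: pick the SSE4.2 version when the CPU supports it,
+   otherwise the ia32 version.  */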
+ENTRY(STRCSPN)
+ .type STRCSPN, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (STRCSPN_IA32)
+ HAS_CPU_FEATURE (SSE4_2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (STRCSPN_SSE42)
+2: ret
+END(STRCSPN)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type STRCSPN_IA32, @function; \
+ .globl STRCSPN_IA32; \
+ .p2align 4; \
+ STRCSPN_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size STRCSPN_IA32, .-STRCSPN_IA32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC uses.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCSPN; __GI_STRCSPN = STRCSPN_IA32
+#endif
+
+#ifdef USE_AS_STRPBRK
+#include "../../strpbrk.S"
+#else
+#include "../../strcspn.S"
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
new file mode 100644
index 0000000000..d3ea864bab
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
@@ -0,0 +1,125 @@
+/* strlen with SSE2 and BSF
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined SHARED && IS_IN (libc)
+
+#include <sysdep.h>
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+#define PARMS 4 + 8 /* Preserve ESI and EDI. */
+#define STR PARMS
+#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state
+#define RETURN POP (%edi); POP (%esi); ret; \
+ cfi_restore_state; cfi_remember_state
+
+ .text
+ENTRY (__strlen_sse2_bsf)
+ ENTRANCE
+ mov STR(%esp), %edi
+ xor %eax, %eax
+ mov %edi, %ecx
+ and $0x3f, %ecx
+ pxor %xmm0, %xmm0
+ cmp $0x30, %ecx
+ ja L(next)
+ movdqu (%edi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit_less16)
+ mov %edi, %eax
+ and $-16, %eax
+ jmp L(align16_start)
+L(next):
+
+ mov %edi, %eax
+ and $-16, %eax
+ pcmpeqb (%eax), %xmm0
+ mov $-1, %esi
+ sub %eax, %ecx
+ shl %cl, %esi
+ pmovmskb %xmm0, %edx
+ and %esi, %edx
+ jnz L(exit)
+L(align16_start):
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+ .p2align 4
+L(align16_loop):
+ pcmpeqb 16(%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ lea 64(%eax), %eax
+ test %edx, %edx
+ jz L(align16_loop)
+L(exit):
+ sub %edi, %eax
+L(exit_less16):
+ bsf %edx, %edx
+ add %edx, %eax
+ RETURN
+L(exit16):
+ sub %edi, %eax
+ bsf %edx, %edx
+ add %edx, %eax
+ add $16, %eax
+ RETURN
+L(exit32):
+ sub %edi, %eax
+ bsf %edx, %edx
+ add %edx, %eax
+ add $32, %eax
+ RETURN
+L(exit48):
+ sub %edi, %eax
+ bsf %edx, %edx
+ add %edx, %eax
+ add $48, %eax
+ POP (%edi)
+ POP (%esi)
+ ret
+
+END (__strlen_sse2_bsf)
+
+#endif
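
A C sketch of the scan this file implements: pcmpeqb marks the zero bytes of a 16-byte block, pmovmskb turns that into a bitmask, and a forward bit scan (bsf, here __builtin_ctz) finds the first NUL. Aligned 16-byte loads cannot cross a page, so aligning down and masking off the bytes before s is safe; toy_strlen_sse2_bsf is a hypothetical name, and the real code above adds an unaligned fast path and a 4x-unrolled loop:

#include <emmintrin.h>   /* SSE2 */
#include <stddef.h>
#include <stdint.h>

static size_t
toy_strlen_sse2_bsf (const char *s)
{
  const __m128i zero = _mm_setzero_si128 ();
  const char *p = (const char *) ((uintptr_t) s & ~(uintptr_t) 15);
  unsigned int skip = (unsigned int) (s - p);

  unsigned int mask = (unsigned int)
    _mm_movemask_epi8 (_mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) p),
                                       zero));
  mask &= ~0u << skip;          /* ignore the bytes before s */

  while (mask == 0)
    {
      p += 16;
      mask = (unsigned int)
        _mm_movemask_epi8 (_mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) p),
                                           zero));
    }
  /* p + ctz (mask) is the address of the terminating NUL.  */
  return (size_t) (p + __builtin_ctz (mask) - s);
}
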
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2.S
new file mode 100644
index 0000000000..36fc1469d0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strlen-sse2.S
@@ -0,0 +1,695 @@
+/* strlen with SSE2
+ Copyright (C) 2010-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* For strlen, only the SHARED version is optimized; for strcat, strncat
+   and strnlen, both the STATIC and SHARED versions are optimized.  */
+
+#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && IS_IN (libc)
+
+# ifndef USE_AS_STRCAT
+
+# include <sysdep.h>
+# define PARMS 4
+# define STR PARMS
+# define RETURN ret
+
+# ifdef USE_AS_STRNLEN
+# define LEN PARMS + 8
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+# undef RETURN
+# define RETURN POP (%edi); CFI_PUSH(%edi); ret
+# endif
+
+# ifndef STRLEN
+# define STRLEN __strlen_sse2
+# endif
+
+ atom_text_section
+ENTRY (STRLEN)
+ mov STR(%esp), %edx
+# ifdef USE_AS_STRNLEN
+ PUSH (%edi)
+ movl LEN(%esp), %edi
+ sub $4, %edi
+ jbe L(len_less4_prolog)
+# endif
+# endif
+ xor %eax, %eax
+ cmpb $0, (%edx)
+ jz L(exit_tail0)
+ cmpb $0, 1(%edx)
+ jz L(exit_tail1)
+ cmpb $0, 2(%edx)
+ jz L(exit_tail2)
+ cmpb $0, 3(%edx)
+ jz L(exit_tail3)
+
+# ifdef USE_AS_STRNLEN
+ sub $4, %edi
+ jbe L(len_less8_prolog)
+# endif
+
+ cmpb $0, 4(%edx)
+ jz L(exit_tail4)
+ cmpb $0, 5(%edx)
+ jz L(exit_tail5)
+ cmpb $0, 6(%edx)
+ jz L(exit_tail6)
+ cmpb $0, 7(%edx)
+ jz L(exit_tail7)
+
+# ifdef USE_AS_STRNLEN
+ sub $4, %edi
+ jbe L(len_less12_prolog)
+# endif
+
+ cmpb $0, 8(%edx)
+ jz L(exit_tail8)
+ cmpb $0, 9(%edx)
+ jz L(exit_tail9)
+ cmpb $0, 10(%edx)
+ jz L(exit_tail10)
+ cmpb $0, 11(%edx)
+ jz L(exit_tail11)
+
+# ifdef USE_AS_STRNLEN
+ sub $4, %edi
+ jbe L(len_less16_prolog)
+# endif
+
+ cmpb $0, 12(%edx)
+ jz L(exit_tail12)
+ cmpb $0, 13(%edx)
+ jz L(exit_tail13)
+ cmpb $0, 14(%edx)
+ jz L(exit_tail14)
+ cmpb $0, 15(%edx)
+ jz L(exit_tail15)
+
+ pxor %xmm0, %xmm0
+ lea 16(%edx), %eax
+ mov %eax, %ecx
+ and $-16, %eax
+
+# ifdef USE_AS_STRNLEN
+ and $15, %edx
+ add %edx, %edi
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ pxor %xmm1, %xmm1
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ pxor %xmm2, %xmm2
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ pxor %xmm3, %xmm3
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqb (%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+# ifdef USE_AS_STRNLEN
+ mov %eax, %edx
+ and $63, %edx
+ add %edx, %edi
+# endif
+
+ and $-0x40, %eax
+
+ .p2align 4
+L(aligned_64_loop):
+# ifdef USE_AS_STRNLEN
+ sub $64, %edi
+ jbe L(len_less64)
+# endif
+ movaps (%eax), %xmm0
+ movaps 16(%eax), %xmm1
+ movaps 32(%eax), %xmm2
+ movaps 48(%eax), %xmm6
+ pminub %xmm1, %xmm0
+ pminub %xmm6, %xmm2
+ pminub %xmm0, %xmm2
+ pcmpeqb %xmm3, %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ lea 64(%eax), %eax
+ jz L(aligned_64_loop)
+
+ pcmpeqb -64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 48(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqb -32(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqb %xmm6, %xmm3
+ pmovmskb %xmm3, %edx
+ lea -16(%ecx), %ecx
+L(exit):
+ sub %ecx, %eax
+ test %dl, %dl
+ jz L(exit_high)
+
+ mov %dl, %cl
+ and $15, %cl
+ jz L(exit_8)
+ test $0x01, %dl
+ jnz L(exit_tail0)
+ test $0x02, %dl
+ jnz L(exit_tail1)
+ test $0x04, %dl
+ jnz L(exit_tail2)
+ add $3, %eax
+ RETURN
+
+ .p2align 4
+L(exit_8):
+ test $0x10, %dl
+ jnz L(exit_tail4)
+ test $0x20, %dl
+ jnz L(exit_tail5)
+ test $0x40, %dl
+ jnz L(exit_tail6)
+ add $7, %eax
+ RETURN
+
+ .p2align 4
+L(exit_high):
+ mov %dh, %ch
+ and $15, %ch
+ jz L(exit_high_8)
+ test $0x01, %dh
+ jnz L(exit_tail8)
+ test $0x02, %dh
+ jnz L(exit_tail9)
+ test $0x04, %dh
+ jnz L(exit_tail10)
+ add $11, %eax
+ RETURN
+
+ .p2align 4
+L(exit_high_8):
+ test $0x10, %dh
+ jnz L(exit_tail12)
+ test $0x20, %dh
+ jnz L(exit_tail13)
+ test $0x40, %dh
+ jnz L(exit_tail14)
+ add $15, %eax
+L(exit_tail0):
+ RETURN
+
+# ifdef USE_AS_STRNLEN
+
+ .p2align 4
+L(len_less64):
+ pxor %xmm0, %xmm0
+ add $64, %edi
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ pxor %xmm1, %xmm1
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ sub $16, %edi
+ jbe L(return_start_len)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ sub $16, %edi
+ jbe L(return_start_len)
+
+ pcmpeqb (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ sub $16, %edi
+ jbe L(return_start_len)
+
+ pcmpeqb (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ lea 16(%eax), %eax
+ test %edx, %edx
+ jnz L(strnlen_exit)
+
+ movl LEN(%esp), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit):
+ sub %ecx, %eax
+
+ test %dl, %dl
+ jz L(strnlen_exit_high)
+ mov %dl, %cl
+ and $15, %cl
+ jz L(strnlen_exit_8)
+ test $0x01, %dl
+ jnz L(exit_tail0)
+ test $0x02, %dl
+ jnz L(strnlen_exit_tail1)
+ test $0x04, %dl
+ jnz L(strnlen_exit_tail2)
+ sub $4, %edi
+ jb L(return_start_len)
+ lea 3(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_8):
+ test $0x10, %dl
+ jnz L(strnlen_exit_tail4)
+ test $0x20, %dl
+ jnz L(strnlen_exit_tail5)
+ test $0x40, %dl
+ jnz L(strnlen_exit_tail6)
+ sub $8, %edi
+ jb L(return_start_len)
+ lea 7(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_high):
+ mov %dh, %ch
+ and $15, %ch
+ jz L(strnlen_exit_high_8)
+ test $0x01, %dh
+ jnz L(strnlen_exit_tail8)
+ test $0x02, %dh
+ jnz L(strnlen_exit_tail9)
+ test $0x04, %dh
+ jnz L(strnlen_exit_tail10)
+ sub $12, %edi
+ jb L(return_start_len)
+ lea 11(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_high_8):
+ test $0x10, %dh
+ jnz L(strnlen_exit_tail12)
+ test $0x20, %dh
+ jnz L(strnlen_exit_tail13)
+ test $0x40, %dh
+ jnz L(strnlen_exit_tail14)
+ sub $16, %edi
+ jb L(return_start_len)
+ lea 15(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail1):
+ sub $2, %edi
+ jb L(return_start_len)
+ lea 1(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail2):
+ sub $3, %edi
+ jb L(return_start_len)
+ lea 2(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail4):
+ sub $5, %edi
+ jb L(return_start_len)
+ lea 4(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail5):
+ sub $6, %edi
+ jb L(return_start_len)
+ lea 5(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail6):
+ sub $7, %edi
+ jb L(return_start_len)
+ lea 6(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail8):
+ sub $9, %edi
+ jb L(return_start_len)
+ lea 8(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail9):
+ sub $10, %edi
+ jb L(return_start_len)
+ lea 9(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail10):
+ sub $11, %edi
+ jb L(return_start_len)
+ lea 10(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail12):
+ sub $13, %edi
+ jb L(return_start_len)
+ lea 12(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail13):
+ sub $14, %edi
+ jb L(return_start_len)
+ lea 13(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(strnlen_exit_tail14):
+ sub $15, %edi
+ jb L(return_start_len)
+ lea 14(%eax), %eax
+ RETURN
+
+ .p2align 4
+L(return_start_len):
+ movl LEN(%esp), %eax
+ RETURN
+
+/* For the prolog only.  */
+
+ .p2align 4
+L(len_less4_prolog):
+ xor %eax, %eax
+
+ add $4, %edi
+ jz L(exit_tail0)
+
+ cmpb $0, (%edx)
+ jz L(exit_tail0)
+ cmp $1, %edi
+ je L(exit_tail1)
+
+ cmpb $0, 1(%edx)
+ jz L(exit_tail1)
+ cmp $2, %edi
+ je L(exit_tail2)
+
+ cmpb $0, 2(%edx)
+ jz L(exit_tail2)
+ cmp $3, %edi
+ je L(exit_tail3)
+
+ cmpb $0, 3(%edx)
+ jz L(exit_tail3)
+ mov $4, %eax
+ RETURN
+
+ .p2align 4
+L(len_less8_prolog):
+ add $4, %edi
+
+ cmpb $0, 4(%edx)
+ jz L(exit_tail4)
+ cmp $1, %edi
+ je L(exit_tail5)
+
+ cmpb $0, 5(%edx)
+ jz L(exit_tail5)
+ cmp $2, %edi
+ je L(exit_tail6)
+
+ cmpb $0, 6(%edx)
+ jz L(exit_tail6)
+ cmp $3, %edi
+ je L(exit_tail7)
+
+ cmpb $0, 7(%edx)
+ jz L(exit_tail7)
+ mov $8, %eax
+ RETURN
+
+
+ .p2align 4
+L(len_less12_prolog):
+ add $4, %edi
+
+ cmpb $0, 8(%edx)
+ jz L(exit_tail8)
+ cmp $1, %edi
+ je L(exit_tail9)
+
+ cmpb $0, 9(%edx)
+ jz L(exit_tail9)
+ cmp $2, %edi
+ je L(exit_tail10)
+
+ cmpb $0, 10(%edx)
+ jz L(exit_tail10)
+ cmp $3, %edi
+ je L(exit_tail11)
+
+ cmpb $0, 11(%edx)
+ jz L(exit_tail11)
+ mov $12, %eax
+ RETURN
+
+ .p2align 4
+L(len_less16_prolog):
+ add $4, %edi
+
+ cmpb $0, 12(%edx)
+ jz L(exit_tail12)
+ cmp $1, %edi
+ je L(exit_tail13)
+
+ cmpb $0, 13(%edx)
+ jz L(exit_tail13)
+ cmp $2, %edi
+ je L(exit_tail14)
+
+ cmpb $0, 14(%edx)
+ jz L(exit_tail14)
+ cmp $3, %edi
+ je L(exit_tail15)
+
+ cmpb $0, 15(%edx)
+ jz L(exit_tail15)
+ mov $16, %eax
+ RETURN
+# endif
+
+ .p2align 4
+L(exit_tail1):
+ add $1, %eax
+ RETURN
+
+L(exit_tail2):
+ add $2, %eax
+ RETURN
+
+L(exit_tail3):
+ add $3, %eax
+ RETURN
+
+L(exit_tail4):
+ add $4, %eax
+ RETURN
+
+L(exit_tail5):
+ add $5, %eax
+ RETURN
+
+L(exit_tail6):
+ add $6, %eax
+ RETURN
+
+L(exit_tail7):
+ add $7, %eax
+ RETURN
+
+L(exit_tail8):
+ add $8, %eax
+ RETURN
+
+L(exit_tail9):
+ add $9, %eax
+ RETURN
+
+L(exit_tail10):
+ add $10, %eax
+ RETURN
+
+L(exit_tail11):
+ add $11, %eax
+ RETURN
+
+L(exit_tail12):
+ add $12, %eax
+ RETURN
+
+L(exit_tail13):
+ add $13, %eax
+ RETURN
+
+L(exit_tail14):
+ add $14, %eax
+ RETURN
+
+L(exit_tail15):
+ add $15, %eax
+# ifndef USE_AS_STRCAT
+ RETURN
+END (STRLEN)
+# endif
+#endif
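
The aligned_64_loop above leans on one trick worth spelling out: pminub (unsigned byte minimum) folds four 16-byte blocks into one, and a byte of the result is zero iff some input byte was zero, so a single pcmpeqb/pmovmskb pair tests 64 bytes per iteration. A minimal C rendering of that step, assuming p is aligned as the loop guarantees with `and $-0x40` (has_nul_in_64_bytes is a hypothetical name):

#include <emmintrin.h>

static int
has_nul_in_64_bytes (const char *p)
{
  __m128i a = _mm_load_si128 ((const __m128i *) p);
  __m128i b = _mm_load_si128 ((const __m128i *) (p + 16));
  __m128i c = _mm_load_si128 ((const __m128i *) (p + 32));
  __m128i d = _mm_load_si128 ((const __m128i *) (p + 48));
  /* min (a, b, c, d) has a zero byte iff any input had one.  */
  __m128i m = _mm_min_epu8 (_mm_min_epu8 (a, b), _mm_min_epu8 (c, d));
  return _mm_movemask_epi8 (_mm_cmpeq_epi8 (m, _mm_setzero_si128 ())) != 0;
}
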
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strlen.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strlen.S
new file mode 100644
index 0000000000..77cf6bcdb0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strlen.S
@@ -0,0 +1,60 @@
+/* Multiple versions of strlen
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc and for the
+   DSO.  In static binaries, we need strlen before the initialization
+   has happened.  */
+#if defined SHARED && IS_IN (libc)
+ .text
+ENTRY(strlen)
+ .type strlen, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strlen_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strlen_sse2_bsf)
+ HAS_ARCH_FEATURE (Slow_BSF)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strlen_sse2)
+2: ret
+END(strlen)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __strlen_ia32, @function; \
+ .globl __strlen_ia32; \
+ .p2align 4; \
+ __strlen_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strlen_ia32, .-__strlen_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC uses.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strlen; __GI_strlen = __strlen_ia32
+#endif
+
+#include "../../i586/strlen.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncase-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase-c.c
new file mode 100644
index 0000000000..76581eb62b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase-c.c
@@ -0,0 +1,8 @@
+#include <string.h>
+
+extern __typeof (strncasecmp) __strncasecmp_nonascii;
+
+#define __strncasecmp __strncasecmp_nonascii
+#include <string/strncase.c>
+
+strong_alias (__strncasecmp_nonascii, __strncasecmp_ia32)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncase.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase.S
new file mode 100644
index 0000000000..a56e63a566
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase.S
@@ -0,0 +1,39 @@
+/* Entry point for multi-version x86 strncasecmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+ .text
+ENTRY(__strncasecmp)
+ .type __strncasecmp, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strncasecmp_ia32)
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strncasecmp_ssse3)
+ HAS_CPU_FEATURE (SSE4_2)
+ jz 2f
+ HAS_ARCH_FEATURE (Slow_SSE4_2)
+ jnz 2f
+ LOAD_FUNC_GOT_EAX (__strncasecmp_sse4_2)
+2: ret
+END(__strncasecmp)
+
+weak_alias (__strncasecmp, strncasecmp)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-c.c
new file mode 100644
index 0000000000..7e601af271
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-c.c
@@ -0,0 +1,13 @@
+#include <string.h>
+
+extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii;
+
+#define __strncasecmp_l __strncasecmp_l_nonascii
+#define USE_IN_EXTENDED_LOCALE_MODEL 1
+#include <string/strncase.c>
+
+strong_alias (__strncasecmp_l_nonascii, __strncasecmp_l_ia32)
+
+/* The needs of strncasecmp_l in libc are minimal; there is no need to
+   go through the IFUNC.  */
+strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-sse4.S
new file mode 100644
index 0000000000..557210832e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-sse4.S
@@ -0,0 +1,2 @@
+#define USE_AS_STRNCASECMP_L 1
+#include "strcmp-sse4.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S
new file mode 100644
index 0000000000..d438a1ae35
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S
@@ -0,0 +1,2 @@
+#define USE_AS_STRNCASECMP_L 1
+#include "strcmp-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l.S
new file mode 100644
index 0000000000..8a74ee8574
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncase_l.S
@@ -0,0 +1,7 @@
+/* Multiple versions of strncasecmp_l
+ All versions must be listed in ifunc-impl-list.c. */
+#define STRCMP __strncasecmp_l
+#define USE_AS_STRNCASECMP_L
+#include "strcmp.S"
+
+weak_alias (__strncasecmp_l, strncasecmp_l)
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-c.c
new file mode 100644
index 0000000000..132a000545
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-c.c
@@ -0,0 +1,8 @@
+#define STRNCAT __strncat_ia32
+#ifdef SHARED
+#undef libc_hidden_def
+#define libc_hidden_def(name) \
+ __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
+#endif
+
+#include "string/strncat.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-sse2.S
new file mode 100644
index 0000000000..f1045b72b8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-sse2.S
@@ -0,0 +1,4 @@
+#define STRCAT __strncat_sse2
+#define USE_AS_STRNCAT
+
+#include "strcat-sse2.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-ssse3.S
new file mode 100644
index 0000000000..625b90a978
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat-ssse3.S
@@ -0,0 +1,4 @@
+#define STRCAT __strncat_ssse3
+#define USE_AS_STRNCAT
+
+#include "strcat-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncat.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat.S
new file mode 100644
index 0000000000..5c1bf41453
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncat.S
@@ -0,0 +1,5 @@
+/* Multiple versions of strncat
+ All versions must be listed in ifunc-impl-list.c. */
+#define STRCAT strncat
+#define USE_AS_STRNCAT
+#include "strcat.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-c.c
new file mode 100644
index 0000000000..cc059da494
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-c.c
@@ -0,0 +1,8 @@
+#ifdef SHARED
+# define STRNCMP __strncmp_ia32
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+ __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32);
+#endif
+
+#include "string/strncmp.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-sse4.S
new file mode 100644
index 0000000000..cf14dfaf6c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-sse4.S
@@ -0,0 +1,5 @@
+#ifdef SHARED
+# define USE_AS_STRNCMP
+# define STRCMP __strncmp_sse4_2
+# include "strcmp-sse4.S"
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
new file mode 100644
index 0000000000..536c8685f2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
@@ -0,0 +1,5 @@
+#ifdef SHARED
+# define USE_AS_STRNCMP
+# define STRCMP __strncmp_ssse3
+# include "strcmp-ssse3.S"
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp.S
new file mode 100644
index 0000000000..150d4786d2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncmp.S
@@ -0,0 +1,5 @@
+/* Multiple versions of strncmp
+ All versions must be listed in ifunc-impl-list.c. */
+#define USE_AS_STRNCMP
+#define STRCMP strncmp
+#include "strcmp.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-c.c
new file mode 100644
index 0000000000..201e3f98b3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-c.c
@@ -0,0 +1,8 @@
+#define STRNCPY __strncpy_ia32
+#ifdef SHARED
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+ __hidden_ver1 (__strncpy_ia32, __GI_strncpy, __strncpy_ia32);
+#endif
+
+#include "string/strncpy.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-sse2.S
new file mode 100644
index 0000000000..bdd99239a4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNCPY
+#define STRCPY __strncpy_sse2
+#include "strcpy-sse2.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
new file mode 100644
index 0000000000..bf82ee447d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNCPY
+#define STRCPY __strncpy_ssse3
+#include "strcpy-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy.S
new file mode 100644
index 0000000000..9c257efc6e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strncpy.S
@@ -0,0 +1,5 @@
+/* Multiple versions of strncpy
+ All versions must be listed in ifunc-impl-list.c. */
+#define USE_AS_STRNCPY
+#define STRCPY strncpy
+#include "strcpy.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-c.c
new file mode 100644
index 0000000000..351e939a93
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-c.c
@@ -0,0 +1,10 @@
+#define STRNLEN __strnlen_ia32
+#ifdef SHARED
+# undef libc_hidden_def
+# define libc_hidden_def(name) \
+ __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32); \
+ strong_alias (__strnlen_ia32, __strnlen_ia32_1); \
+ __hidden_ver1 (__strnlen_ia32_1, __GI___strnlen, __strnlen_ia32_1);
+#endif
+
+#include "string/strnlen.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-sse2.S
new file mode 100644
index 0000000000..56b6ae2a5c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNLEN
+#define STRLEN __strnlen_sse2
+#include "strlen-sse2.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen.S
new file mode 100644
index 0000000000..d241522c70
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strnlen.S
@@ -0,0 +1,37 @@
+/* Multiple versions of strnlen
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(__strnlen)
+ .type __strnlen, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strnlen_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strnlen_sse2)
+2: ret
+END(__strnlen)
+
+weak_alias (__strnlen, strnlen)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk-c.c
new file mode 100644
index 0000000000..5db62053b3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk-c.c
@@ -0,0 +1,2 @@
+#define __strpbrk_sse2 __strpbrk_ia32
+#include <sysdeps/x86_64/multiarch/strpbrk-c.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk.S
new file mode 100644
index 0000000000..7201d6376f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strpbrk.S
@@ -0,0 +1,5 @@
+/* Multiple versions of strpbrk
+ All versions must be listed in ifunc-impl-list.c. */
+#define STRCSPN strpbrk
+#define USE_AS_STRPBRK
+#include "strcspn.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
new file mode 100644
index 0000000000..39a7c8825b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
@@ -0,0 +1,282 @@
+/* strrchr with SSE2 with bsf and bsr
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define STR1 PARMS
+# define STR2 STR1+4
+
+ .text
+ENTRY (__strrchr_sse2_bsf)
+
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+
+ PUSH (%edi)
+ pxor %xmm2, %xmm2
+ mov %ecx, %edi
+ punpcklbw %xmm1, %xmm1
+ punpcklbw %xmm1, %xmm1
+ /* ECX has OFFSET. */
+ and $63, %ecx
+ cmp $48, %ecx
+ pshufd $0, %xmm1, %xmm1
+	ja	L(crosscache)
+
+/* unaligned string. */
+ movdqu (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ /* Find where NULL is. */
+ pmovmskb %xmm2, %edx
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+
+ test %eax, %eax
+ jnz L(unaligned_match1)
+
+ test %edx, %edx
+ jnz L(return_null)
+
+ and $-16, %edi
+ add $16, %edi
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ xor %ebx, %ebx
+ jmp L(loop)
+
+ CFI_POP (%esi)
+ CFI_POP (%ebx)
+
+ .p2align 4
+L(unaligned_return_value1):
+ bsf %edx, %ecx
+ mov $2, %edx
+ shl %cl, %edx
+ sub $1, %edx
+ and %edx, %eax
+ jz L(return_null)
+ bsr %eax, %eax
+ add %edi, %eax
+ POP (%edi)
+ ret
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(unaligned_match1):
+ test %edx, %edx
+ jnz L(unaligned_return_value1)
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ mov %eax, %ebx
+ lea 16(%edi), %esi
+ and $-16, %edi
+ add $16, %edi
+ jmp L(loop)
+
+ CFI_POP (%esi)
+ CFI_POP (%ebx)
+
+ .p2align 4
+L(crosscache):
+/* Handle unaligned string.  */
+ and $15, %ecx
+ and $-16, %edi
+ pxor %xmm3, %xmm3
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm3
+ pcmpeqb %xmm1, %xmm0
+ /* Find where NULL is. */
+ pmovmskb %xmm3, %edx
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ /* Remove the leading bytes. */
+ shr %cl, %edx
+ shr %cl, %eax
+
+ test %eax, %eax
+ jnz L(unaligned_match)
+
+ test %edx, %edx
+ jnz L(return_null)
+
+ add $16, %edi
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ xor %ebx, %ebx
+ jmp L(loop)
+
+ CFI_POP (%esi)
+ CFI_POP (%ebx)
+
+ .p2align 4
+L(unaligned_return_value):
+ add %ecx, %edi
+ bsf %edx, %ecx
+ mov $2, %edx
+ shl %cl, %edx
+ sub $1, %edx
+ and %edx, %eax
+ jz L(return_null)
+ bsr %eax, %eax
+ add %edi, %eax
+ POP (%edi)
+ ret
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(unaligned_match):
+ test %edx, %edx
+ jnz L(unaligned_return_value)
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ mov %eax, %ebx
+ add $16, %edi
+ lea (%edi, %ecx), %esi
+
+/* Loop start on aligned string. */
+ .p2align 4
+L(loop):
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jz L(loop)
+
+L(matches):
+ test %eax, %eax
+ jnz L(match)
+L(return_value):
+ test %ebx, %ebx
+ jz L(return_null_1)
+ bsr %ebx, %eax
+ add %esi, %eax
+
+ POP (%ebx)
+ POP (%esi)
+
+ sub $16, %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(match):
+ pmovmskb %xmm2, %ecx
+ test %ecx, %ecx
+ jnz L(return_value_1)
+ mov %eax, %ebx
+ mov %edi, %esi
+ jmp L(loop)
+
+ .p2align 4
+L(return_value_1):
+ bsf %ecx, %ecx
+ mov $2, %edx
+ shl %cl, %edx
+ sub $1, %edx
+ and %edx, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+
+ bsr %eax, %eax
+ add %edi, %eax
+ sub $16, %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+/* Return NULL. */
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ POP (%edi)
+ ret
+
+ CFI_PUSH (%edi)
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+/* Return NULL. */
+ .p2align 4
+L(return_null_1):
+ POP (%ebx)
+ POP (%esi)
+ POP (%edi)
+ xor %eax, %eax
+ ret
+
+END (__strrchr_sse2_bsf)
+#endif
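
The unaligned exit paths above use a compact mask trick: bsf on the NUL mask gives the terminator's bit index, `(2 << idx) - 1` keeps only matches at or before it, and bsr picks the last survivor. A C rendering of that step (last_match_before_nul is a hypothetical name; both masks come from pmovmskb on a 16-byte block, so nul_idx <= 15):

static int
last_match_before_nul (unsigned int match_mask, unsigned int nul_idx)
{
  unsigned int keep = (2u << nul_idx) - 1;      /* bits 0 .. nul_idx */
  match_mask &= keep;
  if (match_mask == 0)
    return -1;                                  /* no match before NUL */
  /* Equivalent of bsr: index of the highest set bit.  */
  return 31 - (int) __builtin_clz (match_mask);
}
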
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2.S
new file mode 100644
index 0000000000..20934288be
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr-sse2.S
@@ -0,0 +1,708 @@
+/* strrchr SSE2 without bsf and bsr
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 8
+# define ENTRANCE PUSH(%edi);
+# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
+
+# define STR1 PARMS
+# define STR2 STR1+4
+
+ atom_text_section
+ENTRY (__strrchr_sse2)
+
+ ENTRANCE
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+
+ pxor %xmm2, %xmm2
+ mov %ecx, %edi
+ punpcklbw %xmm1, %xmm1
+ punpcklbw %xmm1, %xmm1
+ /* ECX has OFFSET. */
+ and $63, %ecx
+ cmp $48, %ecx
+ pshufd $0, %xmm1, %xmm1
+ ja L(crosscache)
+
+/* unaligned string. */
+ movdqu (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm1, %xmm0
+ /* Find where NULL is. */
+ pmovmskb %xmm2, %ecx
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ add $16, %edi
+
+ test %eax, %eax
+ jnz L(unaligned_match1)
+
+ test %ecx, %ecx
+ jnz L(return_null)
+
+ and $-16, %edi
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ xor %ebx, %ebx
+ jmp L(loop)
+
+ CFI_POP (%esi)
+ CFI_POP (%ebx)
+
+ .p2align 4
+L(unaligned_match1):
+ test %ecx, %ecx
+ jnz L(prolog_find_zero_1)
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ mov %eax, %ebx
+ mov %edi, %esi
+ and $-16, %edi
+ jmp L(loop)
+
+ CFI_POP (%esi)
+ CFI_POP (%ebx)
+
+ .p2align 4
+L(crosscache):
+/* Handle unaligned string.  */
+ and $15, %ecx
+ and $-16, %edi
+ pxor %xmm3, %xmm3
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm3
+ pcmpeqb %xmm1, %xmm0
+ /* Find where NULL is. */
+ pmovmskb %xmm3, %edx
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ /* Remove the leading bytes. */
+ shr %cl, %edx
+ shr %cl, %eax
+ add $16, %edi
+
+ test %eax, %eax
+ jnz L(unaligned_match)
+
+ test %edx, %edx
+ jnz L(return_null)
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ xor %ebx, %ebx
+ jmp L(loop)
+
+ CFI_POP (%esi)
+ CFI_POP (%ebx)
+
+ .p2align 4
+L(unaligned_match):
+ test %edx, %edx
+ jnz L(prolog_find_zero)
+
+ PUSH (%esi)
+ PUSH (%ebx)
+
+ mov %eax, %ebx
+ lea (%edi, %ecx), %esi
+
+/* Loop start on aligned string. */
+ .p2align 4
+L(loop):
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm0
+ pcmpeqb %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jz L(loop)
+
+L(matches):
+ test %eax, %eax
+ jnz L(match)
+L(return_value):
+ test %ebx, %ebx
+ jz L(return_null_1)
+ mov %ebx, %eax
+ mov %esi, %edi
+
+ POP (%ebx)
+ POP (%esi)
+
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(return_null_1):
+ POP (%ebx)
+ POP (%esi)
+
+ xor %eax, %eax
+ RETURN
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(match):
+ pmovmskb %xmm2, %ecx
+ test %ecx, %ecx
+ jnz L(find_zero)
+ mov %eax, %ebx
+ mov %edi, %esi
+ jmp L(loop)
+
+ .p2align 4
+L(find_zero):
+ test %cl, %cl
+ jz L(find_zero_high)
+ mov %cl, %dl
+ and $15, %dl
+ jz L(find_zero_8)
+ test $0x01, %cl
+ jnz L(FindZeroExit1)
+ test $0x02, %cl
+ jnz L(FindZeroExit2)
+ test $0x04, %cl
+ jnz L(FindZeroExit3)
+ and $1 << 4 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(find_zero_8):
+ test $0x10, %cl
+ jnz L(FindZeroExit5)
+ test $0x20, %cl
+ jnz L(FindZeroExit6)
+ test $0x40, %cl
+ jnz L(FindZeroExit7)
+ and $1 << 8 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(find_zero_high):
+ mov %ch, %dh
+ and $15, %dh
+ jz L(find_zero_high_8)
+ test $0x01, %ch
+ jnz L(FindZeroExit9)
+ test $0x02, %ch
+ jnz L(FindZeroExit10)
+ test $0x04, %ch
+ jnz L(FindZeroExit11)
+ and $1 << 12 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(find_zero_high_8):
+ test $0x10, %ch
+ jnz L(FindZeroExit13)
+ test $0x20, %ch
+ jnz L(FindZeroExit14)
+ test $0x40, %ch
+ jnz L(FindZeroExit15)
+ and $1 << 16 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit1):
+ and $1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit2):
+ and $1 << 2 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit3):
+ and $1 << 3 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit5):
+ and $1 << 5 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit6):
+ and $1 << 6 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit7):
+ and $1 << 7 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit9):
+ and $1 << 9 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit10):
+ and $1 << 10 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit11):
+ and $1 << 11 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit13):
+ and $1 << 13 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit14):
+ and $1 << 14 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+ jmp L(match_exit)
+
+ CFI_PUSH (%ebx)
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(FindZeroExit15):
+ and $1 << 15 - 1, %eax
+ jz L(return_value)
+
+ POP (%ebx)
+ POP (%esi)
+
+ .p2align 4
+L(match_exit):
+ test %ah, %ah
+ jnz L(match_exit_high)
+ mov %al, %dl
+ and $15 << 4, %dl
+ jnz L(match_exit_8)
+ test $0x08, %al
+ jnz L(Exit4)
+ test $0x04, %al
+ jnz L(Exit3)
+ test $0x02, %al
+ jnz L(Exit2)
+ lea -16(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_exit_8):
+ test $0x80, %al
+ jnz L(Exit8)
+ test $0x40, %al
+ jnz L(Exit7)
+ test $0x20, %al
+ jnz L(Exit6)
+ lea -12(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_exit_high):
+ mov %ah, %dh
+ and $15 << 4, %dh
+ jnz L(match_exit_high_8)
+ test $0x08, %ah
+ jnz L(Exit12)
+ test $0x04, %ah
+ jnz L(Exit11)
+ test $0x02, %ah
+ jnz L(Exit10)
+ lea -8(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_exit_high_8):
+ test $0x80, %ah
+ jnz L(Exit16)
+ test $0x40, %ah
+ jnz L(Exit15)
+ test $0x20, %ah
+ jnz L(Exit14)
+ lea -4(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit2):
+ lea -15(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit3):
+ lea -14(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit4):
+ lea -13(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit6):
+ lea -11(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit7):
+ lea -10(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit8):
+ lea -9(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit10):
+ lea -7(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit11):
+ lea -6(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit12):
+ lea -5(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit14):
+ lea -3(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit15):
+ lea -2(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(Exit16):
+ lea -1(%edi), %eax
+ RETURN
+
+/* Return NULL. */
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero):
+ add %ecx, %edi
+ mov %edx, %ecx
+L(prolog_find_zero_1):
+ test %cl, %cl
+ jz L(prolog_find_zero_high)
+ mov %cl, %dl
+ and $15, %dl
+ jz L(prolog_find_zero_8)
+ test $0x01, %cl
+ jnz L(PrologFindZeroExit1)
+ test $0x02, %cl
+ jnz L(PrologFindZeroExit2)
+ test $0x04, %cl
+ jnz L(PrologFindZeroExit3)
+ and $1 << 4 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero_8):
+ test $0x10, %cl
+ jnz L(PrologFindZeroExit5)
+ test $0x20, %cl
+ jnz L(PrologFindZeroExit6)
+ test $0x40, %cl
+ jnz L(PrologFindZeroExit7)
+ and $1 << 8 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero_high):
+ mov %ch, %dh
+ and $15, %dh
+ jz L(prolog_find_zero_high_8)
+ test $0x01, %ch
+ jnz L(PrologFindZeroExit9)
+ test $0x02, %ch
+ jnz L(PrologFindZeroExit10)
+ test $0x04, %ch
+ jnz L(PrologFindZeroExit11)
+ and $1 << 12 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero_high_8):
+ test $0x10, %ch
+ jnz L(PrologFindZeroExit13)
+ test $0x20, %ch
+ jnz L(PrologFindZeroExit14)
+ test $0x40, %ch
+ jnz L(PrologFindZeroExit15)
+ and $1 << 16 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit1):
+ and $1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit2):
+ and $1 << 2 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit3):
+ and $1 << 3 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit5):
+ and $1 << 5 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit6):
+ and $1 << 6 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit7):
+ and $1 << 7 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit9):
+ and $1 << 9 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit10):
+ and $1 << 10 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit11):
+ and $1 << 11 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit13):
+ and $1 << 13 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit14):
+ and $1 << 14 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(PrologFindZeroExit15):
+ and $1 << 15 - 1, %eax
+ jnz L(match_exit)
+ xor %eax, %eax
+ RETURN
+
+END (__strrchr_sse2)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr.S
new file mode 100644
index 0000000000..d9281eaeae
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strrchr.S
@@ -0,0 +1,57 @@
+/* Multiple versions of strrchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(strrchr)
+ .type strrchr, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strrchr_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf)
+ HAS_ARCH_FEATURE (Slow_BSF)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strrchr_sse2)
+2: ret
+END(strrchr)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __strrchr_ia32, @function; \
+ .globl __strrchr_ia32; \
+ .p2align 4; \
+ __strrchr_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC uses.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strrchr; __GI_strrchr = __strrchr_ia32
+#endif
+
+#include "../../strrchr.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strspn-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/strspn-c.c
new file mode 100644
index 0000000000..bea09dea71
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strspn-c.c
@@ -0,0 +1,2 @@
+#define __strspn_sse2 __strspn_ia32
+#include <sysdeps/x86_64/multiarch/strspn-c.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/strspn.S b/REORG.TODO/sysdeps/i386/i686/multiarch/strspn.S
new file mode 100644
index 0000000000..1269062381
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/strspn.S
@@ -0,0 +1,56 @@
+/* Multiple versions of strspn
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+ .text
+ENTRY(strspn)
+ .type strspn, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__strspn_ia32)
+ HAS_CPU_FEATURE (SSE4_2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__strspn_sse42)
+2: ret
+END(strspn)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __strspn_ia32, @function; \
+ .globl __strspn_ia32; \
+ .p2align 4; \
+__strspn_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strspn_ia32, .-__strspn_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with hidden functions in a shared library, since
+   they would be called without setting up EBX, which is needed for the
+   PLT that IFUNC uses.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strspn; __GI_strspn = __strspn_ia32
+#endif
+
+#include "../../strspn.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/test-multiarch.c b/REORG.TODO/sysdeps/i386/i686/multiarch/test-multiarch.c
new file mode 100644
index 0000000000..593cfec273
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/test-multiarch.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/test-multiarch.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/varshift.c b/REORG.TODO/sysdeps/i386/i686/multiarch/varshift.c
new file mode 100644
index 0000000000..7760b966e2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/varshift.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/varshift.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/varshift.h b/REORG.TODO/sysdeps/i386/i686/multiarch/varshift.h
new file mode 100644
index 0000000000..7c72c70d67
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/varshift.h
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/varshift.h>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-c.c
new file mode 100644
index 0000000000..38d41d04de
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-c.c
@@ -0,0 +1,22 @@
+#include <wchar.h>
+
+#if IS_IN (libc)
+# undef libc_hidden_weak
+# define libc_hidden_weak(name)
+
+# undef weak_alias
+# define weak_alias(name,alias)
+
+# ifdef SHARED
+# undef libc_hidden_def
+# define libc_hidden_def(name) \
+ __hidden_ver1 (__wcschr_ia32, __GI_wcschr, __wcschr_ia32); \
+ strong_alias (__wcschr_ia32, __wcschr_ia32_1); \
+ __hidden_ver1 (__wcschr_ia32_1, __GI___wcschr, __wcschr_ia32_1);
+# endif
+#endif
+
+extern __typeof (wcschr) __wcschr_ia32;
+
+#define WCSCHR __wcschr_ia32
+#include <wcsmbs/wcschr.c>
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-sse2.S
new file mode 100644
index 0000000000..9ff6c3b8d6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr-sse2.S
@@ -0,0 +1,219 @@
+/* wcschr with SSE2, without using bsf instructions
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define STR1 PARMS
+# define STR2 STR1+4
+
+ atom_text_section
+ENTRY (__wcschr_sse2)
+
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+
+ mov %ecx, %eax
+ punpckldq %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ punpckldq %xmm1, %xmm1
+
+ and $63, %eax
+ cmp $48, %eax
+ ja L(cross_cache)
+
+ movdqu (%ecx), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+ and $-16, %ecx
+ jmp L(loop)
+
+ .p2align 4
+L(cross_cache):
+ PUSH (%edi)
+ mov %ecx, %edi
+ mov %eax, %ecx
+ and $-16, %edi
+ and $15, %ecx
+ movdqa (%edi), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+
+ sarl %cl, %edx
+ sarl %cl, %eax
+ test %eax, %eax
+ jz L(unaligned_no_match)
+
+ add %edi, %ecx
+ POP (%edi)
+
+ test %edx, %edx
+ jz L(match_case1)
+ test %al, %al
+	jz	L(match_high_case2)
+ test $15, %al
+ jnz L(match_case2_4)
+ test $15, %dl
+ jnz L(return_null)
+ lea 4(%ecx), %eax
+ ret
+
+ CFI_PUSH (%edi)
+
+ .p2align 4
+L(unaligned_no_match):
+ mov %edi, %ecx
+ POP (%edi)
+
+ test %edx, %edx
+ jnz L(return_null)
+
+ pxor %xmm2, %xmm2
+
+/* Loop start on aligned string. */
+ .p2align 4
+L(loop):
+ add $16, %ecx
+ movdqa (%ecx), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+ add $16, %ecx
+
+ movdqa (%ecx), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+ add $16, %ecx
+
+ movdqa (%ecx), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jnz L(matches)
+ add $16, %ecx
+
+ movdqa (%ecx), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm2, %edx
+ pmovmskb %xmm0, %eax
+ or %eax, %edx
+ jz L(loop)
+
+ .p2align 4
+L(matches):
+ pmovmskb %xmm2, %edx
+ test %eax, %eax
+ jz L(return_null)
+ test %edx, %edx
+ jz L(match_case1)
+
+ .p2align 4
+L(match_case2):
+ test %al, %al
+	jz	L(match_high_case2)
+ test $15, %al
+ jnz L(match_case2_4)
+ test $15, %dl
+ jnz L(return_null)
+ lea 4(%ecx), %eax
+ ret
+
+ .p2align 4
+L(match_case2_4):
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(match_high_case2):
+ test %dl, %dl
+ jnz L(return_null)
+ test $15, %ah
+ jnz L(match_case2_12)
+ test $15, %dh
+ jnz L(return_null)
+ lea 12(%ecx), %eax
+ ret
+
+ .p2align 4
+L(match_case2_12):
+ lea 8(%ecx), %eax
+ ret
+
+ .p2align 4
+L(match_case1):
+ test %al, %al
+ jz L(match_high_case1)
+
+ test $0x01, %al
+ jnz L(exit0)
+ lea 4(%ecx), %eax
+ ret
+
+ .p2align 4
+L(match_high_case1):
+ test $0x01, %ah
+ jnz L(exit3)
+ lea 12(%ecx), %eax
+ ret
+
+ .p2align 4
+L(exit0):
+ mov %ecx, %eax
+ ret
+
+ .p2align 4
+L(exit3):
+ lea 8(%ecx), %eax
+ ret
+
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ ret
+
+END (__wcschr_sse2)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr.S
new file mode 100644
index 0000000000..d3c65a6436
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcschr.S
@@ -0,0 +1,36 @@
+/* Multiple versions of wcschr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
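+/* This is an IFUNC resolver: it runs during relocation and returns in
+   %eax the address of the implementation to bind, preferring the SSE2
+   version when the CPU supports it and falling back to the ia32 one.  */
+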
+#if IS_IN (libc)
+ .text
+ENTRY(__wcschr)
+ .type __wcschr, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcschr_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__wcschr_sse2)
+2: ret
+END(__wcschr)
+weak_alias (__wcschr, wcschr)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-c.c
new file mode 100644
index 0000000000..e3337d77e2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-c.c
@@ -0,0 +1,14 @@
+#include <wchar.h>
+
+#define WCSCMP __wcscmp_ia32
+#ifdef SHARED
+# undef libc_hidden_def
+# define libc_hidden_def(name) \
+ __hidden_ver1 (__wcscmp_ia32, __GI___wcscmp, __wcscmp_ia32);
+#endif
+#undef weak_alias
+#define weak_alias(name, alias)
+
+extern __typeof (wcscmp) __wcscmp_ia32;
+
+#include "wcsmbs/wcscmp.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
new file mode 100644
index 0000000000..a464b58204
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
@@ -0,0 +1,1018 @@
+/* wcscmp with SSE2
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define ENTRANCE PUSH(%esi); PUSH(%edi)
+# define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
+# define PARMS 4
+# define STR1 PARMS
+# define STR2 STR1+4
+
+/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp
+   function.  */
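+
+/* A rough C sketch of the comparison implemented below (an editorial
+   illustration, not the original authors' reference code): wchar_t values
+   compare as signed integers and the result is -1, 0 or 1:
+
+       int wcscmp_sketch (const wchar_t *s1, const wchar_t *s2)
+       {
+         while (*s1 == *s2 && *s1 != L'\0')
+           s1++, s2++;
+         return *s1 == *s2 ? 0 : (*s1 < *s2 ? -1 : 1);
+       }  */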
+
+ .text
+ENTRY (__wcscmp_sse2)
+/* This implementation uses SSE2 to compare up to 16 bytes at a time.  */
+ mov STR1(%esp), %edx
+ mov STR2(%esp), %eax
+
+ mov (%eax), %ecx
+ cmp %ecx, (%edx)
+ jne L(neq)
+ test %ecx, %ecx
+ jz L(eq)
+
+ mov 4(%eax), %ecx
+ cmp %ecx, 4(%edx)
+ jne L(neq)
+ test %ecx, %ecx
+ jz L(eq)
+
+ mov 8(%eax), %ecx
+ cmp %ecx, 8(%edx)
+ jne L(neq)
+ test %ecx, %ecx
+ jz L(eq)
+
+ mov 12(%eax), %ecx
+ cmp %ecx, 12(%edx)
+ jne L(neq)
+ test %ecx, %ecx
+ jz L(eq)
+
+ ENTRANCE
+ add $16, %eax
+ add $16, %edx
+
+ mov %eax, %esi
+ mov %edx, %edi
+ pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
+ mov %al, %ch
+ mov %dl, %cl
+ and $63, %eax /* esi alignment in cache line */
+ and $63, %edx /* edi alignment in cache line */
+ and $15, %cl
+ jz L(continue_00)
+ cmp $16, %edx
+ jb L(continue_0)
+ cmp $32, %edx
+ jb L(continue_16)
+ cmp $48, %edx
+ jb L(continue_32)
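+
+/* The L(continue_X_Y) labels below dispatch on which 16-byte slot of a
+   64-byte cache line each string starts in (00 meaning the string is
+   16-byte aligned), so every combination gets a variant whose movdqa,
+   movdqu and scalar tails match that layout.  */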
+
+L(continue_48):
+ and $15, %ch
+ jz L(continue_48_00)
+ cmp $16, %eax
+ jb L(continue_0_48)
+ cmp $32, %eax
+ jb L(continue_16_48)
+ cmp $48, %eax
+ jb L(continue_32_48)
+
+ .p2align 4
+L(continue_48_48):
+ mov (%esi), %ecx
+ cmp %ecx, (%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 4(%esi), %ecx
+ cmp %ecx, 4(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 8(%esi), %ecx
+ cmp %ecx, 8(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ movdqu 16(%edi), %xmm1
+ movdqu 16(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqu 32(%edi), %xmm1
+ movdqu 32(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ movdqu 48(%edi), %xmm1
+ movdqu 48(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_48)
+
+ add $64, %esi
+ add $64, %edi
+ jmp L(continue_48_48)
+
+L(continue_0):
+ and $15, %ch
+ jz L(continue_0_00)
+ cmp $16, %eax
+ jb L(continue_0_0)
+ cmp $32, %eax
+ jb L(continue_0_16)
+ cmp $48, %eax
+ jb L(continue_0_32)
+
+ .p2align 4
+L(continue_0_48):
+ mov (%esi), %ecx
+ cmp %ecx, (%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 4(%esi), %ecx
+ cmp %ecx, 4(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 8(%esi), %ecx
+ cmp %ecx, 8(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ movdqu 16(%edi), %xmm1
+ movdqu 16(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqu 32(%edi), %xmm1
+ movdqu 32(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ mov 48(%esi), %ecx
+ cmp %ecx, 48(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 52(%esi), %ecx
+ cmp %ecx, 52(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 56(%esi), %ecx
+ cmp %ecx, 56(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 60(%esi), %ecx
+ cmp %ecx, 60(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ add $64, %esi
+ add $64, %edi
+ jmp L(continue_0_48)
+
+ .p2align 4
+L(continue_00):
+ and $15, %ch
+ jz L(continue_00_00)
+ cmp $16, %eax
+ jb L(continue_00_0)
+ cmp $32, %eax
+ jb L(continue_00_16)
+ cmp $48, %eax
+ jb L(continue_00_32)
+
+ .p2align 4
+L(continue_00_48):
+ pcmpeqd (%edi), %xmm0
+ mov (%edi), %eax
+ pmovmskb %xmm0, %ecx
+ test %ecx, %ecx
+ jnz L(less4_double_words1)
+
+ cmp (%esi), %eax
+ jne L(nequal)
+
+ mov 4(%edi), %eax
+ cmp 4(%esi), %eax
+ jne L(nequal)
+
+ mov 8(%edi), %eax
+ cmp 8(%esi), %eax
+ jne L(nequal)
+
+ mov 12(%edi), %eax
+ cmp 12(%esi), %eax
+ jne L(nequal)
+
+ movdqu 16(%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqu 32(%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ movdqu 48(%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_48)
+
+ add $64, %esi
+ add $64, %edi
+ jmp L(continue_00_48)
+
+ .p2align 4
+L(continue_32):
+ and $15, %ch
+ jz L(continue_32_00)
+ cmp $16, %eax
+ jb L(continue_0_32)
+ cmp $32, %eax
+ jb L(continue_16_32)
+ cmp $48, %eax
+ jb L(continue_32_32)
+
+ .p2align 4
+L(continue_32_48):
+ mov (%esi), %ecx
+ cmp %ecx, (%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 4(%esi), %ecx
+ cmp %ecx, 4(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 8(%esi), %ecx
+ cmp %ecx, 8(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 16(%esi), %ecx
+ cmp %ecx, 16(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 20(%esi), %ecx
+ cmp %ecx, 20(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 24(%esi), %ecx
+ cmp %ecx, 24(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 28(%esi), %ecx
+ cmp %ecx, 28(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ movdqu 32(%edi), %xmm1
+ movdqu 32(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ movdqu 48(%edi), %xmm1
+ movdqu 48(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results */
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_48)
+
+ add $64, %esi
+ add $64, %edi
+ jmp L(continue_32_48)
+
+ .p2align 4
+L(continue_16):
+ and $15, %ch
+ jz L(continue_16_00)
+ cmp $16, %eax
+ jb L(continue_0_16)
+ cmp $32, %eax
+ jb L(continue_16_16)
+ cmp $48, %eax
+ jb L(continue_16_32)
+
+ .p2align 4
+L(continue_16_48):
+ mov (%esi), %ecx
+ cmp %ecx, (%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 4(%esi), %ecx
+ cmp %ecx, 4(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 8(%esi), %ecx
+ cmp %ecx, 8(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ movdqu 16(%edi), %xmm1
+ movdqu 16(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ mov 32(%esi), %ecx
+ cmp %ecx, 32(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 36(%esi), %ecx
+ cmp %ecx, 36(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 40(%esi), %ecx
+ cmp %ecx, 40(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 44(%esi), %ecx
+ cmp %ecx, 44(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ movdqu 48(%edi), %xmm1
+ movdqu 48(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_48)
+
+ add $64, %esi
+ add $64, %edi
+ jmp L(continue_16_48)
+
+ .p2align 4
+L(continue_00_00):
+ movdqa (%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqa 16(%edi), %xmm3
+ pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
+ pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm3 /* packed sub of comparison results*/
+ pmovmskb %xmm3, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqa 32(%edi), %xmm5
+ pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
+ pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm5 /* packed sub of comparison results*/
+ pmovmskb %xmm5, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ movdqa 48(%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_48)
+
+ add $64, %esi
+ add $64, %edi
+ jmp L(continue_00_00)
+
+ .p2align 4
+L(continue_00_32):
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ add $16, %esi
+ add $16, %edi
+ jmp L(continue_00_48)
+
+ .p2align 4
+L(continue_00_16):
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqu 16(%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ add $32, %esi
+ add $32, %edi
+ jmp L(continue_00_48)
+
+ .p2align 4
+L(continue_00_0):
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqu 16(%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqu 32(%esi), %xmm2
+ pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
+ pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pmovmskb %xmm2, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ add $48, %esi
+ add $48, %edi
+ jmp L(continue_00_48)
+
+ .p2align 4
+L(continue_48_00):
+ pcmpeqd (%esi), %xmm0
+ mov (%edi), %eax
+ pmovmskb %xmm0, %ecx
+ test %ecx, %ecx
+ jnz L(less4_double_words1)
+
+ cmp (%esi), %eax
+ jne L(nequal)
+
+ mov 4(%edi), %eax
+ cmp 4(%esi), %eax
+ jne L(nequal)
+
+ mov 8(%edi), %eax
+ cmp 8(%esi), %eax
+ jne L(nequal)
+
+ mov 12(%edi), %eax
+ cmp 12(%esi), %eax
+ jne L(nequal)
+
+ movdqu 16(%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqu 32(%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ movdqu 48(%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_48)
+
+ add $64, %esi
+ add $64, %edi
+ jmp L(continue_48_00)
+
+ .p2align 4
+L(continue_32_00):
+ movdqu (%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ add $16, %esi
+ add $16, %edi
+ jmp L(continue_48_00)
+
+ .p2align 4
+L(continue_16_00):
+ movdqu (%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqu 16(%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ add $32, %esi
+ add $32, %edi
+ jmp L(continue_48_00)
+
+ .p2align 4
+L(continue_0_00):
+ movdqu (%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqu 16(%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqu 32(%edi), %xmm1
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ add $48, %esi
+ add $48, %edi
+ jmp L(continue_48_00)
+
+ .p2align 4
+L(continue_32_32):
+ movdqu (%edi), %xmm1
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ add $16, %esi
+ add $16, %edi
+ jmp L(continue_48_48)
+
+ .p2align 4
+L(continue_16_16):
+ movdqu (%edi), %xmm1
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqu 16(%edi), %xmm3
+ movdqu 16(%esi), %xmm4
+ pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm3 /* packed sub of comparison results*/
+ pmovmskb %xmm3, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ add $32, %esi
+ add $32, %edi
+ jmp L(continue_48_48)
+
+ .p2align 4
+L(continue_0_0):
+ movdqu (%edi), %xmm1
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqu 16(%edi), %xmm3
+ movdqu 16(%esi), %xmm4
+ pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm3 /* packed sub of comparison results*/
+ pmovmskb %xmm3, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ movdqu 32(%edi), %xmm1
+ movdqu 32(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_32)
+
+ add $48, %esi
+ add $48, %edi
+ jmp L(continue_48_48)
+
+ .p2align 4
+L(continue_0_16):
+ movdqu (%edi), %xmm1
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ movdqu 16(%edi), %xmm1
+ movdqu 16(%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words_16)
+
+ add $32, %esi
+ add $32, %edi
+ jmp L(continue_32_48)
+
+ .p2align 4
+L(continue_0_32):
+ movdqu (%edi), %xmm1
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ add $16, %esi
+ add $16, %edi
+ jmp L(continue_16_48)
+
+ .p2align 4
+L(continue_16_32):
+ movdqu (%edi), %xmm1
+ movdqu (%esi), %xmm2
+ pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
+ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
+ jnz L(less4_double_words)
+
+ add $16, %esi
+ add $16, %edi
+ jmp L(continue_32_48)
+
+ .p2align 4
+L(less4_double_words1):
+ cmp (%esi), %eax
+ jne L(nequal)
+ test %eax, %eax
+ jz L(equal)
+
+ mov 4(%esi), %ecx
+ cmp %ecx, 4(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 8(%esi), %ecx
+ cmp %ecx, 8(%edi)
+ jne L(nequal)
+ test %ecx, %ecx
+ jz L(equal)
+
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(less4_double_words):
+ xor %eax, %eax
+ test %dl, %dl
+ jz L(next_two_double_words)
+ and $15, %dl
+ jz L(second_double_word)
+ mov (%esi), %ecx
+ cmp %ecx, (%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(second_double_word):
+ mov 4(%esi), %ecx
+ cmp %ecx, 4(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(next_two_double_words):
+ and $15, %dh
+ jz L(fourth_double_word)
+ mov 8(%esi), %ecx
+ cmp %ecx, 8(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(fourth_double_word):
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(less4_double_words_16):
+ xor %eax, %eax
+ test %dl, %dl
+ jz L(next_two_double_words_16)
+ and $15, %dl
+ jz L(second_double_word_16)
+ mov 16(%esi), %ecx
+ cmp %ecx, 16(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(second_double_word_16):
+ mov 20(%esi), %ecx
+ cmp %ecx, 20(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(next_two_double_words_16):
+ and $15, %dh
+ jz L(fourth_double_word_16)
+ mov 24(%esi), %ecx
+ cmp %ecx, 24(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(fourth_double_word_16):
+ mov 28(%esi), %ecx
+ cmp %ecx, 28(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(less4_double_words_32):
+ xor %eax, %eax
+ test %dl, %dl
+ jz L(next_two_double_words_32)
+ and $15, %dl
+ jz L(second_double_word_32)
+ mov 32(%esi), %ecx
+ cmp %ecx, 32(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(second_double_word_32):
+ mov 36(%esi), %ecx
+ cmp %ecx, 36(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(next_two_double_words_32):
+ and $15, %dh
+ jz L(fourth_double_word_32)
+ mov 40(%esi), %ecx
+ cmp %ecx, 40(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(fourth_double_word_32):
+ mov 44(%esi), %ecx
+ cmp %ecx, 44(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(less4_double_words_48):
+ xor %eax, %eax
+ test %dl, %dl
+ jz L(next_two_double_words_48)
+ and $15, %dl
+ jz L(second_double_word_48)
+ mov 48(%esi), %ecx
+ cmp %ecx, 48(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(second_double_word_48):
+ mov 52(%esi), %ecx
+ cmp %ecx, 52(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(next_two_double_words_48):
+ and $15, %dh
+ jz L(fourth_double_word_48)
+ mov 56(%esi), %ecx
+ cmp %ecx, 56(%edi)
+ jne L(nequal)
+ RETURN
+
+ .p2align 4
+L(fourth_double_word_48):
+ mov 60(%esi), %ecx
+ cmp %ecx, 60(%edi)
+ jne L(nequal)
+ RETURN
+
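+/* L(nequal) is reached with the flags still set by the last cmp of a
+   STR1 double word against the STR2 value in %ecx, so the signed jg
+   returns 1 and the fall-through negates %eax to -1.  */
+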
+ .p2align 4
+L(nequal):
+ mov $1, %eax
+ jg L(return)
+ neg %eax
+ RETURN
+
+ .p2align 4
+L(return):
+ RETURN
+
+ .p2align 4
+L(equal):
+ xorl %eax, %eax
+ RETURN
+
+ CFI_POP (%edi)
+ CFI_POP (%esi)
+
+ .p2align 4
+L(neq):
+ mov $1, %eax
+ jg L(neq_bigger)
+ neg %eax
+
+L(neq_bigger):
+ ret
+
+ .p2align 4
+L(eq):
+ xorl %eax, %eax
+ ret
+
+END (__wcscmp_sse2)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp.S
new file mode 100644
index 0000000000..7118bdd4db
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscmp.S
@@ -0,0 +1,39 @@
+/* Multiple versions of wcscmp
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc and for the
+   DSO.  In static binaries, we need wcscmp before the initialization
+   has happened.  */
+#if IS_IN (libc)
+ .text
+ENTRY(__wcscmp)
+ .type __wcscmp, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcscmp_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__wcscmp_sse2)
+2: ret
+END(__wcscmp)
+weak_alias (__wcscmp, wcscmp)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-c.c
new file mode 100644
index 0000000000..fb3000392b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-c.c
@@ -0,0 +1,5 @@
+#if IS_IN (libc)
+# define wcscpy __wcscpy_ia32
+#endif
+
+#include "wcsmbs/wcscpy.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
new file mode 100644
index 0000000000..6280ba92ab
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
@@ -0,0 +1,600 @@
+/* wcscpy with SSSE3
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 4
+# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
+# define STR1 PARMS
+# define STR2 STR1+4
+# define LEN STR2+4
+
+ atom_text_section
+ENTRY (__wcscpy_ssse3)
+ mov STR1(%esp), %edx
+ mov STR2(%esp), %ecx
+
+ cmp $0, (%ecx)
+ jz L(ExitTail4)
+ cmp $0, 4(%ecx)
+ jz L(ExitTail8)
+ cmp $0, 8(%ecx)
+ jz L(ExitTail12)
+ cmp $0, 12(%ecx)
+ jz L(ExitTail16)
+
+ PUSH (%edi)
+ mov %edx, %edi
+ PUSH (%esi)
+ lea 16(%ecx), %esi
+
+ and $-16, %esi
+
+ pxor %xmm0, %xmm0
+ pcmpeqd (%esi), %xmm0
+ movdqu (%ecx), %xmm1
+ movdqu %xmm1, (%edx)
+
+ pmovmskb %xmm0, %eax
+ sub %ecx, %esi
+
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ mov %edx, %eax
+ lea 16(%edx), %edx
+ and $-16, %edx
+ sub %edx, %eax
+
+ sub %eax, %ecx
+ mov %ecx, %eax
+ and $0xf, %eax
+ mov $0, %esi
+
+ jz L(Align16Both)
+ cmp $4, %eax
+ je L(Shl4)
+ cmp $8, %eax
+ je L(Shl8)
+ jmp L(Shl12)
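+
+/* Here %eax holds the source/destination misalignment mod 16.  The
+   L(ShlN) paths realign the loads to 16 bytes and use palignr $N to
+   stitch adjacent chunks back together before each aligned store.  */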
+
+L(Align16Both):
+ movaps (%ecx), %xmm1
+ movaps 16(%ecx), %xmm2
+ movaps %xmm1, (%edx)
+ pcmpeqd %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm3
+ movaps %xmm2, (%edx, %esi)
+ pcmpeqd %xmm3, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm4
+ movaps %xmm3, (%edx, %esi)
+ pcmpeqd %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm1
+ movaps %xmm4, (%edx, %esi)
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm2
+ movaps %xmm1, (%edx, %esi)
+ pcmpeqd %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps 16(%ecx, %esi), %xmm3
+ movaps %xmm2, (%edx, %esi)
+ pcmpeqd %xmm3, %xmm0
+ pmovmskb %xmm0, %eax
+ lea 16(%esi), %esi
+
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps %xmm3, (%edx, %esi)
+ mov %ecx, %eax
+ lea 16(%ecx, %esi), %ecx
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ sub %eax, %edx
+
+ mov $-0x40, %esi
+
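+/* Each iteration handles 64 bytes: pminub folds the four 16-byte chunks,
+   so a NUL wchar anywhere among them leaves a zero double word that the
+   pcmpeqd against %xmm0 (all zeroes) detects; L(Aligned64Leave) then
+   rechecks the chunks one by one to locate it.  */
+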
+L(Aligned64Loop):
+ movaps (%ecx), %xmm2
+ movaps 32(%ecx), %xmm3
+ movaps %xmm2, %xmm4
+ movaps 16(%ecx), %xmm5
+ movaps %xmm3, %xmm6
+ movaps 48(%ecx), %xmm7
+ pminub %xmm5, %xmm2
+ pminub %xmm7, %xmm3
+ pminub %xmm2, %xmm3
+ lea 64(%edx), %edx
+ pcmpeqd %xmm0, %xmm3
+ lea 64(%ecx), %ecx
+ pmovmskb %xmm3, %eax
+
+ test %eax, %eax
+ jnz L(Aligned64Leave)
+ movaps %xmm4, -64(%edx)
+ movaps %xmm5, -48(%edx)
+ movaps %xmm6, -32(%edx)
+ movaps %xmm7, -16(%edx)
+ jmp L(Aligned64Loop)
+
+L(Aligned64Leave):
+ pcmpeqd %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqd %xmm5, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm4, -64(%edx)
+ test %eax, %eax
+ lea 16(%esi), %esi
+ jnz L(CopyFrom1To16Bytes)
+
+ pcmpeqd %xmm6, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm5, -48(%edx)
+ test %eax, %eax
+ lea 16(%esi), %esi
+ jnz L(CopyFrom1To16Bytes)
+
+ movaps %xmm6, -32(%edx)
+ pcmpeqd %xmm7, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ lea 16(%esi), %esi
+ jnz L(CopyFrom1To16Bytes)
+
+ mov $-0x40, %esi
+ movaps %xmm7, -16(%edx)
+ jmp L(Aligned64Loop)
+
+ .p2align 4
+L(Shl4):
+ movaps -4(%ecx), %xmm1
+ movaps 12(%ecx), %xmm2
+L(Shl4Start):
+ pcmpeqd %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 28(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm1
+
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 28(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 28(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+
+ test %eax, %eax
+ jnz L(Shl4LoopExit)
+
+ palignr $4, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 28(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -12(%ecx), %ecx
+ sub %eax, %edx
+
+ movaps -4(%ecx), %xmm1
+
+L(Shl4LoopStart):
+ movaps 12(%ecx), %xmm2
+ movaps 28(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 44(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 60(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqd %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $4, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $4, %xmm3, %xmm4
+ jnz L(Shl4Start)
+
+ palignr $4, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $4, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl4LoopStart)
+
+L(Shl4LoopExit):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
+ POP (%esi)
+ add $12, %edx
+ add $12, %ecx
+ test %al, %al
+ jz L(ExitHigh)
+ test $0x01, %al
+ jnz L(Exit4)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl %edi, %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(Shl8):
+ movaps -8(%ecx), %xmm1
+ movaps 8(%ecx), %xmm2
+L(Shl8Start):
+ pcmpeqd %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 24(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm1
+
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 24(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 24(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+
+ test %eax, %eax
+ jnz L(Shl8LoopExit)
+
+ palignr $8, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 24(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -8(%ecx), %ecx
+ sub %eax, %edx
+
+ movaps -8(%ecx), %xmm1
+
+L(Shl8LoopStart):
+ movaps 8(%ecx), %xmm2
+ movaps 24(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 40(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 56(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqd %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $8, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $8, %xmm3, %xmm4
+ jnz L(Shl8Start)
+
+ palignr $8, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $8, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl8LoopStart)
+
+L(Shl8LoopExit):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ POP (%esi)
+ add $8, %edx
+ add $8, %ecx
+ test %al, %al
+ jz L(ExitHigh)
+ test $0x01, %al
+ jnz L(Exit4)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl %edi, %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(Shl12):
+ movaps -12(%ecx), %xmm1
+ movaps 4(%ecx), %xmm2
+L(Shl12Start):
+ pcmpeqd %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ movaps %xmm2, %xmm3
+
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 20(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm1
+
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 20(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+ movaps %xmm2, %xmm3
+
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm2, (%edx)
+ movaps 20(%ecx), %xmm2
+
+ pcmpeqd %xmm2, %xmm0
+ lea 16(%edx), %edx
+ pmovmskb %xmm0, %eax
+ lea 16(%ecx), %ecx
+
+ test %eax, %eax
+ jnz L(Shl12LoopExit)
+
+ palignr $12, %xmm3, %xmm2
+ movaps %xmm2, (%edx)
+ lea 20(%ecx), %ecx
+ lea 16(%edx), %edx
+
+ mov %ecx, %eax
+ and $-0x40, %ecx
+ sub %ecx, %eax
+ lea -4(%ecx), %ecx
+ sub %eax, %edx
+
+ movaps -12(%ecx), %xmm1
+
+L(Shl12LoopStart):
+ movaps 4(%ecx), %xmm2
+ movaps 20(%ecx), %xmm3
+ movaps %xmm3, %xmm6
+ movaps 36(%ecx), %xmm4
+ movaps %xmm4, %xmm7
+ movaps 52(%ecx), %xmm5
+ pminub %xmm2, %xmm6
+ pminub %xmm5, %xmm7
+ pminub %xmm6, %xmm7
+ pcmpeqd %xmm0, %xmm7
+ pmovmskb %xmm7, %eax
+ movaps %xmm5, %xmm7
+ palignr $12, %xmm4, %xmm5
+ test %eax, %eax
+ palignr $12, %xmm3, %xmm4
+ jnz L(Shl12Start)
+
+ palignr $12, %xmm2, %xmm3
+ lea 64(%ecx), %ecx
+ palignr $12, %xmm1, %xmm2
+ movaps %xmm7, %xmm1
+ movaps %xmm5, 48(%edx)
+ movaps %xmm4, 32(%edx)
+ movaps %xmm3, 16(%edx)
+ movaps %xmm2, (%edx)
+ lea 64(%edx), %edx
+ jmp L(Shl12LoopStart)
+
+L(Shl12LoopExit):
+ movl (%ecx), %esi
+ movl %esi, (%edx)
+ mov $4, %esi
+
+ .p2align 4
+L(CopyFrom1To16Bytes):
+ add %esi, %edx
+ add %esi, %ecx
+
+ POP (%esi)
+ test %al, %al
+ jz L(ExitHigh)
+ test $0x01, %al
+ jnz L(Exit4)
+L(Exit8):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl %edi, %eax
+ RETURN
+
+ .p2align 4
+L(ExitHigh):
+ test $0x01, %ah
+ jnz L(Exit12)
+L(Exit16):
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ movl %edi, %eax
+ RETURN
+
+ .p2align 4
+L(Exit4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl %edi, %eax
+ RETURN
+
+ .p2align 4
+L(Exit12):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl 8(%ecx), %eax
+ movl %eax, 8(%edx)
+ movl %edi, %eax
+ RETURN
+
+CFI_POP (%edi)
+
+ .p2align 4
+L(ExitTail4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ movl %edx, %eax
+ ret
+
+ .p2align 4
+L(ExitTail8):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl %edx, %eax
+ ret
+
+ .p2align 4
+L(ExitTail12):
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movl 8(%ecx), %eax
+ movl %eax, 8(%edx)
+ movl %edx, %eax
+ ret
+
+ .p2align 4
+L(ExitTail16):
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ movl %edx, %eax
+ ret
+
+END (__wcscpy_ssse3)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy.S
new file mode 100644
index 0000000000..cfc97dd87c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcscpy.S
@@ -0,0 +1,36 @@
+/* Multiple versions of wcscpy
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+ .text
+ENTRY(wcscpy)
+ .type wcscpy, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcscpy_ia32)
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__wcscpy_ssse3)
+2: ret
+END(wcscpy)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-c.c
new file mode 100644
index 0000000000..a335dc0f7e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-c.c
@@ -0,0 +1,9 @@
+#include <wchar.h>
+
+#if IS_IN (libc)
+# define WCSLEN __wcslen_ia32
+#endif
+
+extern __typeof (wcslen) __wcslen_ia32;
+
+#include "wcsmbs/wcslen.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-sse2.S
new file mode 100644
index 0000000000..bd3fc4c79b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen-sse2.S
@@ -0,0 +1,193 @@
+/* wcslen with SSE2
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+# define STR 4
+
+ .text
+ENTRY (__wcslen_sse2)
+ mov STR(%esp), %edx
+
+ cmp $0, (%edx)
+ jz L(exit_tail0)
+ cmp $0, 4(%edx)
+ jz L(exit_tail1)
+ cmp $0, 8(%edx)
+ jz L(exit_tail2)
+ cmp $0, 12(%edx)
+ jz L(exit_tail3)
+ cmp $0, 16(%edx)
+ jz L(exit_tail4)
+ cmp $0, 20(%edx)
+ jz L(exit_tail5)
+ cmp $0, 24(%edx)
+ jz L(exit_tail6)
+ cmp $0, 28(%edx)
+ jz L(exit_tail7)
+
+ pxor %xmm0, %xmm0
+
+ lea 32(%edx), %eax
+ lea 16(%edx), %ecx
+ and $-16, %eax
+
+ pcmpeqd (%eax), %xmm0
+ pmovmskb %xmm0, %edx
+ pxor %xmm1, %xmm1
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqd (%eax), %xmm1
+ pmovmskb %xmm1, %edx
+ pxor %xmm2, %xmm2
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqd (%eax), %xmm2
+ pmovmskb %xmm2, %edx
+ pxor %xmm3, %xmm3
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ pcmpeqd (%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 16(%eax), %eax
+ jnz L(exit)
+
+ and $-0x40, %eax
+
+ .p2align 4
+L(aligned_64_loop):
+ movaps (%eax), %xmm0
+ movaps 16(%eax), %xmm1
+ movaps 32(%eax), %xmm2
+ movaps 48(%eax), %xmm6
+
+ pminub %xmm1, %xmm0
+ pminub %xmm6, %xmm2
+ pminub %xmm0, %xmm2
+ pcmpeqd %xmm3, %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ lea 64(%eax), %eax
+ jz L(aligned_64_loop)
+
+ pcmpeqd -64(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea 48(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqd %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqd -32(%eax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ pcmpeqd %xmm6, %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ lea -16(%ecx), %ecx
+ jnz L(exit)
+
+ jmp L(aligned_64_loop)
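+
+/* At L(exit), %eax minus %ecx is the byte offset of the 16-byte chunk
+   holding the NUL, so shifting right by 2 yields its first wchar index;
+   the mask in %edx then selects which of the four double words it is.  */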
+
+ .p2align 4
+L(exit):
+ sub %ecx, %eax
+ shr $2, %eax
+ test %dl, %dl
+ jz L(exit_high)
+
+ mov %dl, %cl
+ and $15, %cl
+ jz L(exit_1)
+ ret
+
+ .p2align 4
+L(exit_high):
+ mov %dh, %ch
+ and $15, %ch
+ jz L(exit_3)
+ add $2, %eax
+ ret
+
+ .p2align 4
+L(exit_1):
+ add $1, %eax
+ ret
+
+ .p2align 4
+L(exit_3):
+ add $3, %eax
+ ret
+
+ .p2align 4
+L(exit_tail0):
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(exit_tail1):
+ mov $1, %eax
+ ret
+
+ .p2align 4
+L(exit_tail2):
+ mov $2, %eax
+ ret
+
+ .p2align 4
+L(exit_tail3):
+ mov $3, %eax
+ ret
+
+ .p2align 4
+L(exit_tail4):
+ mov $4, %eax
+ ret
+
+ .p2align 4
+L(exit_tail5):
+ mov $5, %eax
+ ret
+
+ .p2align 4
+L(exit_tail6):
+ mov $6, %eax
+ ret
+
+ .p2align 4
+L(exit_tail7):
+ mov $7, %eax
+ ret
+
+END (__wcslen_sse2)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen.S
new file mode 100644
index 0000000000..6ef9b6e7b5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcslen.S
@@ -0,0 +1,37 @@
+/* Multiple versions of wcslen
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(__wcslen)
+ .type __wcslen, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcslen_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__wcslen_sse2)
+2: ret
+END(__wcslen)
+
+weak_alias (__wcslen, wcslen)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-c.c
new file mode 100644
index 0000000000..8d8a335b5b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-c.c
@@ -0,0 +1,5 @@
+#if IS_IN (libc)
+# define wcsrchr __wcsrchr_ia32
+#endif
+
+#include "wcsmbs/wcsrchr.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
new file mode 100644
index 0000000000..1a9b60e55e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
@@ -0,0 +1,354 @@
+/* wcsrchr with SSE2, without using bsf instructions.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+# define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS 8
+# define ENTRANCE PUSH (%edi);
+# define RETURN POP (%edi); ret; CFI_PUSH (%edi);
+# define STR1 PARMS
+# define STR2 STR1+4
+
+ atom_text_section
+ENTRY (__wcsrchr_sse2)
+
+ ENTRANCE
+ mov STR1(%esp), %ecx
+ movd STR2(%esp), %xmm1
+
+ mov %ecx, %edi
+ punpckldq %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ punpckldq %xmm1, %xmm1
+
+/* ECX has OFFSET, the string's byte offset within its 64-byte cache
+   line. */
+ and $63, %ecx
+ cmp $48, %ecx
+ ja L(crosscache)
+
+/* unaligned string. */
+ movdqu (%edi), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ pcmpeqd %xmm1, %xmm0
+/* Find where NULL is. */
+ pmovmskb %xmm2, %ecx
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ add $16, %edi
+
+ test %eax, %eax
+ jnz L(unaligned_match1)
+
+ test %ecx, %ecx
+ jnz L(return_null)
+
+ and $-16, %edi
+
+ PUSH (%esi)
+
+ xor %edx, %edx
+ jmp L(loop)
+
+ CFI_POP (%esi)
+
+ .p2align 4
+L(unaligned_match1):
+ test %ecx, %ecx
+ jnz L(prolog_find_zero_1)
+
+ PUSH (%esi)
+
+/* Save current match */
+ mov %eax, %edx
+ mov %edi, %esi
+ and $-16, %edi
+ jmp L(loop)
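+
+/* Loop invariant: %edx caches the pcmpeqd match mask of the most recent
+   chunk that contained the character and %esi points just past that
+   chunk, so the rightmost match can be recovered once a NUL is seen.  */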
+
+ CFI_POP (%esi)
+
+ .p2align 4
+L(crosscache):
+/* Handle unaligned string.  */
+ and $15, %ecx
+ and $-16, %edi
+ pxor %xmm3, %xmm3
+ movdqa (%edi), %xmm0
+ pcmpeqd %xmm0, %xmm3
+ pcmpeqd %xmm1, %xmm0
+/* Find where NULL is. */
+ pmovmskb %xmm3, %edx
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+/* Remove the leading bytes. */
+ shr %cl, %edx
+ shr %cl, %eax
+ add $16, %edi
+
+ test %eax, %eax
+ jnz L(unaligned_match)
+
+ test %edx, %edx
+ jnz L(return_null)
+
+ PUSH (%esi)
+
+ xor %edx, %edx
+ jmp L(loop)
+
+ CFI_POP (%esi)
+
+ .p2align 4
+L(unaligned_match):
+ test %edx, %edx
+ jnz L(prolog_find_zero)
+
+ PUSH (%esi)
+
+ mov %eax, %edx
+ lea (%edi, %ecx), %esi
+
+/* Loop start on aligned string. */
+ .p2align 4
+L(loop):
+ movdqa (%edi), %xmm0
+ pcmpeqd %xmm0, %xmm2
+ add $16, %edi
+ pcmpeqd %xmm1, %xmm0
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm0, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm3
+ pcmpeqd %xmm3, %xmm2
+ add $16, %edi
+ pcmpeqd %xmm1, %xmm3
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm3, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm4
+ pcmpeqd %xmm4, %xmm2
+ add $16, %edi
+ pcmpeqd %xmm1, %xmm4
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm4, %eax
+ or %eax, %ecx
+ jnz L(matches)
+
+ movdqa (%edi), %xmm5
+ pcmpeqd %xmm5, %xmm2
+ add $16, %edi
+ pcmpeqd %xmm1, %xmm5
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm5, %eax
+ or %eax, %ecx
+ jz L(loop)
+
+ .p2align 4
+L(matches):
+ test %eax, %eax
+ jnz L(match)
+L(return_value):
+ test %edx, %edx
+ jz L(return_null_1)
+ mov %edx, %eax
+ mov %esi, %edi
+
+ POP (%esi)
+
+ test %ah, %ah
+ jnz L(match_third_or_fourth_wchar)
+ test $15 << 4, %al
+ jnz L(match_second_wchar)
+ lea -16(%edi), %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(return_null_1):
+ POP (%esi)
+
+ xor %eax, %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(match):
+ pmovmskb %xmm2, %ecx
+ test %ecx, %ecx
+ jnz L(find_zero)
+/* save match info */
+ mov %eax, %edx
+ mov %edi, %esi
+ jmp L(loop)
+
+ .p2align 4
+L(find_zero):
+ test %cl, %cl
+ jz L(find_zero_in_third_or_fourth_wchar)
+ test $15, %cl
+ jz L(find_zero_in_second_wchar)
+ and $1, %eax
+ jz L(return_value)
+
+ POP (%esi)
+
+ lea -16(%edi), %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(find_zero_in_second_wchar):
+ and $1 << 5 - 1, %eax
+ jz L(return_value)
+
+ POP (%esi)
+
+ test $15 << 4, %al
+ jnz L(match_second_wchar)
+ lea -16(%edi), %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(find_zero_in_third_or_fourth_wchar):
+ test $15, %ch
+ jz L(find_zero_in_fourth_wchar)
+ and $1 << 9 - 1, %eax
+ jz L(return_value)
+
+ POP (%esi)
+
+ test %ah, %ah
+ jnz L(match_third_wchar)
+ test $15 << 4, %al
+ jnz L(match_second_wchar)
+ lea -16(%edi), %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(find_zero_in_fourth_wchar):
+
+ POP (%esi)
+
+ test %ah, %ah
+ jnz L(match_third_or_fourth_wchar)
+ test $15 << 4, %al
+ jnz L(match_second_wchar)
+ lea -16(%edi), %eax
+ RETURN
+
+ CFI_PUSH (%esi)
+
+ .p2align 4
+L(match_second_wchar):
+ lea -12(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_third_or_fourth_wchar):
+ test $15 << 4, %ah
+ jnz L(match_fourth_wchar)
+ lea -8(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_third_wchar):
+ lea -8(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(match_fourth_wchar):
+ lea -4(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero):
+ add %ecx, %edi
+ mov %edx, %ecx
+L(prolog_find_zero_1):
+ test %cl, %cl
+ jz L(prolog_find_zero_in_third_or_fourth_wchar)
+ test $15, %cl
+ jz L(prolog_find_zero_in_second_wchar)
+ and $1, %eax
+ jz L(return_null)
+
+ lea -16(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero_in_second_wchar):
+ and $1 << 5 - 1, %eax
+ jz L(return_null)
+
+ test $15 << 4, %al
+ jnz L(match_second_wchar)
+ lea -16(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero_in_third_or_fourth_wchar):
+ test $15, %ch
+ jz L(prolog_find_zero_in_fourth_wchar)
+ and $1 << 9 - 1, %eax
+ jz L(return_null)
+
+ test %ah, %ah
+ jnz L(match_third_wchar)
+ test $15 << 4, %al
+ jnz L(match_second_wchar)
+ lea -16(%edi), %eax
+ RETURN
+
+ .p2align 4
+L(prolog_find_zero_in_fourth_wchar):
+ test %ah, %ah
+ jnz L(match_third_or_fourth_wchar)
+ test $15 << 4, %al
+ jnz L(match_second_wchar)
+ lea -16(%edi), %eax
+ RETURN
+
+END (__wcsrchr_sse2)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr.S
new file mode 100644
index 0000000000..cf67333995
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wcsrchr.S
@@ -0,0 +1,35 @@
+/* Multiple versions of wcsrchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#if IS_IN (libc)
+ .text
+ENTRY(wcsrchr)
+ .type wcsrchr, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wcsrchr_ia32)
+ HAS_CPU_FEATURE (SSE2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__wcsrchr_sse2)
+2: ret
+END(wcsrchr)
+#endif
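
The ENTRY above is an IFUNC resolver written by hand in assembly: it returns __wcsrchr_sse2 when SSE2 is present and falls back to __wcsrchr_ia32 otherwise. The same dispatch can be sketched in C with GCC's ifunc attribute; cpu_has_sse2 below is a hypothetical stand-in for the HAS_CPU_FEATURE macro, not a real glibc symbol:

#include <stddef.h>

typedef wchar_t *(*wcsrchr_fn) (const wchar_t *, wchar_t);

extern wchar_t *__wcsrchr_ia32 (const wchar_t *, wchar_t);
extern wchar_t *__wcsrchr_sse2 (const wchar_t *, wchar_t);
extern int cpu_has_sse2 (void);   /* hypothetical stand-in for HAS_CPU_FEATURE (SSE2) */

/* The resolver runs once, at relocation time; the chosen address is
   patched into the GOT, so later calls pay no dispatch cost.  */
static wcsrchr_fn
wcsrchr_resolver (void)
{
  return cpu_has_sse2 () ? __wcsrchr_sse2 : __wcsrchr_ia32;
}

wchar_t *wcsrchr (const wchar_t *, wchar_t)
  __attribute__ ((ifunc ("wcsrchr_resolver")));
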
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-c.c b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-c.c
new file mode 100644
index 0000000000..75ab4b94c1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-c.c
@@ -0,0 +1,9 @@
+#include <wchar.h>
+
+#if IS_IN (libc)
+# define WMEMCMP __wmemcmp_ia32
+#endif
+
+extern __typeof (wmemcmp) __wmemcmp_ia32;
+
+#include "wcsmbs/wmemcmp.c"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
new file mode 100644
index 0000000000..1a857c7e21
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
@@ -0,0 +1,4 @@
+#define USE_AS_WMEMCMP 1
+#define MEMCMP __wmemcmp_sse4_2
+
+#include "memcmp-sse4.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
new file mode 100644
index 0000000000..a41ef95fc1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
@@ -0,0 +1,4 @@
+#define USE_AS_WMEMCMP 1
+#define MEMCMP __wmemcmp_ssse3
+
+#include "memcmp-ssse3.S"
diff --git a/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp.S b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp.S
new file mode 100644
index 0000000000..1b9a54a413
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/multiarch/wmemcmp.S
@@ -0,0 +1,40 @@
+/* Multiple versions of wmemcmp
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+
+#if IS_IN (libc)
+ .text
+ENTRY(wmemcmp)
+ .type wmemcmp, @gnu_indirect_function
+ LOAD_GOT_AND_RTLD_GLOBAL_RO
+ LOAD_FUNC_GOT_EAX (__wmemcmp_ia32)
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__wmemcmp_ssse3)
+ HAS_CPU_FEATURE (SSE4_2)
+ jz 2f
+ LOAD_FUNC_GOT_EAX (__wmemcmp_sse4_2)
+2: ret
+END(wmemcmp)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i686/nptl/tls.h b/REORG.TODO/sysdeps/i386/i686/nptl/tls.h
new file mode 100644
index 0000000000..5b527af9d3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/nptl/tls.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _TLS_H
+
+/* Additional definitions for <tls.h> on i686 and up. */
+
+
+/* Macros to load from and store into segment registers. We can use
+ the 32-bit instructions. */
+#define TLS_GET_GS() \
+ ({ int __seg; __asm ("movl %%gs, %0" : "=q" (__seg)); __seg; })
+#define TLS_SET_GS(val) \
+ __asm ("movl %0, %%gs" :: "q" (val))
+
+
+/* Get the full set of definitions. */
+#include_next <tls.h>
+
+#endif /* tls.h */
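
A short usage sketch, assuming code that needs to save and restore the thread register around installing a new descriptor (illustrative fragment, not glibc code):

int seg = TLS_GET_GS ();	/* read the %gs selector as a 32-bit value */
/* ... install a new segment descriptor ... */
TLS_SET_GS (seg);		/* restore the saved selector */
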
diff --git a/REORG.TODO/sysdeps/i386/i686/pthread_spin_trylock.S b/REORG.TODO/sysdeps/i386/i686/pthread_spin_trylock.S
new file mode 100644
index 0000000000..ce9c94d41a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/pthread_spin_trylock.S
@@ -0,0 +1,20 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define HAVE_CMOV 1
+#include <sysdeps/i386/pthread_spin_trylock.S>
diff --git a/REORG.TODO/sysdeps/i386/i686/stack-aliasing.h b/REORG.TODO/sysdeps/i386/i686/stack-aliasing.h
new file mode 100644
index 0000000000..9b5a1b0d47
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/stack-aliasing.h
@@ -0,0 +1,23 @@
+/* Define macros for stack address aliasing issues for NPTL. i686 version.
+ Copyright (C) 2014-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* What we want is to avoid the 64k aliasing problem, which reliably
+   happens if all stacks use sizes that are a multiple of 64k.  Tell
+   the stack allocator to disturb this by allocating one more page if
+   necessary.  */
+#define MULTI_PAGE_ALIASING 65536
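
A sketch of how a stack allocator can consume this macro; size and pagesize are illustrative locals, not the actual allocatestack.c variables:

/* Avoid 64k cache aliasing: if the requested stack size is an exact
   multiple of MULTI_PAGE_ALIASING, grow it by one page so consecutive
   thread stacks do not all land on the same cache sets.  */
if (size % MULTI_PAGE_ALIASING == 0)
  size += pagesize;
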
diff --git a/REORG.TODO/sysdeps/i386/i686/strcmp.S b/REORG.TODO/sysdeps/i386/i686/strcmp.S
new file mode 100644
index 0000000000..1ae305912e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/strcmp.S
@@ -0,0 +1,52 @@
+/* Highly optimized version for ix86, x>=6.
+ Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define STR1 PARMS
+#define STR2 STR1+4
+
+ .text
+ENTRY (strcmp)
+
+ movl STR1(%esp), %ecx
+ movl STR2(%esp), %edx
+
+L(oop): movb (%ecx), %al
+ cmpb (%edx), %al
+ jne L(neq)
+ incl %ecx
+ incl %edx
+ testb %al, %al
+ jnz L(oop)
+
+ xorl %eax, %eax
+	/* When the strings are equal, both pointers rest one byte beyond
+	   the end of the NUL terminators.  */
+ ret
+
+L(neq): movl $1, %eax
+ movl $-1, %ecx
+ cmovbl %ecx, %eax
+
+ ret
+END (strcmp)
+libc_hidden_builtin_def (strcmp)
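
The cmovbl at L(neq) converts the carry of the failed cmpb into -1, leaving the preloaded 1 otherwise; the comparison is on unsigned byte values. A C reference of the same contract (a sketch, not the installed implementation):

/* Reference strcmp: compare as unsigned chars; return -1, 0 or 1
   exactly as the assembly does.  */
static int
strcmp_ref (const char *s1, const char *s2)
{
  const unsigned char *p1 = (const unsigned char *) s1;
  const unsigned char *p2 = (const unsigned char *) s2;
  while (*p1 != '\0' && *p1 == *p2)
    p1++, p2++;
  return (*p1 < *p2) ? -1 : (*p1 > *p2);
}
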
diff --git a/REORG.TODO/sysdeps/i386/i686/tst-stack-align.h b/REORG.TODO/sysdeps/i386/i686/tst-stack-align.h
new file mode 100644
index 0000000000..51f03fe77b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i686/tst-stack-align.h
@@ -0,0 +1,44 @@
+/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stdint.h>
+#ifndef __SSE__
+#include_next <tst-stack-align.h>
+#else
+#include <xmmintrin.h>
+
+#define TEST_STACK_ALIGN() \
+ ({ \
+ __m128 _m; \
+ double _d = 12.0; \
+ long double _ld = 15.0; \
+ int _ret = 0; \
+ printf ("__m128: %p %zu\n", &_m, __alignof (__m128)); \
+ if ((((uintptr_t) &_m) & (__alignof (__m128) - 1)) != 0) \
+ _ret = 1; \
+ \
+ printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
+ if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
+ _ret = 1; \
+ \
+ printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
+ if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
+ _ret = 1; \
+ _ret; \
+ })
+#endif
diff --git a/REORG.TODO/sysdeps/i386/i786/Implies b/REORG.TODO/sysdeps/i386/i786/Implies
new file mode 100644
index 0000000000..1cd29f63cf
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i786/Implies
@@ -0,0 +1,2 @@
+# The PPro and PII cores are mostly the same.
+i386/i686
diff --git a/REORG.TODO/sysdeps/i386/init-arch.h b/REORG.TODO/sysdeps/i386/init-arch.h
new file mode 100644
index 0000000000..72881c5679
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/init-arch.h
@@ -0,0 +1,19 @@
+/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define MINIMUM_ISA 486
+#include <sysdeps/x86/init-arch.h>
diff --git a/REORG.TODO/sysdeps/i386/jmpbuf-offsets.h b/REORG.TODO/sysdeps/i386/jmpbuf-offsets.h
new file mode 100644
index 0000000000..1c95db7287
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/jmpbuf-offsets.h
@@ -0,0 +1,25 @@
+/* Private macros for accessing __jmp_buf contents. i386 version.
+ Copyright (C) 2006-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define JB_BX 0
+#define JB_SI 1
+#define JB_DI 2
+#define JB_BP 3
+#define JB_SP 4
+#define JB_PC 5
+#define JB_SIZE 24
diff --git a/REORG.TODO/sysdeps/i386/jmpbuf-unwind.h b/REORG.TODO/sysdeps/i386/jmpbuf-unwind.h
new file mode 100644
index 0000000000..0a63a832cc
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/jmpbuf-unwind.h
@@ -0,0 +1,47 @@
+/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <setjmp.h>
+#include <jmpbuf-offsets.h>
+#include <stdint.h>
+#include <unwind.h>
+#include <sysdep.h>
+
+/* Test if longjmp to JMPBUF would unwind the frame
+ containing a local variable at ADDRESS. */
+#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \
+ ((void *) (address) < (void *) demangle ((jmpbuf)[JB_SP]))
+
+#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \
+ _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj)
+
+static inline uintptr_t __attribute__ ((unused))
+_jmpbuf_sp (__jmp_buf regs)
+{
+ uintptr_t sp = regs[JB_SP];
+#ifdef PTR_DEMANGLE
+ PTR_DEMANGLE (sp);
+#endif
+ return sp;
+}
+
+#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \
+ ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj))
+
+/* We use the normal longjmp for unwinding. */
+#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val)
diff --git a/REORG.TODO/sysdeps/i386/ldbl2mpn.c b/REORG.TODO/sysdeps/i386/ldbl2mpn.c
new file mode 100644
index 0000000000..076be0ae7e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/ldbl2mpn.c
@@ -0,0 +1,120 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include <ieee754.h>
+#include <float.h>
+#include <stdlib.h>
+
+/* Convert a `long double' in IEEE854 extended-precision format to a
+   multi-precision integer representing the significand scaled up by its
+   number of bits (64 for long double) and an integral power of two
+   (an MPN analogue of frexpl).  */
+
+mp_size_t
+__mpn_extract_long_double (mp_ptr res_ptr, mp_size_t size,
+ int *expt, int *is_neg,
+ long double value)
+{
+ union ieee854_long_double u;
+ u.d = value;
+
+ *is_neg = u.ieee.negative;
+ *expt = (int) u.ieee.exponent - IEEE854_LONG_DOUBLE_BIAS;
+
+#if BITS_PER_MP_LIMB == 32
+ res_ptr[0] = u.ieee.mantissa1; /* Low-order 32 bits of fraction. */
+ res_ptr[1] = u.ieee.mantissa0; /* High-order 32 bits. */
+ #define N 2
+#elif BITS_PER_MP_LIMB == 64
+ /* Hopefully the compiler will combine the two bitfield extracts
+ and this composition into just the original quadword extract. */
+ res_ptr[0] = ((mp_limb_t) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
+ #define N 1
+#else
+ #error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
+#endif
+
+ if (u.ieee.exponent == 0)
+ {
+ /* A biased exponent of zero is a special case.
+ Either it is a zero or it is a denormal number. */
+ if (res_ptr[0] == 0 && res_ptr[N - 1] == 0) /* Assumes N<=2. */
+ /* It's zero. */
+ *expt = 0;
+ else
+ {
+ /* It is a denormal number, meaning it has no implicit leading
+ one bit, and its exponent is in fact the format minimum. */
+ int cnt;
+
+	  /* One problem with Intel's 80-bit format is that the explicit
+	     leading one of the normalized representation has to be zero
+	     for denormalized numbers.  If it is one, the number is,
+	     according to Intel's specification, an invalid number.  We
+	     make the representation unique by explicitly clearing this
+	     bit.  */
+ res_ptr[N - 1] &= ~((mp_limb_t) 1 << ((LDBL_MANT_DIG - 1) % BITS_PER_MP_LIMB));
+
+ if (res_ptr[N - 1] != 0)
+ {
+ count_leading_zeros (cnt, res_ptr[N - 1]);
+ if (cnt != 0)
+ {
+#if N == 2
+ res_ptr[N - 1] = res_ptr[N - 1] << cnt
+ | (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
+ res_ptr[0] <<= cnt;
+#else
+ res_ptr[N - 1] <<= cnt;
+#endif
+ }
+ *expt = LDBL_MIN_EXP - 1 - cnt;
+ }
+ else if (res_ptr[0] != 0)
+ {
+ count_leading_zeros (cnt, res_ptr[0]);
+ res_ptr[N - 1] = res_ptr[0] << cnt;
+ res_ptr[0] = 0;
+ *expt = LDBL_MIN_EXP - 1 - BITS_PER_MP_LIMB - cnt;
+ }
+ else
+ {
+ /* This is the special case of the pseudo denormal number
+ with only the implicit leading bit set. The value is
+ in fact a normal number and so we have to treat this
+ case differently. */
+#if N == 2
+ res_ptr[N - 1] = 0x80000000ul;
+#else
+ res_ptr[0] = 0x8000000000000000ul;
+#endif
+ *expt = LDBL_MIN_EXP - 1;
+ }
+ }
+ }
+ else if (u.ieee.exponent < 0x7fff
+#if N == 2
+ && res_ptr[0] == 0
+#endif
+ && res_ptr[N - 1] == 0)
+ /* Pseudo zero. */
+ *expt = 0;
+
+ return N;
+}
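
For orientation, a small self-contained program that picks apart the same 80-bit layout by hand. It assumes a little-endian x86 long double (15-bit biased exponent, 64-bit significand with an explicit integer bit) and exists only to make the bit fields above concrete:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main (void)
{
  long double x = 1.0L;
  unsigned char b[10];
  memcpy (b, &x, 10);		/* the low 10 bytes hold the 80-bit value */

  uint16_t se = (uint16_t) (b[8] | (b[9] << 8));   /* sign + exponent */
  uint64_t mant;
  memcpy (&mant, b, 8);		/* 64-bit significand, explicit leading 1 */

  printf ("sign=%u exp=0x%04x mant=0x%016llx\n",
	  se >> 15, se & 0x7fff, (unsigned long long) mant);
  /* For 1.0L this prints sign=0, exp=0x3fff (bias 16383) and
     mant=0x8000000000000000: the explicit integer bit is set.  */
  return 0;
}
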
diff --git a/REORG.TODO/sysdeps/i386/ldsodefs.h b/REORG.TODO/sysdeps/i386/ldsodefs.h
new file mode 100644
index 0000000000..a369f5fc68
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/ldsodefs.h
@@ -0,0 +1,41 @@
+/* Run-time dynamic linker data structures for loaded ELF shared objects.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _I386_LDSODEFS_H
+#define _I386_LDSODEFS_H 1
+
+#include <elf.h>
+#include <cpu-features.h>
+
+struct La_i86_regs;
+struct La_i86_retval;
+
+#define ARCH_PLTENTER_MEMBERS \
+ Elf32_Addr (*i86_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \
+ uintptr_t *, struct La_i86_regs *, \
+ unsigned int *, const char *name, \
+ long int *framesizep)
+
+#define ARCH_PLTEXIT_MEMBERS \
+ unsigned int (*i86_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *, \
+ uintptr_t *, const struct La_i86_regs *, \
+ struct La_i86_retval *, const char *)
+
+#include_next <ldsodefs.h>
+
+#endif
diff --git a/REORG.TODO/sysdeps/i386/link-defines.sym b/REORG.TODO/sysdeps/i386/link-defines.sym
new file mode 100644
index 0000000000..0995adb37f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/link-defines.sym
@@ -0,0 +1,20 @@
+#include "link.h"
+#include <stddef.h>
+
+--
+LONG_DOUBLE_SIZE sizeof (long double)
+
+LR_SIZE sizeof (struct La_i86_regs)
+LR_EDX_OFFSET offsetof (struct La_i86_regs, lr_edx)
+LR_ECX_OFFSET offsetof (struct La_i86_regs, lr_ecx)
+LR_EAX_OFFSET offsetof (struct La_i86_regs, lr_eax)
+LR_EBP_OFFSET offsetof (struct La_i86_regs, lr_ebp)
+LR_ESP_OFFSET offsetof (struct La_i86_regs, lr_esp)
+
+LRV_SIZE sizeof (struct La_i86_retval)
+LRV_EAX_OFFSET offsetof (struct La_i86_retval, lrv_eax)
+LRV_EDX_OFFSET offsetof (struct La_i86_retval, lrv_edx)
+LRV_ST0_OFFSET offsetof (struct La_i86_retval, lrv_st0)
+LRV_ST1_OFFSET offsetof (struct La_i86_retval, lrv_st1)
+LRV_BND0_OFFSET offsetof (struct La_i86_retval, lrv_bnd0)
+LRV_BND1_OFFSET offsetof (struct La_i86_retval, lrv_bnd1)
diff --git a/REORG.TODO/sysdeps/i386/lshift.S b/REORG.TODO/sysdeps/i386/lshift.S
new file mode 100644
index 0000000000..fa4b07793f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/lshift.S
@@ -0,0 +1,103 @@
+/* i80386 __mpn_lshift --
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+12 /* space for 3 saved regs */
+#define RES PARMS
+#define S RES+4
+#define SIZE S+4
+#define CNT SIZE+4
+
+ .text
+ENTRY (__mpn_lshift)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 8)
+ movl S(%esp),%esi
+ cfi_rel_offset (esi, 4)
+ movl SIZE(%esp),%edx
+ movl CNT(%esp),%ecx
+ subl $4,%esi /* adjust s_ptr */
+
+ movl (%esi,%edx,4),%ebx /* read most significant limb */
+ cfi_rel_offset (ebx, 0)
+ cfi_remember_state
+ xorl %eax,%eax
+ shldl %cl,%ebx,%eax /* compute carry limb */
+ decl %edx
+ jz L(end)
+ pushl %eax /* push carry limb onto stack */
+ cfi_adjust_cfa_offset (4)
+ testb $1,%dl
+ jnz L(1) /* enter loop in the middle */
+ movl %ebx,%eax
+
+ ALIGN (3)
+L(oop): movl (%esi,%edx,4),%ebx /* load next lower limb */
+ shldl %cl,%ebx,%eax /* compute result limb */
+ movl %eax,(%edi,%edx,4) /* store it */
+ decl %edx
+L(1): movl (%esi,%edx,4),%eax
+ shldl %cl,%eax,%ebx
+ movl %ebx,(%edi,%edx,4)
+ decl %edx
+ jnz L(oop)
+
+ shll %cl,%eax /* compute least significant limb */
+ movl %eax,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+ cfi_adjust_cfa_offset (-4)
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+
+ cfi_restore_state
+L(end): shll %cl,%ebx /* compute least significant limb */
+ movl %ebx,(%edi) /* store it */
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_lshift)
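
A portable C rendering of the same loop on 32-bit limbs may help when reading the shldl sequence; cnt must satisfy 0 < cnt < 32, and the return value is the carry limb the assembly leaves in %eax:

#include <stdint.h>
#include <stddef.h>

/* Shift {sp, n} left by cnt bits into {rp, n}; return the bits shifted
   out of the most significant limb (the "carry limb").  */
static uint32_t
mpn_lshift_ref (uint32_t *rp, const uint32_t *sp, size_t n, unsigned int cnt)
{
  uint32_t carry = sp[n - 1] >> (32 - cnt);
  for (size_t i = n - 1; i > 0; i--)
    rp[i] = (sp[i] << cnt) | (sp[i - 1] >> (32 - cnt));
  rp[0] = sp[0] << cnt;
  return carry;
}
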
diff --git a/REORG.TODO/sysdeps/i386/machine-gmon.h b/REORG.TODO/sysdeps/i386/machine-gmon.h
new file mode 100644
index 0000000000..d5d8cdf7c6
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/machine-gmon.h
@@ -0,0 +1,40 @@
+/* i386-specific implementation of profiling support.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* We need a special version of the `mcount' function since for ix86 it
+   must not clobber any register.  There are several reasons for this:
+   - there is a bug in gcc as of version 2.7.2.2 which prohibits the
+     use of profiling together with nested functions;
+   - the ELF `fixup' function uses GCC's regparm feature;
+   - some (future) systems might want to pass parameters in registers.  */
+
+/* We must not pollute the global namespace. */
+#define mcount_internal __mcount_internal
+
+extern void mcount_internal (u_long frompc, u_long selfpc) internal_function;
+
+#define _MCOUNT_DECL(frompc, selfpc) \
+void internal_function mcount_internal (u_long frompc, u_long selfpc)
+
+
+/* Define MCOUNT as empty since we have the implementation in another
+ file. */
+#define MCOUNT
diff --git a/REORG.TODO/sysdeps/i386/memchr.S b/REORG.TODO/sysdeps/i386/memchr.S
new file mode 100644
index 0000000000..db4a6418ff
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memchr.S
@@ -0,0 +1,322 @@
+/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR
+   within the first LEN bytes of STR.  For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+ This version is developed using the same algorithm as the fast C
+ version which carries the following introduction:
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+8 /* space for 2 saved regs */
+#define RTN PARMS
+#define STR RTN
+#define CHR STR+4
+#define LEN CHR+4
+
+ .text
+ENTRY (__memchr)
+
+ /* Save callee-safe registers used in this function. */
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+
+ /* Load parameters into registers. */
+ movl STR(%esp), %eax /* str: pointer to memory block. */
+ movl CHR(%esp), %edx /* c: byte we are looking for. */
+ movl LEN(%esp), %esi /* len: length of memory block. */
+ cfi_rel_offset (esi, 4)
+
+	/* If we must not test more than three characters, test
+	   them one by one.  This also covers a length of 0.  */
+ cmpl $4, %esi
+ jb L(3)
+
+	/* At the moment %edx contains CHR.  What we need for the
+	   algorithm is CHR in all bytes of the dword.  Avoid
+	   operations on 16 bit words because these require a
+	   prefix byte (and one more cycle).  */
+ movb %dl, %dh /* Now it is 0|0|c|c */
+ movl %edx, %ecx
+ shll $16, %edx /* Now c|c|0|0 */
+ movw %cx, %dx /* And finally c|c|c|c */
+
+	/* Better performance can be achieved if the word (32
+	   bit) memory access is aligned on a four-byte boundary.
+	   So process the first bytes one by one until the boundary
+	   is reached.  Don't use a loop, for better performance.  */
+
+ testb $3, %al /* correctly aligned ? */
+ je L(2) /* yes => begin loop */
+ cmpb %dl, (%eax) /* compare byte */
+ je L(9) /* target found => return */
+ incl %eax /* increment source pointer */
+ decl %esi /* decrement length counter */
+ je L(4) /* len==0 => return NULL */
+
+ testb $3, %al /* correctly aligned ? */
+ je L(2) /* yes => begin loop */
+ cmpb %dl, (%eax) /* compare byte */
+ je L(9) /* target found => return */
+ incl %eax /* increment source pointer */
+ decl %esi /* decrement length counter */
+ je L(4) /* len==0 => return NULL */
+
+ testb $3, %al /* correctly aligned ? */
+ je L(2) /* yes => begin loop */
+ cmpb %dl, (%eax) /* compare byte */
+ je L(9) /* target found => return */
+ incl %eax /* increment source pointer */
+ decl %esi /* decrement length counter */
+ /* no test for len==0 here, because this is done in the
+ loop head */
+ jmp L(2)
+
+ /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+ change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-31 is set, there will be a carry
+ into bit 32 (=carry flag), so all of the hole bits will
+ be changed.
+
+ 3) But wait! Aren't we looking for CHR, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is CHR. This turns each byte that is CHR
+ into a zero. */
+
+
+ /* Each round the main loop processes 16 bytes. */
+
+ ALIGN (4)
+
+L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+
+	/* According to the algorithm we would have to reverse the effect
+	   of the XOR first and then test the overflow bits.  But because
+	   the following XOR would destroy the carry flag and it would (in
+	   a representation with more than 32 bits) not alter the last
+	   overflow, we can test this condition now.  If no carry is
+	   signaled, no overflow occurred in the last byte => it was 0.  */
+ jnc L(8)
+
+ /* We are only interested in carry bits that change due to the
+ previous add, so remove original bits */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+
+ /* Now test for the other three overflow bits. */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+
+ /* If at least one byte of the word is CHR we don't get 0 in %edi. */
+ jnz L(8) /* found it => return pointer */
+
+	/* This process is unfolded four times for better performance.
+	   We don't increment the source pointer each time.  Instead we
+	   use offsets and increment by 16 in each run of the loop.  But
+	   before probing for the matching byte we need some extra code
+	   (following LL(13) below).  Even the length can be compared
+	   against constants instead of being decremented each time.  */
+
+ movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(7) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(7) /* found it => return pointer */
+
+ movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(6) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(6) /* found it => return pointer */
+
+ movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(5) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(5) /* found it => return pointer */
+
+ /* Adjust both counters for a full round, i.e. 16 bytes. */
+ addl $16, %eax
+L(2): subl $16, %esi
+ jae L(1) /* Still more than 16 bytes remaining */
+
+ /* Process remaining bytes separately. */
+ cmpl $4-16, %esi /* rest < 4 bytes? */
+	jb	L(3)		/* yes, then test byte by byte */
+
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(8) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jne L(8) /* found it => return pointer */
+ addl $4, %eax /* adjust source pointer */
+
+ cmpl $8-16, %esi /* rest < 8 bytes? */
+	jb	L(3)		/* yes, then test byte by byte */
+
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(8) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jne L(8) /* found it => return pointer */
+ addl $4, %eax /* adjust source pointer */
+
+ cmpl $12-16, %esi /* rest < 12 bytes? */
+	jb	L(3)		/* yes, then test byte by byte */
+
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(8) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jne L(8) /* found it => return pointer */
+ addl $4, %eax /* adjust source pointer */
+
+ /* Check the remaining bytes one by one. */
+L(3): andl $3, %esi /* mask out uninteresting bytes */
+ jz L(4) /* no remaining bytes => return NULL */
+
+ cmpb %dl, (%eax) /* compare byte with CHR */
+	je	L(9)		/* equal, then return pointer */
+ incl %eax /* increment source pointer */
+ decl %esi /* decrement length */
+ jz L(4) /* no remaining bytes => return NULL */
+
+ cmpb %dl, (%eax) /* compare byte with CHR */
+	je	L(9)		/* equal, then return pointer */
+ incl %eax /* increment source pointer */
+ decl %esi /* decrement length */
+ jz L(4) /* no remaining bytes => return NULL */
+
+ cmpb %dl, (%eax) /* compare byte with CHR */
+	je	L(9)		/* equal, then return pointer */
+
+L(4): /* no byte found => return NULL */
+ xorl %eax, %eax
+ jmp L(9)
+
+ /* add missing source pointer increments */
+L(5): addl $4, %eax
+L(6): addl $4, %eax
+L(7): addl $4, %eax
+
+	/* Test for the matching byte in the word.  %ecx contains a NUL
+	   char in the byte position which originally held the byte we
+	   are looking for.  */
+L(8): testb %cl, %cl /* test first byte in dword */
+ jz L(9) /* if zero => return pointer */
+ incl %eax /* increment source pointer */
+
+ testb %ch, %ch /* test second byte in dword */
+ jz L(9) /* if zero => return pointer */
+ incl %eax /* increment source pointer */
+
+ testl $0xff0000, %ecx /* test third byte in dword */
+ jz L(9) /* if zero => return pointer */
+ incl %eax /* increment source pointer */
+
+	/* No further test needed since we know it is one of the four bytes.  */
+L(9): popl %edi /* pop saved registers */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+
+ ret
+END (__memchr)
+
+weak_alias (__memchr, memchr)
+libc_hidden_builtin_def (memchr)
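
The long magic-value commentary condenses to a few lines of C. The test below is a common equivalent formulation of the same word-at-a-time idea, not a transcription of the carry-flag sequence the assembly uses:

#include <stdint.h>

/* One step of word-at-a-time memchr: XOR spreads C across the word so
   matching bytes become zero, then the zero-in-word test fires iff any
   byte of X is zero.  */
static int
word_has_char (uint32_t w, unsigned char c)
{
  uint32_t x = w ^ (0x01010101u * c);	/* bytes equal to c are now 0 */
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}
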
diff --git a/REORG.TODO/sysdeps/i386/memcmp.S b/REORG.TODO/sysdeps/i386/memcmp.S
new file mode 100644
index 0000000000..01f8f8ef03
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memcmp.S
@@ -0,0 +1,73 @@
+/* Compare two memory blocks for differences in the first COUNT bytes.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#define BLK1 PARMS
+#define BLK2 BLK1+4
+#define LEN BLK2+4
+
+ .text
+ENTRY (memcmp)
+
+ pushl %esi /* Save callee-safe registers. */
+ cfi_adjust_cfa_offset (4)
+	movl %edi, %edx		/* Note that %edx is not used and can
+				   thus be used to save %edi.  It's faster.  */
+ cfi_register (edi, edx)
+
+ movl BLK1(%esp), %esi
+ cfi_rel_offset (esi, 0)
+ movl BLK2(%esp), %edi
+ movl LEN(%esp), %ecx
+
+ cld /* Set direction of comparison. */
+
+ xorl %eax, %eax /* Default result. */
+
+ repe /* Compare at most %ecx bytes. */
+ cmpsb
+ jz L(1) /* If even last byte was equal we return 0. */
+
+	/* The memory blocks are not equal.  So the result of the last
+	   subtraction is present in the carry flag.  It is set when
+	   the byte in block #2 is bigger.  In that case we have to
+	   return -1 (= 0xffffffff), else 1.  */
+ sbbl %eax, %eax /* This is tricky. %eax == 0 and carry is set
+ or not depending on last subtraction. */
+
+	/* At this point %eax == 0 if the last byte of block #1 was bigger,
+	   and 0xffffffff if the last byte of block #2 was bigger.  The
+	   latter case is already correct but the former needs a little
+	   adjustment.  Note that the following operation does not change
+	   0xffffffff.  */
+ orb $1, %al /* Change 0 to 1. */
+
+L(1): popl %esi /* Restore registers. */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ movl %edx, %edi
+ cfi_restore (edi)
+
+ ret
+END (memcmp)
+
+#undef bcmp
+weak_alias (memcmp, bcmp)
+libc_hidden_builtin_def (memcmp)
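
The sbbl/orb pair is a branchless select worth spelling out: sbbl %eax,%eax yields 0 or -1 from the carry of the failing byte compare, and orb $1 maps 0 to 1 while leaving -1 intact. In C, for the already-known-unequal case:

/* Called only when the bytes differ; A and B are the first mismatching
   bytes of block #1 and block #2.  */
static int
memcmp_tail (unsigned char a, unsigned char b)
{
  int r = -(a < b);	/* sbbl %eax, %eax: 0 or -1 */
  return r | 1;		/* orb $1, %al: 0 -> 1, -1 unchanged */
}
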
diff --git a/REORG.TODO/sysdeps/i386/memcopy.h b/REORG.TODO/sysdeps/i386/memcopy.h
new file mode 100644
index 0000000000..dc6173ee29
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memcopy.h
@@ -0,0 +1,92 @@
+/* memcopy.h -- definitions for memory copy functions. i386 version.
+ Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdeps/generic/memcopy.h>
+
+#undef OP_T_THRES
+#define OP_T_THRES 8
+
+#undef BYTE_COPY_FWD
+#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
+ do { \
+ int __d0; \
+ asm volatile(/* Clear the direction flag, so copying goes forward. */ \
+ "cld\n" \
+ /* Copy bytes. */ \
+ "rep\n" \
+ "movsb" : \
+ "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
+ "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \
+ "memory"); \
+ } while (0)
+
+#undef BYTE_COPY_BWD
+#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
+ do \
+ { \
+ int __d0; \
+ asm volatile(/* Set the direction flag, so copying goes backwards. */ \
+ "std\n" \
+ /* Copy bytes. */ \
+ "rep\n" \
+ "movsb\n" \
+ /* Clear the dir flag. Convention says it should be 0. */ \
+ "cld" : \
+ "=D" (dst_ep), "=S" (src_ep), "=c" (__d0) : \
+ "0" (dst_ep - 1), "1" (src_ep - 1), "2" (nbytes) : \
+ "memory"); \
+ dst_ep += 1; \
+ src_ep += 1; \
+ } while (0)
+
+#undef WORD_COPY_FWD
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
+ do \
+ { \
+ int __d0; \
+ asm volatile(/* Clear the direction flag, so copying goes forward. */ \
+ "cld\n" \
+ /* Copy longwords. */ \
+ "rep\n" \
+ "movsl" : \
+ "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
+ "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) : \
+ "memory"); \
+ (nbytes_left) = (nbytes) % 4; \
+ } while (0)
+
+#undef WORD_COPY_BWD
+#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \
+ do \
+ { \
+ int __d0; \
+ asm volatile(/* Set the direction flag, so copying goes backwards. */ \
+ "std\n" \
+ /* Copy longwords. */ \
+ "rep\n" \
+ "movsl\n" \
+ /* Clear the dir flag. Convention says it should be 0. */ \
+ "cld" : \
+ "=D" (dst_ep), "=S" (src_ep), "=c" (__d0) : \
+ "0" (dst_ep - 4), "1" (src_ep - 4), "2" ((nbytes) / 4) : \
+ "memory"); \
+ dst_ep += 4; \
+ src_ep += 4; \
+ (nbytes_left) = (nbytes) % 4; \
+ } while (0)
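
A hedged sketch of how the string routines chain these macros for a forward copy; dstp, srcp and len are illustrative locals rather than the exact names used by the generic callers:

unsigned long dstp = (unsigned long) dst;
unsigned long srcp = (unsigned long) src;
size_t nbytes_left;

/* Bulk of the copy as 32-bit words (rep movsl), then the 0..3 byte
   tail (rep movsb).  Both macros advance dstp and srcp in place.  */
WORD_COPY_FWD (dstp, srcp, nbytes_left, len);
BYTE_COPY_FWD (dstp, srcp, nbytes_left);
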
diff --git a/REORG.TODO/sysdeps/i386/memcpy.S b/REORG.TODO/sysdeps/i386/memcpy.S
new file mode 100644
index 0000000000..06568ea724
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memcpy.S
@@ -0,0 +1,95 @@
+/* memcpy with REP MOVSB/STOSB
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY memcpy
+# define MEMCPY_CHK __memcpy_chk
+#endif
+
+#ifdef USE_AS_BCOPY
+# define STR2 12
+# define STR1 STR2+4
+# define N STR1+4
+#else
+# define STR1 12
+# define STR2 STR1+4
+# define N STR2+4
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+ .text
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+#endif
+ENTRY (MEMCPY)
+ PUSH (%esi)
+ PUSH (%edi)
+ movl N(%esp), %ecx
+ movl STR1(%esp), %edi
+ movl STR2(%esp), %esi
+ mov %edi, %eax
+#ifdef USE_AS_MEMPCPY
+ add %ecx, %eax
+#endif
+
+#ifdef USE_AS_MEMMOVE
+ cmp %esi, %edi
+ ja L(copy_backward)
+ je L(bwd_write_0bytes)
+#endif
+
+ rep movsb
+ POP (%edi)
+ POP (%esi)
+ ret
+
+#ifdef USE_AS_MEMMOVE
+L(copy_backward):
+ lea -1(%edi,%ecx), %edi
+ lea -1(%esi,%ecx), %esi
+ std
+ rep movsb
+ cld
+L(bwd_write_0bytes):
+ POP (%edi)
+ POP (%esi)
+ ret
+#endif
+
+END (MEMCPY)
+
+#ifndef USE_AS_BCOPY
+libc_hidden_builtin_def (MEMCPY)
+#endif
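
In C terms the memmove path reduces to a direction check around a byte copy; the backward branch mirrors the std/rep movsb sequence by walking from the last byte. A sketch of the contract, not the installed code:

#include <stddef.h>

static void *
memmove_ref (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (d <= s)			/* forward copy is safe: rep movsb */
    while (n--)
      *d++ = *s++;
  else				/* possible overlap: copy backwards */
    while (n--)
      d[n] = s[n];
  return dst;
}
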
diff --git a/REORG.TODO/sysdeps/i386/memcpy_chk.S b/REORG.TODO/sysdeps/i386/memcpy_chk.S
new file mode 100644
index 0000000000..0f6f585c41
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memcpy_chk.S
@@ -0,0 +1,34 @@
+/* Checking memcpy for i386.
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+ /* For libc.so this is defined in memcpy.S.
+ For libc.a, this is a separate source to avoid
+ memcpy bringing in __chk_fail and all routines
+ it calls. */
+ .text
+ENTRY (__memcpy_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp memcpy
+END (__memcpy_chk)
+#endif
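
The two movl/cmpl lines encode the _FORTIFY_SOURCE contract: the fourth argument is the compiler-known size of the destination object, and the copy aborts rather than overflow it. A C sketch (__chk_fail is the real glibc failure hook; the wrapper name here is illustrative):

#include <stddef.h>
#include <string.h>

extern void __chk_fail (void) __attribute__ ((__noreturn__));

void *
memcpy_chk_ref (void *dst, const void *src, size_t n, size_t dstlen)
{
  if (dstlen < n)	/* cmpl %eax, 16(%esp); jb __chk_fail */
    __chk_fail ();
  return memcpy (dst, src, n);
}
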
diff --git a/REORG.TODO/sysdeps/i386/memmove.S b/REORG.TODO/sysdeps/i386/memmove.S
new file mode 100644
index 0000000000..60a45d21e0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memmove.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY memmove
+#define MEMCPY_CHK __memmove_chk
+#include "memcpy.S"
diff --git a/REORG.TODO/sysdeps/i386/memmove_chk.S b/REORG.TODO/sysdeps/i386/memmove_chk.S
new file mode 100644
index 0000000000..0c7037cc05
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memmove_chk.S
@@ -0,0 +1,33 @@
+/* Checking memmove for i386
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+/* For libc.so this is defined in memmove.S. For libc.a, this is a
+ separate source to avoid memmove bringing in __chk_fail and all
+ routines it calls. */
+ .text
+ENTRY (__memmove_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp memmove
+END (__memmove_chk)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/mempcpy.S b/REORG.TODO/sysdeps/i386/mempcpy.S
new file mode 100644
index 0000000000..61addb75f4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/mempcpy.S
@@ -0,0 +1,7 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY __mempcpy
+#define MEMCPY_CHK __mempcpy_chk
+#include "memcpy.S"
+
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
diff --git a/REORG.TODO/sysdeps/i386/mempcpy_chk.S b/REORG.TODO/sysdeps/i386/mempcpy_chk.S
new file mode 100644
index 0000000000..4d8ac5c25b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/mempcpy_chk.S
@@ -0,0 +1,33 @@
+/* Checking mempcpy for i386
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+/* For libc.so this is defined in mempcpy.S. For libc.a, this is a
+ separate source to avoid mempcpy bringing in __chk_fail and all
+ routines it calls. */
+ .text
+ENTRY (__mempcpy_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp mempcpy
+END (__mempcpy_chk)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/memset.S b/REORG.TODO/sysdeps/i386/memset.S
new file mode 100644
index 0000000000..46ae65d2e4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memset.S
@@ -0,0 +1,68 @@
+/* memset with REP MOVSB/STOSB
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define STR1 8
+#ifdef USE_AS_BZERO
+#define N STR1+4
+#else
+#define STR2 STR1+4
+#define N STR2+4
+#endif
+
+ .text
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
+ENTRY (__memset_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk)
+#endif
+ENTRY (memset)
+ PUSH (%edi)
+ movl N(%esp), %ecx
+ movl STR1(%esp), %edi
+#ifdef USE_AS_BZERO
+ xor %eax, %eax
+#else
+ movzbl STR2(%esp), %eax
+ mov %edi, %edx
+#endif
+ rep stosb
+#ifndef USE_AS_BZERO
+ mov %edx, %eax
+#endif
+ POP (%edi)
+ ret
+END (memset)
+
+#ifndef USE_AS_BZERO
+libc_hidden_builtin_def (memset)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/memset_chk.S b/REORG.TODO/sysdeps/i386/memset_chk.S
new file mode 100644
index 0000000000..da7837111e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memset_chk.S
@@ -0,0 +1,33 @@
+/* Checking memset for i386.
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+/* For libc.so this is defined in memset.S. For libc.a, this is a
+ separate source to avoid memset bringing in __chk_fail and all
+ routines it calls. */
+ .text
+ENTRY (__memset_chk)
+ movl 12(%esp), %eax
+ cmpl %eax, 16(%esp)
+ jb __chk_fail
+ jmp memset
+END (__memset_chk)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/memusage.h b/REORG.TODO/sysdeps/i386/memusage.h
new file mode 100644
index 0000000000..30167be833
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/memusage.h
@@ -0,0 +1,20 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define GETSP() ({ register uintptr_t stack_ptr asm ("esp"); stack_ptr; })
+
+#include <sysdeps/generic/memusage.h>
diff --git a/REORG.TODO/sysdeps/i386/mp_clz_tab.c b/REORG.TODO/sysdeps/i386/mp_clz_tab.c
new file mode 100644
index 0000000000..860f98cc62
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/mp_clz_tab.c
@@ -0,0 +1 @@
+/* __clz_tab not needed on i386. */
diff --git a/REORG.TODO/sysdeps/i386/mul_1.S b/REORG.TODO/sysdeps/i386/mul_1.S
new file mode 100644
index 0000000000..cf83d1b343
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/mul_1.S
@@ -0,0 +1,86 @@
+/* i80386 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+ the result in a second limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define SIZE S1+4
+#define S2LIMB SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebx
+
+ .text
+ENTRY (__mpn_mul_1)
+
+ pushl %res_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %s1_ptr
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %s2_limb
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp), %res_ptr
+ cfi_rel_offset (res_ptr, 12)
+ movl S1(%esp), %s1_ptr
+ cfi_rel_offset (s1_ptr, 8)
+ movl SIZE(%esp), %size
+ movl S2LIMB(%esp), %s2_limb
+ cfi_rel_offset (s2_limb, 0)
+ leal (%res_ptr,%size,4), %res_ptr
+ leal (%s1_ptr,%size,4), %s1_ptr
+ negl %size
+ xorl %ebp, %ebp
+ cfi_rel_offset (ebp, 4)
+ ALIGN (3)
+L(oop):
+ movl (%s1_ptr,%size,4), %eax
+ mull %s2_limb
+ addl %ebp, %eax
+ movl %eax, (%res_ptr,%size,4)
+ adcl $0, %edx
+ movl %edx, %ebp
+
+ incl %size
+ jnz L(oop)
+ movl %ebp, %eax
+
+ popl %s2_limb
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s2_limb)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %s1_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (s1_ptr)
+ popl %res_ptr
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (res_ptr)
+
+ ret
+#undef size
+END (__mpn_mul_1)
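
The loop above is the textbook limb-by-limb multiply.  As a C sketch
(the real code works on GMP limbs; fixed 32-bit types are assumed here):

    #include <stdint.h>

    /* Multiply each limb of S1 by LIMB; the high half of every product
       becomes the carry into the next step (%ebp above), and the final
       carry is the return value (%eax).  */
    uint32_t
    mpn_mul_1_sketch (uint32_t *res, const uint32_t *s1, long size,
                      uint32_t limb)
    {
      uint32_t carry = 0;
      for (long i = 0; i < size; i++)
        {
          uint64_t p = (uint64_t) s1[i] * limb + carry;   /* mull; addl */
          res[i] = (uint32_t) p;                          /* low word */
          carry = (uint32_t) (p >> 32);                   /* adcl $0, %edx */
        }
      return carry;
    }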
diff --git a/REORG.TODO/sysdeps/i386/nptl/Makefile b/REORG.TODO/sysdeps/i386/nptl/Makefile
new file mode 100644
index 0000000000..2c61b352eb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/nptl/Makefile
@@ -0,0 +1,26 @@
+# Copyright (C) 2002-2017 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += tcb-offsets.sym
+endif
+
+ifeq ($(subdir),nptl)
+CFLAGS-pthread_create.c += -mpreferred-stack-boundary=4
+CFLAGS-tst-align.c += -mpreferred-stack-boundary=4
+CFLAGS-tst-align2.c += -mpreferred-stack-boundary=4
+endif
diff --git a/REORG.TODO/sysdeps/i386/nptl/pthread_spin_init.c b/REORG.TODO/sysdeps/i386/nptl/pthread_spin_init.c
new file mode 100644
index 0000000000..a1205b9698
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/nptl/pthread_spin_init.c
@@ -0,0 +1,19 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Not needed. pthread_spin_init is an alias for pthread_spin_unlock. */
diff --git a/REORG.TODO/sysdeps/i386/nptl/pthread_spin_lock.S b/REORG.TODO/sysdeps/i386/nptl/pthread_spin_lock.S
new file mode 100644
index 0000000000..160244b7a8
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/nptl/pthread_spin_lock.S
@@ -0,0 +1,37 @@
+/* Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <lowlevellock.h>
+
+ .globl pthread_spin_lock
+ .type pthread_spin_lock,@function
+ .align 16
+pthread_spin_lock:
+ mov 4(%esp), %eax
+1: LOCK
+ decl 0(%eax)
+ jne 2f
+ xor %eax, %eax
+ ret
+
+ .align 16
+2: rep
+ nop
+ cmpl $0, 0(%eax)
+ jg 1b
+ jmp 2b
+ .size pthread_spin_lock,.-pthread_spin_lock
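
A C sketch of the algorithm above, using GCC builtins (an assumption for
illustration; the real implementation is the assembly itself).  The lock
holds 1 when free; "lock decl" takes it by moving it to 0, and losers
spin read-only with pause ("rep nop") until it looks free again:

    int
    spin_lock_sketch (volatile int *lock)
    {
      while (__sync_fetch_and_sub (lock, 1) != 1)   /* LOCK decl; jne */
        while (*lock <= 0)                          /* cmpl $0; jg */
          __builtin_ia32_pause ();                  /* rep nop */
      return 0;
    }

Spinning on a plain read keeps the cache line shared between waiters;
only the retry of the atomic decrement forces it exclusive again.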
diff --git a/REORG.TODO/sysdeps/i386/nptl/pthread_spin_unlock.S b/REORG.TODO/sysdeps/i386/nptl/pthread_spin_unlock.S
new file mode 100644
index 0000000000..b6636ae8d7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/nptl/pthread_spin_unlock.S
@@ -0,0 +1,31 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+ .globl pthread_spin_unlock
+ .type pthread_spin_unlock,@function
+ .align 16
+pthread_spin_unlock:
+ movl 4(%esp), %eax
+ movl $1, (%eax)
+ xorl %eax, %eax
+ ret
+ .size pthread_spin_unlock,.-pthread_spin_unlock
+
+ /* The implementation of pthread_spin_init is identical. */
+ .globl pthread_spin_init
+pthread_spin_init = pthread_spin_unlock
diff --git a/REORG.TODO/sysdeps/i386/nptl/pthreaddef.h b/REORG.TODO/sysdeps/i386/nptl/pthreaddef.h
new file mode 100644
index 0000000000..54abccd11b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/nptl/pthreaddef.h
@@ -0,0 +1,40 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Default stack size. */
+#define ARCH_STACK_DEFAULT_SIZE (2 * 1024 * 1024)
+
+/* Required stack pointer alignment at beginning. SSE requires 16
+ bytes. */
+#define STACK_ALIGN 16
+
+/* Minimal stack size after allocating the thread descriptor and guard area.  */
+#define MINIMAL_REST_STACK 2048
+
+/* Alignment requirement for TCB.
+
+ Some processors such as Intel Atom pay a big penalty on every
+ access using a segment override if that segment's base is not
+ aligned to the size of a cache line. (See Intel 64 and IA-32
+ Architectures Optimization Reference Manual, section 13.3.3.3,
+ "Segment Base".) On such machines, a cache line is 64 bytes. */
+#define TCB_ALIGNMENT 64
+
+
+/* Location of current stack frame. */
+#define CURRENT_STACK_FRAME __builtin_frame_address (0)
diff --git a/REORG.TODO/sysdeps/i386/nptl/tcb-offsets.sym b/REORG.TODO/sysdeps/i386/nptl/tcb-offsets.sym
new file mode 100644
index 0000000000..695a810386
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/nptl/tcb-offsets.sym
@@ -0,0 +1,17 @@
+#include <sysdep.h>
+#include <tls.h>
+#include <kernel-features.h>
+
+RESULT offsetof (struct pthread, result)
+TID offsetof (struct pthread, tid)
+CANCELHANDLING offsetof (struct pthread, cancelhandling)
+CLEANUP_JMP_BUF offsetof (struct pthread, cleanup_jmp_buf)
+MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
+SYSINFO_OFFSET offsetof (tcbhead_t, sysinfo)
+CLEANUP offsetof (struct pthread, cleanup)
+CLEANUP_PREV offsetof (struct _pthread_cleanup_buffer, __prev)
+MUTEX_FUTEX offsetof (pthread_mutex_t, __data.__lock)
+POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
+#ifndef __ASSUME_PRIVATE_FUTEX
+PRIVATE_FUTEX offsetof (tcbhead_t, private_futex)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/nptl/tls.h b/REORG.TODO/sysdeps/i386/nptl/tls.h
new file mode 100644
index 0000000000..f9a6b11ecf
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/nptl/tls.h
@@ -0,0 +1,435 @@
+/* Definition for thread-local data handling. nptl/i386 version.
+ Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _TLS_H
+#define _TLS_H 1
+
+#include <dl-sysdep.h>
+#ifndef __ASSEMBLER__
+# include <stdbool.h>
+# include <stddef.h>
+# include <stdint.h>
+# include <stdlib.h>
+# include <sysdep.h>
+# include <libc-pointer-arith.h> /* For cast_to_integer. */
+# include <kernel-features.h>
+# include <dl-dtv.h>
+
+typedef struct
+{
+ void *tcb; /* Pointer to the TCB. Not necessarily the
+ thread descriptor used by libpthread. */
+ dtv_t *dtv;
+ void *self; /* Pointer to the thread descriptor. */
+ int multiple_threads;
+ uintptr_t sysinfo;
+ uintptr_t stack_guard;
+ uintptr_t pointer_guard;
+ int gscope_flag;
+#ifndef __ASSUME_PRIVATE_FUTEX
+ int private_futex;
+#else
+ int __glibc_reserved1;
+#endif
+ /* Reservation of some values for the TM ABI. */
+ void *__private_tm[4];
+ /* GCC split stack support. */
+ void *__private_ss;
+} tcbhead_t;
+
+# define TLS_MULTIPLE_THREADS_IN_TCB 1
+
+#else /* __ASSEMBLER__ */
+# include <tcb-offsets.h>
+#endif
+
+
+/* Alignment requirement for the stack. For IA-32 this is governed by
+ the SSE memory functions. */
+#define STACK_ALIGN 16
+
+#ifndef __ASSEMBLER__
+/* Get system call information. */
+# include <sysdep.h>
+
+/* The old way: using LDT. */
+
+/* Structure passed to `modify_ldt', `set_thread_area', and `clone' calls.  */
+struct user_desc
+{
+ unsigned int entry_number;
+ unsigned long int base_addr;
+ unsigned int limit;
+ unsigned int seg_32bit:1;
+ unsigned int contents:2;
+ unsigned int read_exec_only:1;
+ unsigned int limit_in_pages:1;
+ unsigned int seg_not_present:1;
+ unsigned int useable:1;
+ unsigned int empty:25;
+};
+
+/* Initializing bit fields is slow. We speed it up by using a union. */
+union user_desc_init
+{
+ struct user_desc desc;
+ unsigned int vals[4];
+};
+
+
+/* This is the size of the initial TCB. Can't be just sizeof (tcbhead_t),
+ because NPTL getpid, __libc_alloca_cutoff etc. need (almost) the whole
+ struct pthread even when not linked with -lpthread. */
+# define TLS_INIT_TCB_SIZE sizeof (struct pthread)
+
+/* Alignment requirements for the initial TCB. */
+# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread)
+
+/* This is the size of the TCB. */
+# define TLS_TCB_SIZE sizeof (struct pthread)
+
+/* Alignment requirements for the TCB. */
+# define TLS_TCB_ALIGN __alignof__ (struct pthread)
+
+/* The TCB can have any size and the memory following the address the
+ thread pointer points to is unspecified. Allocate the TCB there. */
+# define TLS_TCB_AT_TP 1
+# define TLS_DTV_AT_TP 0
+
+/* Get the thread descriptor definition. */
+# include <nptl/descr.h>
+
+
+/* Install the dtv pointer. The pointer passed is to the element with
+   index -1, which contains the length.  */
+# define INSTALL_DTV(descr, dtvp) \
+ ((tcbhead_t *) (descr))->dtv = (dtvp) + 1
+
+/* Install new dtv for current thread. */
+# define INSTALL_NEW_DTV(dtvp) \
+ ({ struct pthread *__pd; \
+ THREAD_SETMEM (__pd, header.dtv, (dtvp)); })
+
+/* Return dtv of given thread descriptor. */
+# define GET_DTV(descr) \
+ (((tcbhead_t *) (descr))->dtv)
+
+/* Macros to load from and store into segment registers. */
+# ifndef TLS_GET_GS
+# define TLS_GET_GS() \
+ ({ int __seg; __asm ("movw %%gs, %w0" : "=q" (__seg)); __seg & 0xffff; })
+# endif
+# ifndef TLS_SET_GS
+# define TLS_SET_GS(val) \
+ __asm ("movw %w0, %%gs" :: "q" (val))
+# endif
+
+#ifdef NEED_DL_SYSINFO
+# define INIT_SYSINFO \
+ _head->sysinfo = GLRO(dl_sysinfo)
+# define SETUP_THREAD_SYSINFO(pd) \
+ ((pd)->header.sysinfo = THREAD_GETMEM (THREAD_SELF, header.sysinfo))
+# define CHECK_THREAD_SYSINFO(pd) \
+ assert ((pd)->header.sysinfo == THREAD_GETMEM (THREAD_SELF, header.sysinfo))
+#else
+# define INIT_SYSINFO
+#endif
+
+#ifndef LOCK_PREFIX
+# ifdef UP
+# define LOCK_PREFIX /* nothing */
+# else
+# define LOCK_PREFIX "lock;"
+# endif
+#endif
+
+static inline void __attribute__ ((unused, always_inline))
+tls_fill_user_desc (union user_desc_init *desc,
+ unsigned int entry_number,
+ void *pd)
+{
+ desc->vals[0] = entry_number;
+ /* The 'base_addr' field. Pointer to the TCB. */
+ desc->vals[1] = (unsigned long int) pd;
+ /* The 'limit' field. We use 4GB which is 0xfffff pages. */
+ desc->vals[2] = 0xfffff;
+ /* Collapsed value of the bitfield:
+ .seg_32bit = 1
+ .contents = 0
+ .read_exec_only = 0
+ .limit_in_pages = 1
+ .seg_not_present = 0
+ .useable = 1 */
+ desc->vals[3] = 0x51;
+}
+
+/* Code to initialize the thread pointer for the first time.  This
+   needs special attention since 'errno' is not yet available: if
+   the operation can fail, 'errno' must not be touched.  */
+# define TLS_INIT_TP(thrdescr) \
+ ({ void *_thrdescr = (thrdescr); \
+ tcbhead_t *_head = _thrdescr; \
+ union user_desc_init _segdescr; \
+ int _result; \
+ \
+ _head->tcb = _thrdescr; \
+ /* For now the thread descriptor is at the same address. */ \
+ _head->self = _thrdescr; \
+ /* New syscall handling support. */ \
+ INIT_SYSINFO; \
+ \
+ /* Let the kernel pick a value for the 'entry_number' field. */ \
+ tls_fill_user_desc (&_segdescr, -1, _thrdescr); \
+ \
+ /* Install the TLS. */ \
+ INTERNAL_SYSCALL_DECL (err); \
+ _result = INTERNAL_SYSCALL (set_thread_area, err, 1, &_segdescr.desc); \
+ \
+ if (_result == 0) \
+ /* We know the index in the GDT, now load the segment register. \
+       The low three bits of the selector encode the requested	\
+       privilege level (3) and select the GDT (TI = 0).	\
+ \
+ Note that we have to do this even if the numeric value of \
+ the descriptor does not change. Loading the segment register \
+ causes the segment information from the GDT to be loaded \
+ which is necessary since we have changed it. */ \
+ TLS_SET_GS (_segdescr.desc.entry_number * 8 + 3); \
+ \
+ _result == 0 ? NULL \
+ : "set_thread_area failed when setting up thread-local storage\n"; })
+
+# define TLS_DEFINE_INIT_TP(tp, pd) \
+ union user_desc_init _segdescr; \
+ /* Find the 'entry_number' field that the kernel selected in TLS_INIT_TP. \
+     The low three bits of the segment register value (RPL and table \
+     indicator) are shifted away; the index is taken from %gs in \
+ the current thread. */ \
+ tls_fill_user_desc (&_segdescr, TLS_GET_GS () >> 3, pd); \
+ const struct user_desc *tp = &_segdescr.desc
+
+
+/* Return the address of the dtv for the current thread. */
+# define THREAD_DTV() \
+ ({ struct pthread *__pd; \
+ THREAD_GETMEM (__pd, header.dtv); })
+
+
+/* Return the thread descriptor for the current thread.
+
+ The contained asm must *not* be marked volatile since otherwise
+ assignments like
+ pthread_descr self = thread_self();
+ do not get optimized away. */
+# define THREAD_SELF \
+ ({ struct pthread *__self; \
+ asm ("movl %%gs:%c1,%0" : "=r" (__self) \
+ : "i" (offsetof (struct pthread, header.self))); \
+ __self;})
+
+/* Magic for libthread_db to know how to do THREAD_SELF. */
+# define DB_THREAD_SELF \
+ REGISTER_THREAD_AREA (32, offsetof (struct user_regs_struct, xgs), 3) \
+ REGISTER_THREAD_AREA (64, 26 * 8, 3) /* x86-64's user_regs_struct->gs */
+
+
+/* Read member of the thread descriptor directly. */
+# define THREAD_GETMEM(descr, member) \
+ ({ __typeof (descr->member) __value; \
+ if (sizeof (__value) == 1) \
+ asm volatile ("movb %%gs:%P2,%b0" \
+ : "=q" (__value) \
+ : "0" (0), "i" (offsetof (struct pthread, member))); \
+ else if (sizeof (__value) == 4) \
+ asm volatile ("movl %%gs:%P1,%0" \
+ : "=r" (__value) \
+ : "i" (offsetof (struct pthread, member))); \
+ else \
+ { \
+ if (sizeof (__value) != 8) \
+ /* There should not be any value with a size other than 1, \
+ 4 or 8. */ \
+ abort (); \
+ \
+ asm volatile ("movl %%gs:%P1,%%eax\n\t" \
+ "movl %%gs:%P2,%%edx" \
+ : "=A" (__value) \
+ : "i" (offsetof (struct pthread, member)), \
+ "i" (offsetof (struct pthread, member) + 4)); \
+ } \
+ __value; })
+
+
+/* Same as THREAD_GETMEM, but the member offset can be non-constant. */
+# define THREAD_GETMEM_NC(descr, member, idx) \
+ ({ __typeof (descr->member[0]) __value; \
+ if (sizeof (__value) == 1) \
+ asm volatile ("movb %%gs:%P2(%3),%b0" \
+ : "=q" (__value) \
+ : "0" (0), "i" (offsetof (struct pthread, member[0])), \
+ "r" (idx)); \
+ else if (sizeof (__value) == 4) \
+ asm volatile ("movl %%gs:%P1(,%2,4),%0" \
+ : "=r" (__value) \
+ : "i" (offsetof (struct pthread, member[0])), \
+ "r" (idx)); \
+ else \
+ { \
+ if (sizeof (__value) != 8) \
+ /* There should not be any value with a size other than 1, \
+ 4 or 8. */ \
+ abort (); \
+ \
+ asm volatile ("movl %%gs:%P1(,%2,8),%%eax\n\t" \
+ "movl %%gs:4+%P1(,%2,8),%%edx" \
+ : "=&A" (__value) \
+ : "i" (offsetof (struct pthread, member[0])), \
+ "r" (idx)); \
+ } \
+ __value; })
+
+
+
+/* Set member of the thread descriptor directly. */
+# define THREAD_SETMEM(descr, member, value) \
+ ({ if (sizeof (descr->member) == 1) \
+ asm volatile ("movb %b0,%%gs:%P1" : \
+ : "iq" (value), \
+ "i" (offsetof (struct pthread, member))); \
+ else if (sizeof (descr->member) == 4) \
+ asm volatile ("movl %0,%%gs:%P1" : \
+ : "ir" (value), \
+ "i" (offsetof (struct pthread, member))); \
+ else \
+ { \
+ if (sizeof (descr->member) != 8) \
+ /* There should not be any value with a size other than 1, \
+ 4 or 8. */ \
+ abort (); \
+ \
+ asm volatile ("movl %%eax,%%gs:%P1\n\t" \
+ "movl %%edx,%%gs:%P2" : \
+ : "A" ((uint64_t) cast_to_integer (value)), \
+ "i" (offsetof (struct pthread, member)), \
+ "i" (offsetof (struct pthread, member) + 4)); \
+ }})
+
+
+/* Same as THREAD_SETMEM, but the member offset can be non-constant. */
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+ ({ if (sizeof (descr->member[0]) == 1) \
+ asm volatile ("movb %b0,%%gs:%P1(%2)" : \
+ : "iq" (value), \
+ "i" (offsetof (struct pthread, member)), \
+ "r" (idx)); \
+ else if (sizeof (descr->member[0]) == 4) \
+ asm volatile ("movl %0,%%gs:%P1(,%2,4)" : \
+ : "ir" (value), \
+ "i" (offsetof (struct pthread, member)), \
+ "r" (idx)); \
+ else \
+ { \
+ if (sizeof (descr->member[0]) != 8) \
+ /* There should not be any value with a size other than 1, \
+ 4 or 8. */ \
+ abort (); \
+ \
+ asm volatile ("movl %%eax,%%gs:%P1(,%2,8)\n\t" \
+ "movl %%edx,%%gs:4+%P1(,%2,8)" : \
+ : "A" ((uint64_t) cast_to_integer (value)), \
+ "i" (offsetof (struct pthread, member)), \
+ "r" (idx)); \
+ }})
+
+
+/* Atomic compare and exchange on TLS, returning old value. */
+#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+ ({ __typeof (descr->member) __ret; \
+ __typeof (oldval) __old = (oldval); \
+ if (sizeof (descr->member) == 4) \
+ asm volatile (LOCK_PREFIX "cmpxchgl %2, %%gs:%P3" \
+ : "=a" (__ret) \
+ : "0" (__old), "r" (newval), \
+ "i" (offsetof (struct pthread, member))); \
+ else \
+      /* Not necessary for other sizes at the moment.  */ \
+ abort (); \
+ __ret; })
+
+
+/* Atomic logical and. */
+#define THREAD_ATOMIC_AND(descr, member, val) \
+ (void) ({ if (sizeof ((descr)->member) == 4) \
+ asm volatile (LOCK_PREFIX "andl %1, %%gs:%P0" \
+ :: "i" (offsetof (struct pthread, member)), \
+ "ir" (val)); \
+ else \
+      /* Not necessary for other sizes at the moment.  */ \
+ abort (); })
+
+
+/* Atomic set bit. */
+#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+ (void) ({ if (sizeof ((descr)->member) == 4) \
+ asm volatile (LOCK_PREFIX "orl %1, %%gs:%P0" \
+ :: "i" (offsetof (struct pthread, member)), \
+ "ir" (1 << (bit))); \
+ else \
+      /* Not necessary for other sizes at the moment.  */ \
+ abort (); })
+
+
+/* Set the stack guard field in TCB head. */
+#define THREAD_SET_STACK_GUARD(value) \
+ THREAD_SETMEM (THREAD_SELF, header.stack_guard, value)
+#define THREAD_COPY_STACK_GUARD(descr) \
+ ((descr)->header.stack_guard \
+ = THREAD_GETMEM (THREAD_SELF, header.stack_guard))
+
+
+/* Set the pointer guard field in the TCB head. */
+#define THREAD_SET_POINTER_GUARD(value) \
+ THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
+#define THREAD_COPY_POINTER_GUARD(descr) \
+ ((descr)->header.pointer_guard \
+ = THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
+
+
+/* Get and set the global scope generation counter in the TCB head. */
+#define THREAD_GSCOPE_FLAG_UNUSED 0
+#define THREAD_GSCOPE_FLAG_USED 1
+#define THREAD_GSCOPE_FLAG_WAIT 2
+#define THREAD_GSCOPE_RESET_FLAG() \
+ do \
+ { int __res; \
+ asm volatile ("xchgl %0, %%gs:%P1" \
+ : "=r" (__res) \
+ : "i" (offsetof (struct pthread, header.gscope_flag)), \
+ "0" (THREAD_GSCOPE_FLAG_UNUSED)); \
+ if (__res == THREAD_GSCOPE_FLAG_WAIT) \
+ lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \
+ } \
+ while (0)
+#define THREAD_GSCOPE_SET_FLAG() \
+ THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
+#define THREAD_GSCOPE_WAIT() \
+ GL(dl_wait_lookup_done) ()
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* tls.h */
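
The collapsed constant 0x51 used by tls_fill_user_desc above can be
checked with a standalone sketch (assuming GCC's LSB-first bitfield
packing on i386, which is exactly what the union trick relies on):

    #include <assert.h>

    /* Minimal copy of the bitfield word from struct user_desc above.  */
    struct desc_bits
    {
      unsigned int seg_32bit:1;
      unsigned int contents:2;
      unsigned int read_exec_only:1;
      unsigned int limit_in_pages:1;
      unsigned int seg_not_present:1;
      unsigned int useable:1;
      unsigned int empty:25;
    };
    union desc_word { struct desc_bits b; unsigned int val; };

    int
    main (void)
    {
      union desc_word u = { .val = 0 };
      u.b.seg_32bit = 1;        /* bit 0 -> 0x01 */
      u.b.limit_in_pages = 1;   /* bit 4 -> 0x10 */
      u.b.useable = 1;          /* bit 6 -> 0x40 */
      assert (u.val == 0x51);   /* the word stored into vals[3] */
      return 0;
    }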
diff --git a/REORG.TODO/sysdeps/i386/preconfigure b/REORG.TODO/sysdeps/i386/preconfigure
new file mode 100644
index 0000000000..c8fefd1bff
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/preconfigure
@@ -0,0 +1,5 @@
+# preconfigure fragment for i386.
+
+case "$machine" in
+i[4567]86) base_machine=i386 machine=i386/$machine ;;
+esac
diff --git a/REORG.TODO/sysdeps/i386/pthread_spin_trylock.S b/REORG.TODO/sysdeps/i386/pthread_spin_trylock.S
new file mode 100644
index 0000000000..f71a9fcb2d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/pthread_spin_trylock.S
@@ -0,0 +1,46 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <pthread-errnos.h>
+
+
+#ifdef UP
+# define LOCK
+#else
+# define LOCK lock
+#endif
+
+ .globl pthread_spin_trylock
+ .type pthread_spin_trylock,@function
+ .align 16
+pthread_spin_trylock:
+ movl 4(%esp), %edx
+ movl $1, %eax
+ xorl %ecx, %ecx
+ LOCK
+ cmpxchgl %ecx, (%edx)
+ movl $EBUSY, %eax
+#ifdef HAVE_CMOV
+ cmovel %ecx, %eax
+#else
+ jne 0f
+ movl %ecx, %eax
+0:
+#endif
+ ret
+ .size pthread_spin_trylock,.-pthread_spin_trylock
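
The cmpxchg sequence above, as a C sketch with a GCC builtin (shown for
illustration only): atomically replace 1 (free) with 0 (taken), and fail
with EBUSY when the lock was not free.  The cmov/jne pair merely selects
the return value without a branch where CMOV is available:

    #include <errno.h>

    int
    spin_trylock_sketch (volatile int *lock)
    {
      /* cmpxchgl %ecx, (%edx) with %eax = 1, %ecx = 0 */
      return __sync_bool_compare_and_swap (lock, 1, 0) ? 0 : EBUSY;
    }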
diff --git a/REORG.TODO/sysdeps/i386/rawmemchr.S b/REORG.TODO/sysdeps/i386/rawmemchr.S
new file mode 100644
index 0000000000..246ec3f18e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/rawmemchr.S
@@ -0,0 +1,222 @@
+/* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+ This version is developed using the same algorithm as the fast C
+ version which carries the following introduction:
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#define RTN PARMS
+#define STR RTN
+#define CHR STR+4
+
+ .text
+ENTRY (__rawmemchr)
+
+ /* Save callee-safe register used in this function. */
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+
+ /* Load parameters into registers. */
+ movl STR(%esp), %eax
+ movl CHR(%esp), %edx
+
+ /* At the moment %edx contains C. What we need for the
+ algorithm is C in all bytes of the dword. Avoid
+	   operations on 16-bit words because these require a
+ prefix byte (and one more cycle). */
+ movb %dl, %dh /* Now it is 0|0|c|c */
+ movl %edx, %ecx
+ shll $16, %edx /* Now c|c|0|0 */
+ movw %cx, %dx /* And finally c|c|c|c */
+
+	/* Better performance can be achieved if the word (32-bit)
+	   memory accesses are aligned on a four-byte boundary, so
+	   process the first bytes one by one until the boundary is
+	   reached.  The tests are unrolled rather than looped for
+	   better performance.  */
+
+ testb $3, %al /* correctly aligned ? */
+ je L(1) /* yes => begin loop */
+ cmpb %dl, (%eax) /* compare byte */
+ je L(9) /* target found => return */
+ incl %eax /* increment source pointer */
+
+ testb $3, %al /* correctly aligned ? */
+ je L(1) /* yes => begin loop */
+ cmpb %dl, (%eax) /* compare byte */
+ je L(9) /* target found => return */
+ incl %eax /* increment source pointer */
+
+ testb $3, %al /* correctly aligned ? */
+ je L(1) /* yes => begin loop */
+ cmpb %dl, (%eax) /* compare byte */
+ je L(9) /* target found => return */
+ incl %eax /* increment source pointer */
+
+ /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+ change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-31 is set, there will be a carry
+ into bit 32 (=carry flag), so all of the hole bits will
+ be changed.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+
+ /* Each round the main loop processes 16 bytes. */
+ ALIGN (4)
+
+L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+
+ /* According to the algorithm we had to reverse the effect of the
+ XOR first and then test the overflow bits. But because the
+ following XOR would destroy the carry flag and it would (in a
+	   representation with more than 32 bits) not alter the last
+	   overflow, we can test this condition now.  If no carry is
+	   signaled, no overflow occurred in the last byte => it was 0.  */
+ jnc L(8)
+
+ /* We are only interested in carry bits that change due to the
+ previous add, so remove original bits */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+
+ /* Now test for the other three overflow bits. */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+
+ /* If at least one byte of the word is C we don't get 0 in %edi. */
+ jnz L(8) /* found it => return pointer */
+
+	/* This process is unrolled four times for better performance.
+	   We don't increment the source pointer each time; instead we
+	   use offsets and increment by 16 in each run of the loop.  But
+	   before returning a pointer we need some extra code (the
+	   pointer adjustments at L(5) through L(7) below).  */
+
+ movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(7) /* highest byte is C => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(7) /* found it => return pointer */
+
+ movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(6) /* highest byte is C => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(6) /* found it => return pointer */
+
+ movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(5) /* highest byte is C => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(5) /* found it => return pointer */
+
+ /* Adjust both counters for a full round, i.e. 16 bytes. */
+ addl $16, %eax
+ jmp L(1)
+ /* add missing source pointer increments */
+L(5): addl $4, %eax
+L(6): addl $4, %eax
+L(7): addl $4, %eax
+
+ /* Test for the matching byte in the word. %ecx contains a NUL
+ char in the byte which originally was the byte we are looking
+ at. */
+L(8): testb %cl, %cl /* test first byte in dword */
+ jz L(9) /* if zero => return pointer */
+ incl %eax /* increment source pointer */
+
+ testb %ch, %ch /* test second byte in dword */
+ jz L(9) /* if zero => return pointer */
+ incl %eax /* increment source pointer */
+
+ testl $0xff0000, %ecx /* test third byte in dword */
+ jz L(9) /* if zero => return pointer */
+ incl %eax /* increment source pointer */
+
+	/* No further test needed: we know it is one of the four bytes.  */
+
+L(9):
+ popl %edi /* pop saved register */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__rawmemchr)
+
+libc_hidden_def (__rawmemchr)
+weak_alias (__rawmemchr, rawmemchr)
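
The word test used throughout the loop above can be transliterated into
C as follows (a sketch; it flags every word that may contain the target
byte, and the rare false positives are filtered by the byte-wise checks
at L(8), so only a false negative would be fatal -- there are none):

    #include <stdint.h>

    static int
    word_may_contain (uint32_t word, uint8_t c)
    {
      uint32_t mask = 0x01010101u * c;        /* c|c|c|c */
      uint32_t x = word ^ mask;               /* bytes equal to C -> 0 */
      uint64_t sum = (uint64_t) x + 0xfefefeffu;
      if ((sum >> 32) == 0)                   /* jnc: no carry out */
        return 1;                             /* top byte was 0 */
      uint32_t t = (uint32_t) sum ^ x;        /* keep carry-changed bits */
      t |= 0xfefefeffu;                       /* set all non-hole bits */
      return t + 1 != 0;                      /* a hole bit stayed clear */
    }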
diff --git a/REORG.TODO/sysdeps/i386/rshift.S b/REORG.TODO/sysdeps/i386/rshift.S
new file mode 100644
index 0000000000..cf179052b5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/rshift.S
@@ -0,0 +1,105 @@
+/* i80386 __mpn_rshift --
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+12 /* space for 3 saved regs */
+#define RES PARMS
+#define S RES+4
+#define SIZE S+4
+#define CNT SIZE+4
+
+ .text
+ENTRY (__mpn_rshift)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 8)
+ movl S(%esp),%esi
+ cfi_rel_offset (esi, 4)
+ movl SIZE(%esp),%edx
+ movl CNT(%esp),%ecx
+ leal -4(%edi,%edx,4),%edi
+ leal (%esi,%edx,4),%esi
+ negl %edx
+
+ movl (%esi,%edx,4),%ebx /* read least significant limb */
+ cfi_rel_offset (ebx, 0)
+ cfi_remember_state
+ xorl %eax,%eax
+ shrdl %cl,%ebx,%eax /* compute carry limb */
+ incl %edx
+ jz L(end)
+ pushl %eax /* push carry limb onto stack */
+ cfi_adjust_cfa_offset (4)
+ testb $1,%dl
+ jnz L(1) /* enter loop in the middle */
+ movl %ebx,%eax
+
+ ALIGN (3)
+L(oop): movl (%esi,%edx,4),%ebx /* load next higher limb */
+ shrdl %cl,%ebx,%eax /* compute result limb */
+ movl %eax,(%edi,%edx,4) /* store it */
+ incl %edx
+L(1): movl (%esi,%edx,4),%eax
+ shrdl %cl,%eax,%ebx
+ movl %ebx,(%edi,%edx,4)
+ incl %edx
+ jnz L(oop)
+
+ shrl %cl,%eax /* compute most significant limb */
+ movl %eax,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+ cfi_adjust_cfa_offset (-4)
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+
+ cfi_restore_state
+L(end): shrl %cl,%ebx /* compute most significant limb */
+ movl %ebx,(%edi) /* store it */
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_rshift)
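
In C, the function above computes the following (sketch; like the
shrdl-based code it requires 0 < CNT < 32 and SIZE >= 1):

    #include <stdint.h>

    /* Shift a SIZE-limb number right by CNT bits; the bits shifted out
       of the low end come back in the high bits of the return value,
       the "carry limb" computed first above.  */
    uint32_t
    mpn_rshift_sketch (uint32_t *res, const uint32_t *s, long size,
                       unsigned int cnt)
    {
      uint32_t retval = s[0] << (32 - cnt);
      for (long i = 0; i < size - 1; i++)
        res[i] = (s[i] >> cnt) | (s[i + 1] << (32 - cnt));
      res[size - 1] = s[size - 1] >> cnt;
      return retval;
    }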
diff --git a/REORG.TODO/sysdeps/i386/setfpucw.c b/REORG.TODO/sysdeps/i386/setfpucw.c
new file mode 100644
index 0000000000..40b995f18a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/setfpucw.c
@@ -0,0 +1,54 @@
+/* Set the FPU control word for x86.
+ Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <fpu_control.h>
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+void
+__setfpucw (fpu_control_t set)
+{
+ fpu_control_t cw;
+
+ /* Fetch the current control word. */
+ __asm__ ("fnstcw %0" : "=m" (*&cw));
+
+ /* Preserve the reserved bits, and set the rest as the user
+ specified (or the default, if the user gave zero). */
+ cw &= _FPU_RESERVED;
+ cw |= set & ~_FPU_RESERVED;
+
+ __asm__ ("fldcw %0" : : "m" (*&cw));
+
+ /* If the CPU supports SSE, we set the MXCSR as well. */
+ if (HAS_CPU_FEATURE (SSE))
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7));
+ xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7);
+
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+}
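
For reference, the two shifts map the x87 control-word fields onto their
MXCSR counterparts (standard layouts, noted here for the reader):

    x87 CW rounding control   bits 10-11  ->  MXCSR bits 13-14  (<< 3)
    x87 CW exception masks    bits  0-5   ->  MXCSR bits  7-12  (<< 7)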
diff --git a/REORG.TODO/sysdeps/i386/setjmp.S b/REORG.TODO/sysdeps/i386/setjmp.S
new file mode 100644
index 0000000000..738a899e8b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/setjmp.S
@@ -0,0 +1,58 @@
+/* setjmp for i386.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <jmpbuf-offsets.h>
+#include <asm-syntax.h>
+#include <stap-probe.h>
+
+#define PARMS 4 /* no space for saved regs */
+#define JMPBUF PARMS
+#define SIGMSK JMPBUF+4
+
+ENTRY (__sigsetjmp)
+
+ movl JMPBUF(%esp), %eax
+
+ /* Save registers. */
+ movl %ebx, (JB_BX*4)(%eax)
+ movl %esi, (JB_SI*4)(%eax)
+ movl %edi, (JB_DI*4)(%eax)
+ leal JMPBUF(%esp), %ecx /* Save SP as it will be after we return. */
+#ifdef PTR_MANGLE
+ PTR_MANGLE (%ecx)
+#endif
+ movl %ecx, (JB_SP*4)(%eax)
+ movl 0(%esp), %ecx /* Save PC we are returning to now. */
+ LIBC_PROBE (setjmp, 3, 4@%eax, -4@SIGMSK(%esp), 4@%ecx)
+#ifdef PTR_MANGLE
+ PTR_MANGLE (%ecx)
+#endif
+ movl %ecx, (JB_PC*4)(%eax)
+ movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer. */
+
+#if IS_IN (rtld)
+ /* In ld.so we never save the signal mask. */
+ xorl %eax, %eax
+ ret
+#else
+ /* Make a tail call to __sigjmp_save; it takes the same args. */
+ jmp __sigjmp_save
+#endif
+END (__sigsetjmp)
+hidden_def (__sigsetjmp)
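
PTR_MANGLE itself is defined in sysdep.h, not in this file; on i386 it
obscures the saved stack and return addresses roughly like this C sketch
(the rotate count shown is illustrative of the 32-bit scheme):

    #include <stdint.h>

    static uintptr_t
    ptr_mangle_sketch (uintptr_t p, uintptr_t guard)
    {
      p ^= guard;                     /* xorl %gs:POINTER_GUARD, reg */
      return (p << 9) | (p >> 23);    /* roll $9, reg (32-bit) */
    }

Mangling means a raw code or stack address never sits in the jmp_buf
where an attacker could overwrite it with a directly useful value.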
diff --git a/REORG.TODO/sysdeps/i386/stackguard-macros.h b/REORG.TODO/sysdeps/i386/stackguard-macros.h
new file mode 100644
index 0000000000..039762927c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/stackguard-macros.h
@@ -0,0 +1,12 @@
+#include <stdint.h>
+
+#define STACK_CHK_GUARD \
+ ({ uintptr_t x; asm ("movl %%gs:0x14, %0" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("movl %%gs:%c1, %0" : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard))); \
+ x; \
+ })
diff --git a/REORG.TODO/sysdeps/i386/stackinfo.h b/REORG.TODO/sysdeps/i386/stackinfo.h
new file mode 100644
index 0000000000..ba17867d3a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/stackinfo.h
@@ -0,0 +1,43 @@
+/* Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains a bit of information about the stack allocation
+ of the processor. */
+
+#ifndef _STACKINFO_H
+#define _STACKINFO_H 1
+
+#include <elf.h>
+
+/* On x86 the stack grows down. */
+#define _STACK_GROWS_DOWN 1
+
+/* Default to an executable stack.  PF_X can be overridden if PT_GNU_STACK is
+   present, but it is presumed absent.  */
+#define DEFAULT_STACK_PERMS (PF_R|PF_W|PF_X)
+
+/* Access to the stack pointer. The macros are used in alloca_account
+ for which they need to act as barriers as well, hence the additional
+ (unnecessary) parameters. */
+#define stackinfo_get_sp() \
+ ({ void *p__; asm volatile ("mov %%esp, %0" : "=r" (p__)); p__; })
+#define stackinfo_sub_sp(ptr) \
+ ({ ptrdiff_t d__; \
+ asm volatile ("sub %%esp, %0" : "=r" (d__) : "0" (ptr)); \
+ d__; })
+
+#endif /* stackinfo.h */
diff --git a/REORG.TODO/sysdeps/i386/start.S b/REORG.TODO/sysdeps/i386/start.S
new file mode 100644
index 0000000000..ccb1e2b38f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/start.S
@@ -0,0 +1,139 @@
+/* Startup code compliant to the ELF i386 ABI.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file with other
+ programs, and to distribute those programs without any restriction
+ coming from the use of this file. (The GNU Lesser General Public
+ License restrictions do apply in other respects; for example, they
+ cover modification of the file, and distribution when not linked
+ into another program.)
+
+ Note that people who make modified versions of this file are not
+ obligated to grant this special exception for their modified
+ versions; it is their choice whether to do so. The GNU Lesser
+ General Public License gives permission to release a modified
+ version without this exception; this exception also makes it
+ possible to release a modified version which carries forward this
+ exception.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is the canonical entry point, usually the first thing in the text
+ segment. The SVR4/i386 ABI (pages 3-31, 3-32) says that when the entry
+ point runs, most registers' values are unspecified, except for:
+
+ %edx Contains a function pointer to be registered with `atexit'.
+ This is how the dynamic linker arranges to have DT_FINI
+ functions called for shared libraries that have been loaded
+ before this code runs.
+
+ %esp The stack contains the arguments and environment:
+ 0(%esp) argc
+ 4(%esp) argv[0]
+ ...
+ (4*argc)(%esp) NULL
+ (4*(argc+1))(%esp) envp[0]
+ ...
+ NULL
+*/
+
+ .text
+ .globl _start
+ .type _start,@function
+_start:
+	/* Clear the frame pointer.  The ABI suggests this be done to
+	   clearly mark the outermost frame.  */
+ xorl %ebp, %ebp
+
+ /* Extract the arguments as encoded on the stack and set up
+ the arguments for `main': argc, argv. envp will be determined
+ later in __libc_start_main. */
+ popl %esi /* Pop the argument count. */
+ movl %esp, %ecx /* argv starts just at the current stack top.*/
+
+	/* Before pushing the arguments, align the stack to a 16-byte
+	   boundary (SSE needs 16-byte alignment) to avoid penalties from
+	   misaligned accesses.  Thanks to Edward Seidl <seidl@janed.com>
+	   for pointing this out.  */
+ andl $0xfffffff0, %esp
+ pushl %eax /* Push garbage because we allocate
+ 28 more bytes. */
+
+ /* Provide the highest stack address to the user code (for stacks
+ which grow downwards). */
+ pushl %esp
+
+ pushl %edx /* Push address of the shared library
+ termination function. */
+
+#ifdef SHARED
+ /* Load PIC register. */
+ call 1f
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+
+	/* Push the addresses of our own entry points to .fini and .init.  */
+ leal __libc_csu_fini@GOTOFF(%ebx), %eax
+ pushl %eax
+ leal __libc_csu_init@GOTOFF(%ebx), %eax
+ pushl %eax
+
+ pushl %ecx /* Push second argument: argv. */
+ pushl %esi /* Push first argument: argc. */
+
+ pushl main@GOT(%ebx)
+
+	/* Call the user's main function and exit with its return value;
+	   __libc_start_main itself calls main on our behalf.  */
+ call __libc_start_main@PLT
+#else
+	/* Push the addresses of our own entry points to .fini and .init.  */
+ pushl $__libc_csu_fini
+ pushl $__libc_csu_init
+
+ pushl %ecx /* Push second argument: argv. */
+ pushl %esi /* Push first argument: argc. */
+
+ pushl $main
+
+	/* Call the user's main function and exit with its return value;
+	   __libc_start_main itself calls main on our behalf.  */
+ call __libc_start_main
+#endif
+
+ hlt /* Crash if somehow `exit' does return. */
+
+#ifdef SHARED
+1: movl (%esp), %ebx
+ ret
+#endif
+
+/* To fulfill the System V/i386 ABI we need this symbol. Yuck, it's so
+ meaningless since we don't support machines < 80386. */
+ .section .rodata
+ .globl _fp_hw
+_fp_hw: .long 3
+ .size _fp_hw, 4
+ .type _fp_hw,@object
+
+/* Define a symbol for the first piece of initialized data. */
+ .data
+ .globl __data_start
+__data_start:
+ .long 0
+ .weak data_start
+ data_start = __data_start
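
Put together, the pushes above form a cdecl call in which the last value
pushed becomes the first argument.  The effective prototype (a sketch;
the real declaration lives in glibc's startup code) is:

    /* %edx arrives from the dynamic linker and is passed as rtld_fini.  */
    int __libc_start_main (int (*main) (int, char **, char **),
                           int argc, char **argv,
                           void (*init) (void), void (*fini) (void),
                           void (*rtld_fini) (void), void *stack_end);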
diff --git a/REORG.TODO/sysdeps/i386/stpcpy.S b/REORG.TODO/sysdeps/i386/stpcpy.S
new file mode 100644
index 0000000000..d9981b677b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/stpcpy.S
@@ -0,0 +1,88 @@
+/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper (drepper@gnu.ai.mit.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This function is defined neither in the ANSI C nor the POSIX standard,
+   but it is also not invented here.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define RTN PARMS
+#define DEST RTN
+#define SRC DEST+4
+
+ .text
+ENTRY (__stpcpy)
+
+ movl DEST(%esp), %eax
+ movl SRC(%esp), %ecx
+ subl %eax, %ecx /* magic: reduce number of loop variants
+ to one using addressing mode */
+
+ /* Here we would like to write
+
+ subl $4, %eax
+ ALIGN (4)
+
+	   but the assembler is too smart and optimizes for the shortest
+	   form, where the immediate needs only one byte.  With the long
+	   form the following label is already aligned and the explicit
+	   alignment would be unnecessary.  */
+
+ .byte 0x81, 0xe8 /* This is `subl $0x00000004, %eax' */
+ .long 0x00000004
+
+	/* Four times unrolled loop with only one loop counter.  This
+ is achieved by the use of index+base addressing mode. As the
+ loop counter we use the destination address because this is
+ also the result. */
+L(1): addl $4, %eax /* increment loop counter */
+
+ movb (%eax,%ecx), %dl /* load current char */
+ movb %dl, (%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(2) /* yes, then exit */
+
+ movb 1(%eax,%ecx), %dl /* load current char */
+ movb %dl, 1(%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(3) /* yes, then exit */
+
+ movb 2(%eax,%ecx), %dl /* load current char */
+ movb %dl, 2(%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(4) /* yes, then exit */
+
+ movb 3(%eax,%ecx), %dl /* load current char */
+ movb %dl, 3(%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jnz L(1) /* no, then continue loop */
+
+ incl %eax /* correct loop counter */
+L(4): incl %eax
+L(3): incl %eax
+L(2):
+
+ ret
+END (__stpcpy)
+
+weak_alias (__stpcpy, stpcpy)
+libc_hidden_def (__stpcpy)
+libc_hidden_builtin_def (stpcpy)
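
The "magic" subtraction above reduces the copy loop to one induction
variable; in C it looks like this sketch (it mirrors the assembly and,
like it, relies on the flat i386 address space):

    #include <stddef.h>

    char *
    stpcpy_sketch (char *dst, const char *src)
    {
      ptrdiff_t delta = src - dst;        /* subl %eax, %ecx */
      for (;; dst++)
        if ((*dst = dst[delta]) == '\0')  /* movb (%eax,%ecx), %dl ... */
          return dst;                     /* points at the NUL */
    }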
diff --git a/REORG.TODO/sysdeps/i386/stpncpy.S b/REORG.TODO/sysdeps/i386/stpncpy.S
new file mode 100644
index 0000000000..46f2aba713
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/stpncpy.S
@@ -0,0 +1,147 @@
+/* Copy no more than N bytes from SRC to DEST, returning the address of
+ the terminating '\0' in DEST.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Some bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+   - the original wrote n+1 chars in some cases.
+   - stpncpy() ought to behave like strncpy(), i.e. not null-terminate
+ if limited by n. glibc-1.09 stpncpy() does this.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#define RTN PARMS
+#define DEST RTN
+#define SRC DEST+4
+#define LEN SRC+4
+
+ .text
+ENTRY (__stpncpy)
+
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+
+ movl DEST(%esp), %eax
+ movl SRC(%esp), %esi
+ cfi_rel_offset (esi, 0)
+ movl LEN(%esp), %ecx
+
+ subl %eax, %esi /* magic: reduce number of loop variants
+ to one using addressing mode */
+ jmp L(1) /* jump to loop "head" */
+
+ ALIGN(4)
+
+ /* Four times unfolded loop with two loop counters. We get the
+ third value (the source address) by using the index+base
+ addressing mode. */
+L(2): movb (%eax,%esi), %dl /* load current char */
+ movb %dl, (%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(7) /* yes, then exit */
+
+ movb 1(%eax,%esi), %dl /* load current char */
+ movb %dl, 1(%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(6) /* yes, then exit */
+
+ movb 2(%eax,%esi), %dl /* load current char */
+ movb %dl, 2(%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(5) /* yes, then exit */
+
+ movb 3(%eax,%esi), %dl /* load current char */
+ movb %dl, 3(%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(4) /* yes, then exit */
+
+ addl $4, %eax /* increment loop counter for full round */
+
+L(1): subl $4, %ecx /* still more than 4 bytes allowed? */
+ jae L(2) /* yes, then go to start of loop */
+
+	/* The remaining (at most 3) bytes are not processed in a loop.  */
+
+ addl $4, %ecx /* correct above subtraction */
+ jz L(9) /* maximal allowed char reached => go to end */
+
+ movb (%eax,%esi), %dl /* load current char */
+ movb %dl, (%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(3) /* yes, then exit */
+
+ incl %eax /* increment pointer */
+ decl %ecx /* decrement length counter */
+ jz L(9) /* no more allowed => exit */
+
+ movb (%eax,%esi), %dl /* load current char */
+ movb %dl, (%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(3) /* yes, then exit */
+
+ incl %eax /* increment pointer */
+ decl %ecx /* decrement length counter */
+ jz L(9) /* no more allowed => exit */
+
+ movb (%eax,%esi), %dl /* load current char */
+ movb %dl, (%eax) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(3) /* yes, then exit */
+
+ incl %eax /* increment pointer */
+ jmp L(9) /* we don't have to test for counter underflow
+				   because we know we had at most 3 bytes
+ remaining => exit */
+
+ /* When coming from the main loop we have to adjust the pointer. */
+L(4): decl %ecx /* decrement counter */
+ incl %eax /* increment pointer */
+
+L(5):	decl %ecx		/* decrement counter */
+ incl %eax /* increment pointer */
+
+L(6):	decl %ecx		/* decrement counter */
+ incl %eax /* increment pointer */
+L(7):
+
+ addl $3, %ecx /* correct pre-decrementation of counter
+ at the beginning of the loop; but why 3
+ and not 4? Very simple, we have to count
+ the NUL char we already wrote. */
+ jz L(9) /* counter is also 0 => exit */
+
+ /* We now have to fill the rest of the buffer with NUL. This
+ is done in a tricky way. Please note that the addressing mode
+ used below is not the same we used above. Here we use the
+ %ecx register. */
+L(8):
+ movb $0, (%ecx,%eax) /* store NUL char */
+L(3): decl %ecx /* all bytes written? */
+ jnz L(8) /* no, then again */
+
+L(9): popl %esi /* restore saved register content */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+
+ ret
+END (__stpncpy)
+
+libc_hidden_def (__stpncpy)
+weak_alias (__stpncpy, stpncpy)
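
The semantics implemented above, as a plain C sketch: copy at most N
bytes, pad the rest with NULs (the L(8)/L(3) fill loop), and return a
pointer to the terminator -- or to DST + N when SRC did not fit:

    #include <stddef.h>

    char *
    stpncpy_sketch (char *dst, const char *src, size_t n)
    {
      size_t i = 0;
      for (; i < n && src[i] != '\0'; i++)
        dst[i] = src[i];
      char *end = dst + i;
      for (; i < n; i++)
        dst[i] = '\0';
      return end;
    }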
diff --git a/REORG.TODO/sysdeps/i386/strcat.S b/REORG.TODO/sysdeps/i386/strcat.S
new file mode 100644
index 0000000000..4a26b3c528
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strcat.S
@@ -0,0 +1,265 @@
+/* strcat(dest, src) -- Append SRC on the end of DEST.
+ For Intel 80x86, x>=4.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>.
+ Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#define RTN PARMS
+#define DEST RTN
+#define SRC DEST+4
+
+ .text
+ENTRY (strcat)
+
+ pushl %edi /* Save callee-safe register. */
+ cfi_adjust_cfa_offset (4)
+
+ movl DEST(%esp), %edx
+ movl SRC(%esp), %ecx
+
+ testb $0xff, (%ecx) /* Is source string empty? */
+ jz L(8) /* yes => return */
+
+ /* Test the first bytes separately until destination is aligned. */
+ testl $3, %edx /* destination pointer aligned? */
+ jz L(1) /* yes => begin scan loop */
+ testb $0xff, (%edx) /* is end of string? */
+ jz L(2) /* yes => start appending */
+ incl %edx /* increment destination pointer */
+
+ testl $3, %edx /* destination pointer aligned? */
+ jz L(1) /* yes => begin scan loop */
+ testb $0xff, (%edx) /* is end of string? */
+ jz L(2) /* yes => start appending */
+ incl %edx /* increment destination pointer */
+
+ testl $3, %edx /* destination pointer aligned? */
+ jz L(1) /* yes => begin scan loop */
+ testb $0xff, (%edx) /* is end of string? */
+ jz L(2) /* yes => start appending */
+ incl %edx /* increment destination pointer */
+
+ /* Now we are aligned. Begin scan loop. */
+ jmp L(1)
+
+ cfi_rel_offset (edi, 0)
+ ALIGN(4)
+
+L(4): addl $16,%edx /* increment destination pointer for round */
+
+L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+
+ /* If you compare this with the algorithm in memchr.S you will
+ notice that an `xorl' statement is missing here. But you must
+ not forget that we are looking for C == 0 and `xorl $0, %eax'
+ is a no-op. */
+
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+
+ /* According to the algorithm we had to reverse the effect of the
+ XOR first and then test the overflow bits. But because the
+ following XOR would destroy the carry flag and it would (in a
+ representation with more than 32 bits) not alter the last
+ overflow, we can now test this condition. If no carry is signaled
+ no overflow must have occurred in the last byte => it was 0. */
+ jnc L(3)
+
+ /* We are only interested in carry bits that change due to the
+ previous add, so remove original bits */
+ xorl %eax, %edi /* (word+magic)^word */
+
+ /* Now test for the other three overflow bits. */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+
+ /* If at least one byte of the word is NUL we don't get 0 in %edi. */
+ jnz L(3)
+
+ movl 4(%edx), %eax /* get word from source */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(5) /* highest byte is NUL => stop scanning */
+ xorl %eax, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(5) /* one byte is NUL => stop scanning */
+
+ movl 8(%edx), %eax /* get word from source */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(6) /* highest byte is NUL => stop scanning */
+ xorl %eax, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(6) /* one byte is NUL => stop scanning */
+
+ movl 12(%edx), %eax /* get word from source */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(7) /* highest byte is NUL => stop scanning */
+ xorl %eax, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz L(4) /* no byte is NUL => carry on scanning */
+
+L(7): addl $4, %edx /* adjust destination pointer */
+L(6): addl $4, %edx
+L(5): addl $4, %edx
+
+L(3): testb %al, %al /* is first byte NUL? */
+ jz L(2) /* yes => start copying */
+ incl %edx /* increment destination pointer */
+
+ testb %ah, %ah /* is second byte NUL? */
+ jz L(2) /* yes => start copying */
+ incl %edx /* increment destination pointer */
+
+ testl $0xff0000, %eax /* is third byte NUL? */
+ jz L(2) /* yes => start copying */
+ incl %edx /* increment destination pointer */
+
+L(2): subl %ecx, %edx /* %edx = dest - src; one induction
+ variable now addresses both strings */
+
+ /* Now we have to align the source pointer. */
+ testl $3, %ecx /* pointer correctly aligned? */
+ jz L(29) /* yes => start copy loop */
+ movb (%ecx), %al /* get first byte */
+ movb %al, (%ecx,%edx) /* and store it */
+ andb %al, %al /* is byte NUL? */
+ jz L(8) /* yes => return */
+ incl %ecx /* increment pointer */
+
+ testl $3, %ecx /* pointer correctly aligned? */
+ jz L(29) /* yes => start copy loop */
+ movb (%ecx), %al /* get first byte */
+ movb %al, (%ecx,%edx) /* and store it */
+ andb %al, %al /* is byte NUL? */
+ jz L(8) /* yes => return */
+ incl %ecx /* increment pointer */
+
+ testl $3, %ecx /* pointer correctly aligned? */
+ jz L(29) /* yes => start copy loop */
+ movb (%ecx), %al /* get first byte */
+ movb %al, (%ecx,%edx) /* and store it */
+ andb %al, %al /* is byte NUL? */
+ jz L(8) /* yes => return */
+ incl %ecx /* increment pointer */
+
+ /* Now we are aligned. */
+ jmp L(29) /* start copy loop */
+
+ ALIGN(4)
+
+L(28): movl %eax, 12(%ecx,%edx) /* store word at destination */
+ addl $16, %ecx /* adjust pointer for full round */
+
+L(29): movl (%ecx), %eax /* get word from source */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(9) /* highest byte is NUL => stop copying */
+ xorl %eax, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(9) /* one byte is NUL => stop copying */
+ movl %eax, (%ecx,%edx) /* store word to destination */
+
+ movl 4(%ecx), %eax /* get word from source */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(91) /* highest byte is NUL => stop copying */
+ xorl %eax, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(91) /* one byte is NUL => stop copying */
+ movl %eax, 4(%ecx,%edx) /* store word to destination */
+
+ movl 8(%ecx), %eax /* get word from source */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(92) /* highest byte is NUL => stop copying */
+ xorl %eax, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(92) /* one byte is NUL => stop copying */
+ movl %eax, 8(%ecx,%edx) /* store word to destination */
+
+ movl 12(%ecx), %eax /* get word from source */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %eax, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(93) /* highest byte is NUL => stop copying */
+ xorl %eax, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz L(28) /* no byte is NUL => carry on copying */
+
+L(93): addl $4, %ecx /* adjust pointer */
+L(92): addl $4, %ecx
+L(91): addl $4, %ecx
+
+L(9): movb %al, (%ecx,%edx) /* store first byte of last word */
+ orb %al, %al /* is it NUL? */
+ jz L(8) /* yes => return */
+
+ movb %ah, 1(%ecx,%edx) /* store second byte of last word */
+ orb %ah, %ah /* is it NUL? */
+ jz L(8) /* yes => return */
+
+ shrl $16, %eax /* make upper bytes accessible */
+ movb %al, 2(%ecx,%edx) /* store third byte of last word */
+ orb %al, %al /* is it NUL? */
+ jz L(8) /* yes => return */
+
+ movb %ah, 3(%ecx,%edx) /* store fourth byte of last word */
+
+L(8): movl DEST(%esp), %eax /* start address of destination is result */
+ popl %edi /* restore saved register */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (strcat)
+libc_hidden_builtin_def (strcat)
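
The scan and copy loops above all rest on the same 0xfefefeff "magic value"
test for a zero byte inside a 4-byte word. A hedged C sketch of that
predicate (illustrative name, not part of the library):

#include <stdint.h>

/* Adding the magic value produces a carry out of every byte that is
   non-zero, so a broken carry chain pinpoints a zero byte.  The first
   test matches the `jnc' above (carry out of bit 31); the second
   matches the xorl/orl/incl sequence, which checks the carries into
   the hole bits 8, 16 and 24. */
static int
has_zero_byte (uint32_t word)
{
  const uint32_t magic = 0xfefefeffu;
  uint32_t sum = word + magic;

  if (sum >= word)           /* no carry out of bit 31 (`jnc') */
    return 1;                /* the chain broke => a byte was 0 */

  /* Keep only the bits changed by carries, set every non-hole bit,
     then add 1: non-zero iff a carry into bit 8, 16 or 24 was
     missing, i.e. one of the low three bytes was 0. */
  return (((sum ^ word) | magic) + 1) != 0;
}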
diff --git a/REORG.TODO/sysdeps/i386/strchr.S b/REORG.TODO/sysdeps/i386/strchr.S
new file mode 100644
index 0000000000..6075e77882
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strchr.S
@@ -0,0 +1,290 @@
+/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#define RTN PARMS
+#define STR RTN
+#define CHR STR+4
+
+ .text
+ENTRY (strchr)
+
+ pushl %edi /* Save callee-safe registers used here. */
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+ movl STR(%esp), %eax
+ movl CHR(%esp), %edx
+
+ /* At the moment %edx contains C. What we need for the
+ algorithm is C in all bytes of the dword. Avoid
+ operations on 16 bit words because these require a
+ prefix byte (and one more cycle). */
+ movb %dl, %dh /* now it is 0|0|c|c */
+ movl %edx, %ecx
+ shll $16, %edx /* now it is c|c|0|0 */
+ movw %cx, %dx /* and finally c|c|c|c */
+
+ /* Before we start with the main loop we process single bytes
+ until the source pointer is aligned. There are two reasons:
+ 1. aligned 32-bit memory access is faster
+ and (more important)
+ 2. in the main loop we process 32 bits in one step although
+ we don't know the end of the string. But accessing memory at
+ 4-byte alignment guarantees that we never access illegal
+ memory if this would not also be done by the trivial
+ implementation (this is because all processor-inherent
+ boundaries are multiples of 4). */
+
+ testb $3, %al /* correctly aligned ? */
+ jz L(11) /* yes => begin loop */
+ movb (%eax), %cl /* load byte in question (we need it twice) */
+ cmpb %cl, %dl /* compare byte */
+ je L(6) /* target found => return */
+ testb %cl, %cl /* is NUL? */
+ jz L(2) /* yes => return NULL */
+ incl %eax /* increment pointer */
+
+ testb $3, %al /* correctly aligned ? */
+ jz L(11) /* yes => begin loop */
+ movb (%eax), %cl /* load byte in question (we need it twice) */
+ cmpb %cl, %dl /* compare byte */
+ je L(6) /* target found => return */
+ testb %cl, %cl /* is NUL? */
+ jz L(2) /* yes => return NULL */
+ incl %eax /* increment pointer */
+
+ testb $3, %al /* correctly aligned ? */
+ jz L(11) /* yes => begin loop */
+ movb (%eax), %cl /* load byte in question (we need it twice) */
+ cmpb %cl, %dl /* compare byte */
+ je L(6) /* target found => return */
+ testb %cl, %cl /* is NUL? */
+ jz L(2) /* yes => return NULL */
+ incl %eax /* increment pointer */
+
+ /* Now we have reached alignment. */
+ jmp L(11) /* begin loop */
+
+ /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+ change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-31 is set, there will be a carry
+ into bit 32 (=carry flag), so all of the hole bits will
+ be changed.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+ /* Each round the main loop processes 16 bytes. */
+
+ ALIGN(4)
+
+L(1): addl $16, %eax /* adjust pointer for whole round */
+
+L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* C */
+
+ /* According to the algorithm we had to reverse the effect of the
+ XOR first and then test the overflow bits. But because the
+ following XOR would destroy the carry flag and it would (in a
+ representation with more than 32 bits) not alter the last
+ overflow, we can now test this condition. If no carry is signaled
+ no overflow must have occurred in the last byte => it was 0. */
+ jnc L(7)
+
+ /* We are only interested in carry bits that change due to the
+ previous add, so remove original bits */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+
+ /* Now test for the other three overflow bits. */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+
+ /* If at least one byte of the word is C we don't get 0 in %edi. */
+ jnz L(7) /* found it => return pointer */
+
+ /* Now we made sure the dword does not contain the character we are
+ looking for. But because we deal with strings we have to check
+ for the end of string before testing the next dword. */
+
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(2) /* highest byte is NUL => return NULL */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(2) /* found NUL => return NULL */
+
+ movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* C */
+ jnc L(71) /* highest byte is C => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(71) /* found it => return pointer */
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(2) /* highest byte is NUL => return NULL */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(2) /* found NUL => return NULL */
+
+ movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* C */
+ jnc L(72) /* highest byte is C => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(72) /* found it => return pointer */
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(2) /* highest byte is NUL => return NULL */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(2) /* found NUL => return NULL */
+
+ movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* C */
+ jnc L(73) /* highest byte is C => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(73) /* found it => return pointer */
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(2) /* highest byte is NUL => return NULL */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz L(1) /* no NUL found => restart loop */
+
+L(2): /* Return NULL. */
+ xorl %eax, %eax
+ popl %edi /* restore saved register content */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+L(73): addl $4, %eax /* adjust pointer */
+L(72): addl $4, %eax
+L(71): addl $4, %eax
+
+ /* We now scan for the byte in which the character was matched.
+ But we have to take care of the case that a NUL char is
+ found before this in the dword. Note that we XORed %ecx
+ with the byte we're looking for, therefore the tests below look
+ reversed. */
+
+L(7): testb %cl, %cl /* is first byte C? */
+ jz L(6) /* yes => return pointer */
+ cmpb %dl, %cl /* is first byte NUL? */
+ je L(2) /* yes => return NULL */
+ incl %eax /* it's not in the first byte */
+
+ testb %ch, %ch /* is second byte C? */
+ jz L(6) /* yes => return pointer */
+ cmpb %dl, %ch /* is second byte NUL? */
+ je L(2) /* yes => return NULL */
+ incl %eax /* it's not in the second byte */
+
+ shrl $16, %ecx /* make upper bytes accessible */
+ testb %cl, %cl /* is third byte C? */
+ jz L(6) /* yes => return pointer */
+ cmpb %dl, %cl /* is third byte NUL? */
+ je L(2) /* yes => return NULL */
+
+ /* It must be in the fourth byte and it cannot be NUL. */
+ incl %eax
+
+L(6):
+ popl %edi /* restore saved register content */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (strchr)
+
+weak_alias (strchr, index)
+libc_hidden_builtin_def (strchr)
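
The same magic test drives strchr: XORing the word with c|c|c|c first turns
every byte equal to C into zero, so the zero-byte predicate doubles as a
"contains C" predicate. A hedged word-at-a-time sketch in C, reusing the
has_zero_byte() sketch shown after strcat.S (alignment prologue and 16-byte
unrolling omitted; S is assumed 4-byte aligned, as the prologue guarantees):

#include <stdint.h>

static const char *
strchr_sketch (const char *s, unsigned char c)
{
  const uint32_t *p = (const uint32_t *) s;
  uint32_t mask = 0x01010101u * c;     /* c|c|c|c, as built in %edx */

  for (;; ++p)
    if (has_zero_byte (*p ^ mask)      /* some byte equals C ... */
        || has_zero_byte (*p))         /* ... or the string ends here */
      {
        const unsigned char *b = (const unsigned char *) p;
        for (int i = 0; i < 4; ++i, ++b)
          {
            if (*b == c)
              return (const char *) b; /* C comes first => pointer */
            if (*b == '\0')
              return NULL;             /* NUL comes first => NULL */
          }
      }
}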
diff --git a/REORG.TODO/sysdeps/i386/strchrnul.S b/REORG.TODO/sysdeps/i386/strchrnul.S
new file mode 100644
index 0000000000..800b872c74
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strchrnul.S
@@ -0,0 +1,278 @@
+/* strchrnul (str, chr) -- Return pointer to first occurrence of CHR in STR
+ or the final NUL byte.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>
+ Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+4 /* space for 1 saved reg */
+#define RTN PARMS
+#define STR RTN
+#define CHR STR+4
+
+ .text
+ENTRY (__strchrnul)
+
+ pushl %edi /* Save callee-safe registers used here. */
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+
+ movl STR(%esp), %eax
+ movl CHR(%esp), %edx
+
+ /* At the moment %edx contains CHR. What we need for the
+ algorithm is CHR in all bytes of the dword. Avoid
+ operations on 16 bit words because these require a
+ prefix byte (and one more cycle). */
+ movb %dl, %dh /* now it is 0|0|c|c */
+ movl %edx, %ecx
+ shll $16, %edx /* now it is c|c|0|0 */
+ movw %cx, %dx /* and finally c|c|c|c */
+
+ /* Before we start with the main loop we process single bytes
+ until the source pointer is aligned. There are two reasons:
+ 1. aligned 32-bit memory access is faster
+ and (more important)
+ 2. in the main loop we process 32 bits in one step although
+ we don't know the end of the string. But accessing memory at
+ 4-byte alignment guarantees that we never access illegal
+ memory if this would not also be done by the trivial
+ implementation (this is because all processor-inherent
+ boundaries are multiples of 4). */
+
+ testb $3, %al /* correctly aligned ? */
+ jz L(11) /* yes => begin loop */
+ movb (%eax), %cl /* load byte in question (we need it twice) */
+ cmpb %cl, %dl /* compare byte */
+ je L(6) /* target found => return */
+ testb %cl, %cl /* is NUL? */
+ jz L(6) /* yes => return pointer to NUL */
+ incl %eax /* increment pointer */
+
+ testb $3, %al /* correctly aligned ? */
+ jz L(11) /* yes => begin loop */
+ movb (%eax), %cl /* load byte in question (we need it twice) */
+ cmpb %cl, %dl /* compare byte */
+ je L(6) /* target found => return */
+ testb %cl, %cl /* is NUL? */
+ jz L(6) /* yes => return pointer to NUL */
+ incl %eax /* increment pointer */
+
+ testb $3, %al /* correctly aligned ? */
+ jz L(11) /* yes => begin loop */
+ movb (%eax), %cl /* load byte in question (we need it twice) */
+ cmpb %cl, %dl /* compare byte */
+ je L(6) /* target found => return */
+ testb %cl, %cl /* is NUL? */
+ jz L(6) /* yes => return pointer to NUL */
+ incl %eax /* increment pointer */
+
+ /* Now we have reached alignment. */
+ jmp L(11) /* begin loop */
+
+ /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+ change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-31 is set, there will be a carry
+ into bit 32 (=carry flag), so all of the hole bits will
+ be changed.
+
+ 3) But wait! Aren't we looking for CHR, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is CHR. This turns each byte that is CHR
+ into a zero. */
+
+ /* Each round the main loop processes 16 bytes. */
+
+ ALIGN(4)
+
+L(1): addl $16, %eax /* adjust pointer for whole round */
+
+L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* CHR */
+
+ /* According to the algorithm we had to reverse the effect of the
+ XOR first and then test the overflow bits. But because the
+ following XOR would destroy the carry flag and it would (in a
+ representation with more than 32 bits) not alter the last
+ overflow, we can now test this condition. If no carry is signaled
+ no overflow must have occurred in the last byte => it was 0. */
+ jnc L(7)
+
+ /* We are only interested in carry bits that change due to the
+ previous add, so remove original bits */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+
+ /* Now test for the other three overflow bits. */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+
+ /* If at least one byte of the word is CHR we don't get 0 in %edi. */
+ jnz L(7) /* found it => return pointer */
+
+ /* Now we made sure the dword does not contain the character we are
+ looking for. But because we deal with strings we have to check
+ for the end of string before testing the next dword. */
+
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(7) /* highest byte is NUL => return pointer to it */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(7) /* found NUL => return pointer to it */
+
+ movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* CHR */
+ jnc L(71) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(71) /* found it => return pointer */
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(71) /* highest byte is NUL => return pointer to it */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(71) /* found NUL => return pointer to it */
+
+ movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* CHR */
+ jnc L(72) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(72) /* found it => return pointer */
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(72) /* highest byte is NUL => return pointer to it */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(72) /* found NUL => return pointer to it */
+
+ movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
+ xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* CHR */
+ jnc L(73) /* highest byte is CHR => return pointer */
+ xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(73) /* found it => return pointer */
+ xorl %edx, %ecx /* restore original dword without reload */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %ecx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(73) /* highest byte is NUL => return pointer to it */
+ xorl %ecx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz L(1) /* no NUL found => restart loop */
+
+L(73): addl $4, %eax /* adjust pointer */
+L(72): addl $4, %eax
+L(71): addl $4, %eax
+
+ /* We now scan for the byte in which the character was matched.
+ But we have to take care of the case that a NUL char is
+ found before this in the dword. */
+
+L(7): testb %cl, %cl /* is first byte CHR? */
+ jz L(6) /* yes => return pointer */
+ cmpb %dl, %cl /* is first byte NUL? */
+ je L(6) /* yes => return pointer */
+ incl %eax /* it's not in the first byte */
+
+ testb %ch, %ch /* is second byte CHR? */
+ jz L(6) /* yes => return pointer */
+ cmpb %dl, %ch /* is second byte NUL? */
+ je L(6) /* yes => return pointer */
+ incl %eax /* it's not in the second byte */
+
+ shrl $16, %ecx /* make upper bytes accessible */
+ testb %cl, %cl /* is third byte CHR? */
+ jz L(6) /* yes => return pointer */
+ cmpb %dl, %cl /* is third byte NUL? */
+ je L(6) /* yes => return pointer */
+
+ /* It must be in the fourth byte and it cannot be NUL. */
+ incl %eax
+
+L(6): popl %edi /* restore saved register content */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__strchrnul)
+
+weak_alias (__strchrnul, strchrnul)
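
Unlike strchr, __strchrnul returns a pointer to the terminating NUL when CHR
is absent, which is why both the CHR and the NUL branches above exit through
L(6). The relation in one line of C (a sketch; glibc declares strchrnul in
<string.h> under _GNU_SOURCE):

#include <string.h>

char *
strchrnul_sketch (const char *s, int c)
{
  char *p = strchr (s, c);
  return p != NULL ? p : (char *) s + strlen (s);
}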
diff --git a/REORG.TODO/sysdeps/i386/strcspn.S b/REORG.TODO/sysdeps/i386/strcspn.S
new file mode 100644
index 0000000000..c852a3b1e5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strcspn.S
@@ -0,0 +1,240 @@
+/* strcspn (str, ss) -- Return the length of the initial segment of STR
+ which contains no characters from SS.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define STR PARMS
+#define STOP STR+4
+
+ .text
+ENTRY (strcspn)
+
+ movl STR(%esp), %edx
+ movl STOP(%esp), %eax
+
+ /* First we create a table with flags for all possible characters.
+ For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
+ supported by the C string functions we have 256 characters.
+ Before inserting marks for the stop characters we clear the whole
+ table. The unrolled form is much faster than a loop. */
+ xorl %ecx, %ecx /* %ecx = 0 !!! */
+
+ pushl %ecx /* make a 256-byte block filled with 0 */
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* These immediate values make label 2 */
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* aligned on a 16 byte boundary, which */
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* improves the performance of the loop. */
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+
+/* For understanding the following code remember that %ecx == 0 now.
+ Although all the following instructions only modify %cl we always
+ have a correct zero-extended 32-bit value in %ecx. */
+
+/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want
+ longer instructions so that the next loop aligns without adding nops. */
+
+L(2): movb (%eax), %cl /* get byte from stopset */
+ testb %cl, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+
+ movb 1(%eax), %cl /* get byte from stopset */
+ testb $0xff, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+
+ movb 2(%eax), %cl /* get byte from stopset */
+ testb $0xff, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+
+ movb 3(%eax), %cl /* get byte from stopset */
+ addl $4, %eax /* increment stopset pointer */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+ testb $0xff, %cl /* is NUL char? */
+ jnz L(2) /* no => process next dword from stopset */
+
+L(1): leal -4(%edx), %eax /* prepare loop */
+
+ /* We use a neat trick for the following loop. Normally we would
+ have to test for two termination conditions:
+ 1. a character in the stopset was found
+ and
+ 2. the end of the string was reached.
+ But as the marker that a character is in the stopset we store its
+ own value in the table. Since the value of NUL is NUL, the loop
+ terminates for NUL in either case. */
+
+L(3): addl $4, %eax /* adjust pointer for full loop round */
+
+ movb (%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ je L(4) /* yes => return */
+
+ movb 1(%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ je L(5) /* yes => return */
+
+ movb 2(%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ je L(6) /* yes => return */
+
+ movb 3(%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ jne L(3) /* no => next loop round */
+
+ incl %eax /* adjust pointer */
+L(6): incl %eax
+L(5): incl %eax
+
+L(4): addl $256, %esp /* remove stopset */
+ cfi_adjust_cfa_offset (-256)
+ subl %edx, %eax /* we have to return the number of valid
+ characters, so compute distance to first
+ non-valid character */
+ ret
+END (strcspn)
+libc_hidden_builtin_def (strcspn)
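
The 66 pushes above build a zeroed 256-byte lookup table on the stack; each
stop character then marks its own slot with its own value, and since the slot
for NUL keeps the value 0, a single table lookup per byte tests both
termination conditions at once. A hedged C sketch of the technique
(illustrative, not the installed implementation):

#include <stddef.h>

size_t
strcspn_sketch (const char *str, const char *stopset)
{
  unsigned char table[256] = { 0 };  /* the zeroed block pushed above */

  /* Mark each stop character with its own value; NUL's slot stays 0. */
  for (const unsigned char *p = (const unsigned char *) stopset; *p; ++p)
    table[*p] = *p;

  /* A byte C stops the scan iff table[C] == C, which also holds for
     C == NUL -- this is the `cmpb %cl, (%esp,%ecx)' test above. */
  const unsigned char *s = (const unsigned char *) str;
  while (table[*s] != *s)
    ++s;
  return (size_t) (s - (const unsigned char *) str);
}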
diff --git a/REORG.TODO/sysdeps/i386/string-inlines.c b/REORG.TODO/sysdeps/i386/string-inlines.c
new file mode 100644
index 0000000000..d023bc3aa3
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/string-inlines.c
@@ -0,0 +1,47 @@
+/* Copyright (C) 1999-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is to avoid PLT entries for the x86 version. */
+#define __memcpy_g __memcpy_g_internal
+#define __strchr_g __strchr_g_internal
+#include <string/string-inlines.c>
+
+void *
+(__memcpy_c) (void *d, const void *s, size_t n)
+{
+ return memcpy (d, s, n);
+}
+
+void *
+__memset_cc (void *s, unsigned long int pattern, size_t n)
+{
+ return memset (s, pattern & 0xff, n);
+}
+strong_alias (__memset_cc, __memset_cg)
+
+void *
+__memset_gg (void *s, char c, size_t n)
+{
+ return memset (s, c, n);
+}
+
+#ifdef __memcpy_c
+# undef __memcpy_g
+strong_alias (__memcpy_g_internal, __memcpy_g)
+# undef __strchr_g
+strong_alias (__strchr_g_internal, __strchr_g)
+#endif
diff --git a/REORG.TODO/sysdeps/i386/strlen.S b/REORG.TODO/sysdeps/i386/strlen.S
new file mode 100644
index 0000000000..192fadf20a
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strlen.S
@@ -0,0 +1,132 @@
+/* strlen(str) -- determine the length of the string STR.
+ Optimized for Intel 80x86, x>=4.
+ Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define STR PARMS
+
+ .text
+ENTRY (strlen)
+
+ movl STR(%esp), %ecx
+ movl %ecx, %eax /* duplicate it */
+
+ andl $3, %ecx /* mask alignment bits */
+ jz L(1) /* aligned => start loop */
+ cmpb %ch, (%eax) /* is byte NUL? */
+ je L(2) /* yes => return */
+ incl %eax /* increment pointer */
+
+ xorl $3, %ecx /* was alignment = 3? */
+ jz L(1) /* yes => now it is aligned and start loop */
+ cmpb %ch, (%eax) /* is byte NUL? */
+ je L(2) /* yes => return */
+ addl $1, %eax /* increment pointer */
+
+ subl $1, %ecx /* was alignment = 2? */
+ jz L(1) /* yes => now it is aligned and start loop */
+ cmpb %ch, (%eax) /* is byte NUL? */
+ je L(2) /* yes => return */
+
+/* Don't change the above `addl $1,%eax' and `subl $1, %ecx' into `incl %eax'
+ and `decl %ecx' resp. The additional two bytes per instruction make
+ label 4 land on a 16 byte boundary without padding nops.
+
+ The following `subl $15, %eax' is part of this trick, too. Together with
+ the next instruction (`addl $16, %eax') it is in fact an `incl %eax', just
+ as expected from the algorithm. But doing so has the advantage that
+ no jump to label 1 is necessary and so the pipeline is not flushed. */
+
+ subl $15, %eax /* effectively +1 */
+
+
+L(4): addl $16, %eax /* adjust pointer for full loop */
+
+L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edx /* magic value */
+ addl %ecx, %edx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(3) /* highest byte is NUL => return pointer */
+ xorl %ecx, %edx /* (word+magic)^word */
+ orl $0xfefefeff, %edx /* set all non-carry bits */
+ incl %edx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(3) /* found NUL => return pointer */
+
+ movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edx /* magic value */
+ addl %ecx, %edx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(5) /* highest byte is NUL => return pointer */
+ xorl %ecx, %edx /* (word+magic)^word */
+ orl $0xfefefeff, %edx /* set all non-carry bits */
+ incl %edx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(5) /* found NUL => return pointer */
+
+ movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edx /* magic value */
+ addl %ecx, %edx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(6) /* highest byte is NUL => return pointer */
+ xorl %ecx, %edx /* (word+magic)^word */
+ orl $0xfefefeff, %edx /* set all non-carry bits */
+ incl %edx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(6) /* found NUL => return pointer */
+
+ movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edx /* magic value */
+ addl %ecx, %edx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(7) /* highest byte is NUL => return pointer */
+ xorl %ecx, %edx /* (word+magic)^word */
+ orl $0xfefefeff, %edx /* set all non-carry bits */
+ incl %edx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz L(4) /* no NUL found => continue loop */
+
+L(7): addl $4, %eax /* adjust pointer */
+L(6): addl $4, %eax
+L(5): addl $4, %eax
+
+L(3): testb %cl, %cl /* is first byte NUL? */
+ jz L(2) /* yes => return */
+ incl %eax /* increment pointer */
+
+ testb %ch, %ch /* is second byte NUL? */
+ jz L(2) /* yes => return */
+ incl %eax /* increment pointer */
+
+ testl $0xff0000, %ecx /* is third byte NUL? */
+ jz L(2) /* yes => return pointer */
+ incl %eax /* increment pointer */
+
+L(2): subl STR(%esp), %eax /* compute difference to string start */
+
+ ret
+END (strlen)
+libc_hidden_builtin_def (strlen)
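
In C, the structure above corresponds to the following word-at-a-time sketch
(reusing the has_zero_byte() sketch shown after strcat.S; the alignment
prologue is omitted and STR is assumed 4-byte aligned):

#include <stddef.h>
#include <stdint.h>

static size_t
strlen_sketch (const char *str)
{
  const uint32_t *w = (const uint32_t *) str;
  while (!has_zero_byte (*w))   /* the unrolled loop at labels 4/1 */
    ++w;
  const char *p = (const char *) w;
  while (*p != '\0')            /* the byte tests at label 3 */
    ++p;
  return (size_t) (p - str);    /* the final subl at label 2 */
}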
diff --git a/REORG.TODO/sysdeps/i386/strlen.c b/REORG.TODO/sysdeps/i386/strlen.c
new file mode 100644
index 0000000000..0b69957392
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strlen.c
@@ -0,0 +1,35 @@
+/* Determine the length of a string. For Intel 80x86, x>=3.
+ Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+size_t
+strlen (const char *str)
+{
+ int cnt;
+
+ asm("cld\n" /* Search forward. */
+ /* Some old versions of gas need `repne' instead of `repnz'. */
+ "repnz\n" /* Look for a zero byte. */
+ "scasb" /* %0, %1, %3 */ :
+ "=c" (cnt) : "D" (str), "0" (-1), "a" (0));
+
+ return -2 - cnt;
+}
+libc_hidden_builtin_def (strlen)
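
The return expression is worth a worked check: the `rep' prefix decrements
%ecx once per executed `scasb', and the scan also consumes the terminating
NUL that stops `repnz'. For str = "abc", `scasb' runs 4 times, so cnt ends
at -1 - 4 = -5 and the function returns -2 - (-5) = 3. In general
cnt = -1 - (len + 1), hence len = -2 - cnt.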
diff --git a/REORG.TODO/sysdeps/i386/strpbrk.S b/REORG.TODO/sysdeps/i386/strpbrk.S
new file mode 100644
index 0000000000..1109b233da
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strpbrk.S
@@ -0,0 +1,243 @@
+/* strpbrk (str, ss) -- Return pointer to the first character in STR
+ that is also contained in SS, or NULL if there is none.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define RTN PARMS
+#define STR RTN
+#define STOP STR+4
+
+ .text
+ENTRY (strpbrk)
+
+ movl STR(%esp), %edx
+ movl STOP(%esp), %eax
+
+ /* First we create a table with flags for all possible characters.
+ For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
+ supported by the C string functions we have 256 characters.
+ Before inserting marks for the stop characters we clear the whole
+ table. The unrolled form is much faster than a loop. */
+ xorl %ecx, %ecx /* %ecx = 0 !!! */
+
+ pushl %ecx /* make a 256-byte block filled with 0 */
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* These immediate values make label 2 */
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* aligned on a 16 byte boundary, which */
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* improves the performance of the loop. */
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+
+/* For understanding the following code remember that %ecx == 0 now.
+ Although all the following instructions only modify %cl we always
+ have a correct zero-extended 32-bit value in %ecx. */
+
+/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want
+ longer instructions so that the next loop aligns without adding nops. */
+
+L(2): movb (%eax), %cl /* get byte from stopset */
+ testb %cl, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+
+ movb 1(%eax), %cl /* get byte from stopset */
+ testb $0xff, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+
+ movb 2(%eax), %cl /* get byte from stopset */
+ testb $0xff, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+
+ movb 3(%eax), %cl /* get byte from stopset */
+ addl $4, %eax /* increment stopset pointer */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */
+ testb $0xff, %cl /* is NUL char? */
+ jnz L(2) /* no => process next dword from stopset */
+
+L(1): leal -4(%edx), %eax /* prepare loop */
+
+ /* We use a neat trick for the following loop. Normally we would
+ have to test for two termination conditions:
+ 1. a character in the stopset was found
+ and
+ 2. the end of the string was reached.
+ But as the marker that a character is in the stopset we store its
+ own value in the table. Since the value of NUL is NUL, the loop
+ terminates for NUL in either case. */
+
+L(3): addl $4, %eax /* adjust pointer for full loop round */
+
+ movb (%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ je L(4) /* yes => return */
+
+ movb 1(%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ je L(5) /* yes => return */
+
+ movb 2(%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ je L(6) /* yes => return */
+
+ movb 3(%eax), %cl /* get byte from string */
+ cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */
+ jne L(3) /* no => next loop round */
+
+ incl %eax /* adjust pointer */
+L(6): incl %eax
+L(5): incl %eax
+
+L(4): addl $256, %esp /* remove stopset */
+ cfi_adjust_cfa_offset (-256)
+
+ orb %cl, %cl /* was last character NUL? */
+ jnz L(7) /* no => return pointer */
+ xorl %eax, %eax
+
+L(7): ret
+END (strpbrk)
+libc_hidden_builtin_def (strpbrk)
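
strpbrk shares strcspn's table construction and scan loop; only the epilogue
differs, converting the stop position into a pointer and returning NULL when
the scan stopped at the terminating NUL (the `orb %cl, %cl' test above).
Equivalently, as a C sketch:

#include <string.h>

char *
strpbrk_sketch (const char *s, const char *stopset)
{
  s += strcspn (s, stopset);
  return *s != '\0' ? (char *) s : NULL;
}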
diff --git a/REORG.TODO/sysdeps/i386/strrchr.S b/REORG.TODO/sysdeps/i386/strrchr.S
new file mode 100644
index 0000000000..95b304dc0b
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strrchr.S
@@ -0,0 +1,334 @@
+/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4+8 /* space for 2 saved regs */
+#define RTN PARMS
+#define STR RTN
+#define CHR STR+4
+
+ .text
+ENTRY (strrchr)
+
+ pushl %edi /* Save callee-safe registers used here. */
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 0)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+
+ xorl %eax, %eax
+ movl STR(%esp), %esi
+ cfi_rel_offset (esi, 0)
+ movl CHR(%esp), %ecx
+
+ /* At the moment %ecx contains C. What we need for the
+ algorithm is C in all bytes of the dword. Avoid
+ operations on 16 bit words because these require a
+ prefix byte (and one more cycle). */
+ movb %cl, %ch /* now it is 0|0|c|c */
+ movl %ecx, %edx
+ shll $16, %ecx /* now it is c|c|0|0 */
+ movw %dx, %cx /* and finally c|c|c|c */
+
+ /* Before we start with the main loop we process single bytes
+ until the source pointer is aligned. There are two reasons:
+ 1. aligned 32-bit memory access is faster
+ and (more important)
+ 2. in the main loop we process 32 bits in one step although
+ we don't know the end of the string. But accessing memory at
+ 4-byte alignment guarantees that we never access illegal
+ memory if this would not also be done by the trivial
+ implementation (this is because all processor-inherent
+ boundaries are multiples of 4). */
+
+ testl $3, %esi /* correctly aligned ? */
+ jz L(19) /* yes => begin loop */
+ movb (%esi), %dl /* load byte in question (we need it twice) */
+ cmpb %dl, %cl /* compare byte */
+ jne L(11) /* no match => don't remember pointer */
+ movl %esi, %eax /* remember pointer as possible result */
+L(11): orb %dl, %dl /* is NUL? */
+ jz L(2) /* yes => return */
+ incl %esi /* increment pointer */
+
+ testl $3, %esi /* correctly aligned ? */
+ jz L(19) /* yes => begin loop */
+ movb (%esi), %dl /* load byte in question (we need it twice) */
+ cmpb %dl, %cl /* compare byte */
+ jne L(12) /* target found => return */
+ movl %esi, %eax /* remember pointer as result */
+L(12): orb %dl, %dl /* is NUL? */
+ jz L(2) /* yes => return NULL */
+ incl %esi /* increment pointer */
+
+ testl $3, %esi /* correctly aligned ? */
+ jz L(19) /* yes => begin loop */
+ movb (%esi), %dl /* load byte in question (we need it twice) */
+ cmpb %dl, %cl /* compare byte */
+ jne L(13) /* target found => return */
+ movl %esi, %eax /* remember pointer as result */
+L(13): orb %dl, %dl /* is NUL? */
+ jz L(2) /* yes => return NULL */
+ incl %esi /* increment pointer */
+
+ /* Now we have reached alignment. */
+ jmp L(19) /* begin loop */
+
+ /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+ change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-31 is set, there will be a carry
+ into bit 32 (=carry flag), so all of the hole bits will
+ be changed.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+ /* Each round of the main loop processes 16 bytes. */
+
+ /* Jump to here when the character is detected.  We chose this
+    layout because the character being searched for is less
+    frequent than the other bytes and a taken conditional jump
+    costs more than falling through.
+
+    A few more words about the code below: it might not be obvious why
+    we decrement the source pointer here.  In the loop the pointer
+    is not pre-incremented and so it still points before the word
+    we are looking at.  But you should take a look at the instruction
+    which gets executed before we get into the loop: `addl $16, %esi'.
+    This makes the following subs into adds. */
+
+ /* These filler bytes keep the main loop correctly aligned.
+    We cannot use .align here because the instruction that needs
+    to be aligned is not the one immediately following. */
+ .byte 0, 0
+#ifndef PROF
+ /* Profiling adds some code and so changes the alignment. */
+ .byte 0
+#endif
+
+L(4): subl $4, %esi /* adjust pointer */
+L(41): subl $4, %esi
+L(42): subl $4, %esi
+L(43): testl $0xff000000, %edx /* is highest byte == C? */
+ jnz L(33) /* no => try other bytes */
+ leal 15(%esi), %eax /* store address as result */
+ jmp L(1) /* and start loop again */
+
+L(3): subl $4, %esi /* adjust pointer */
+L(31): subl $4, %esi
+L(32): subl $4, %esi
+L(33): testl $0xff0000, %edx /* is C in third byte? */
+ jnz L(51) /* no => try other bytes */
+ leal 14(%esi), %eax /* store address as result */
+ jmp L(1) /* and start loop again */
+
+L(51):
+ /* At this point we know that the character is in one of the two
+    lower bytes.  We make a guess and correct it if necessary.
+    This reduces the number of necessary jumps. */
+ leal 12(%esi), %eax /* guess address of lowest byte as result */
+ testb %dh, %dh /* is guess correct? */
+ jnz L(1) /* yes => start loop */
+ leal 13(%esi), %eax /* correct guess to second byte */
+
+L(1): addl $16, %esi /* increment pointer for full round */
+
+L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+
+ /* According to the algorithm we had to reverse the effect of the
+    XOR first and then test the overflow bits.  But because the
+    following XOR would destroy the carry flag and it would (in a
+    representation with more than 32 bits) not alter the last
+    overflow, we can now test this condition.  If no carry is signaled
+    no overflow must have occurred in the last byte => it was 0. */
+
+ jnc L(20) /* found NUL => check last word */
+
+ /* We are only interested in carry bits that change due to the
+ previous add, so remove original bits */
+ xorl %edx, %edi /* (word+magic)^word */
+
+ /* Now test for the other three overflow bits. */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+
+ /* If at least one byte of the word is NUL we don't get 0 in %edi. */
+ jnz L(20) /* found NUL => check last word */
+
+ /* Now we made sure the dword does not contain the character we are
+ looking for. But because we deal with strings we have to check
+ for the end of string before testing the next dword. */
+
+ xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(4) /* highest byte is C => examine dword */
+ xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(3) /* C is detected in the word => examine it */
+
+ movl 4(%esi), %edx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(21) /* found NUL => check last word */
+ xorl %edx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(21) /* found NUL => check last word */
+ xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(41) /* highest byte is C => examine dword */
+ xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(31) /* C is detected in the word => examine it */
+
+ movl 8(%esi), %edx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(22) /* found NUL => check last word */
+ xorl %edx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(22) /* found NUL => check last word */
+ xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(42) /* highest byte is C => examine dword */
+ xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(32) /* C is detected in the word => examine it */
+
+ movl 12(%esi), %edx /* get word (= 4 bytes) in question */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(23) /* found NUL => check last word */
+ xorl %edx, %edi /* (word+magic)^word */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz L(23) /* found NUL => check last word */
+ xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
+ are now 0 */
+ movl $0xfefefeff, %edi /* magic value */
+ addl %edx, %edi /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc L(43) /* highest byte is C => examine dword */
+ xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
+ orl $0xfefefeff, %edi /* set all non-carry bits */
+ incl %edi /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz L(1) /* C is not detected => restart loop */
+ jmp L(33) /* examine word */
+
+L(23): addl $4, %esi /* adjust pointer */
+L(22): addl $4, %esi
+L(21): addl $4, %esi
+
+ /* What remains to be done is to test in which byte the NUL char is
+    and whether the searched character appears in one of the bytes
+    before it.  A special case is that the searched byte may itself
+    be NUL.  In this case a pointer to the terminating NUL char has
+    to be returned. */
+
+L(20): cmpb %cl, %dl /* is first byte == C? */
+ jne L(24) /* no => skip */
+ movl %esi, %eax /* store address as result */
+L(24): testb %dl, %dl /* is first byte == NUL? */
+ jz L(2) /* yes => return */
+
+ cmpb %cl, %dh /* is second byte == C? */
+ jne L(25) /* no => skip */
+ leal 1(%esi), %eax /* store address as result */
+L(25): testb %dh, %dh /* is second byte == NUL? */
+ jz L(2) /* yes => return */
+
+ shrl $16,%edx /* make upper bytes accessible */
+ cmpb %cl, %dl /* is third byte == C */
+ jne L(26) /* no => skip */
+ leal 2(%esi), %eax /* store address as result */
+L(26): testb %dl, %dl /* is third byte == NUL */
+ jz L(2) /* yes => return */
+
+ cmpb %cl, %dh /* is fourth byte == C */
+ jne L(2) /* no => skip */
+ leal 3(%esi), %eax /* store address as result */
+
+L(2): popl %esi /* restore saved register content */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (strrchr)
+
+weak_alias (strrchr, rindex)
+libc_hidden_builtin_def (strrchr)
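
The 0xfefefeff carry trick documented above translates directly into C.
A hedged sketch (hypothetical helper name, little-endian 32-bit words
assumed): the addition reports, through the "hole" bits and the final
carry, which bytes of the word were nonzero.

    #include <assert.h>
    #include <stdint.h>

    /* Return nonzero if any byte of W is zero, mirroring the
       jnc/xor/or/inc sequence in the loop above.  */
    static int
    has_zero_byte (uint32_t w)
    {
      const uint32_t magic = 0xfefefeff;
      uint64_t sum = (uint64_t) w + magic;  /* keep the carry out of bit 31 */

      if ((sum >> 32) == 0)                 /* no carry => a byte was 0 */
        return 1;

      /* Keep only the carry bits the add produced, then test the three
         interior hole bits (8, 16 and 24) in one step.  */
      uint32_t changed = ((uint32_t) sum ^ w) | magic;
      return changed + 1 != 0;              /* some hole bit stayed clear */
    }

    int
    main (void)
    {
      assert (has_zero_byte (0x41420043));  /* embedded NUL byte */
      assert (!has_zero_byte (0x41424344)); /* all bytes nonzero */
      /* To search for a character C instead, first XOR the word with C
         replicated into all four bytes, exactly as the assembly does.  */
      return 0;
    }
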
diff --git a/REORG.TODO/sysdeps/i386/strspn.S b/REORG.TODO/sysdeps/i386/strspn.S
new file mode 100644
index 0000000000..d433eb6af5
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/strspn.S
@@ -0,0 +1,240 @@
+/* strspn (str, ss) -- Return the length of the initial segment of STR
+   which contains only characters from SS.
+ For Intel 80x86, x>=3.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
+ Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS 4 /* no space for saved regs */
+#define STR PARMS
+#define SKIP STR+4
+
+ .text
+ENTRY (strspn)
+
+ movl STR(%esp), %edx
+ movl SKIP(%esp), %eax
+
+ /* First we create a table with flags for all possible characters.
+    For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
+    supported by the C string functions we have 256 characters.
+    Before inserting marks for the skipset characters we clear the
+    whole table.  The unrolled form is much faster than a loop. */
+ xorl %ecx, %ecx /* %ecx = 0 !!! */
+
+ pushl %ecx /* make a 256 bytes long block filled with 0 */
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* These immediate values make label 2 */
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* fall on a 16 byte boundary, which */
+ cfi_adjust_cfa_offset (4)
+ pushl $0 /* improves the performance of the loop. */
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+ pushl $0
+ cfi_adjust_cfa_offset (4)
+
+/* For understanding the following code remember that %ecx == 0 now.
+   Although all the following instructions only modify %cl we always
+   have a correct zero-extended 32-bit value in %ecx. */
+
+/* Don't change the "testb $0xff,%cl" to "testb %cl,%cl".  We want
+   longer instructions so that the next loop aligns without adding nops. */
+
+L(2): movb (%eax), %cl /* get byte from skipset */
+ testb %cl, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in skipset table */
+
+ movb 1(%eax), %cl /* get byte from skipset */
+ testb $0xff, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in skipset table */
+
+ movb 2(%eax), %cl /* get byte from skipset */
+ testb $0xff, %cl /* is NUL char? */
+ jz L(1) /* yes => start compare loop */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in skipset table */
+
+ movb 3(%eax), %cl /* get byte from skipset */
+ addl $4, %eax /* increment skipset pointer */
+ movb %cl, (%esp,%ecx) /* set corresponding byte in skipset table */
+ testb $0xff, %cl /* is NUL char? */
+ jnz L(2) /* no => process next dword from skipset */
+
+L(1): leal -4(%edx), %eax /* prepare loop */
+
+ /* We use a neat trick for the following loop.  Normally we would
+    have to test for two termination conditions
+    1. a character not in the skipset was found
+    and
+    2. the end of the string was found
+    As a sign that a character is in the skipset we store its value
+    in the table.  The entry for NUL stays NUL, so the test below
+    yields zero for the terminating NUL as well and the loop stops
+    in every case. */
+
+L(3): addl $4, %eax /* adjust pointer for full loop round */
+
+ movb (%eax), %cl /* get byte from string */
+ testb %cl, (%esp,%ecx) /* is it contained in skipset? */
+ jz L(4) /* no => return */
+
+ movb 1(%eax), %cl /* get byte from string */
+ testb %cl, (%esp,%ecx) /* is it contained in skipset? */
+ jz L(5) /* no => return */
+
+ movb 2(%eax), %cl /* get byte from string */
+ testb %cl, (%esp,%ecx) /* is it contained in skipset? */
+ jz L(6) /* no => return */
+
+ movb 3(%eax), %cl /* get byte from string */
+ testb %cl, (%esp,%ecx) /* is it contained in skipset? */
+ jnz L(3) /* yes => start loop again */
+
+ incl %eax /* adjust pointer */
+L(6): incl %eax
+L(5): incl %eax
+
+L(4): addl $256, %esp /* remove skipset table */
+ cfi_adjust_cfa_offset (-256)
+ subl %edx, %eax /* we have to return the number of valid
+ characters, so compute distance to first
+ non-valid character */
+ ret
+END (strspn)
+libc_hidden_builtin_def (strspn)
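
Restated in C for strspn, the same table trick becomes a two-line loop.
A minimal sketch (illustrative name, not the glibc entry point): members
are stored as their own value, so `*p & table[*p]` is nonzero exactly for
non-NUL members, which is what the `testb %cl, (%esp,%ecx)` in the scan
loop computes.

    #include <stddef.h>

    /* Illustrative C model of the assembly above.  */
    size_t
    strspn_sketch (const char *s, const char *skipset)
    {
      unsigned char table[256] = { 0 };

      for (const unsigned char *p = (const unsigned char *) skipset;
           *p != '\0'; ++p)
        table[*p] = *p;

      /* (*p & table[*p]) != 0 iff *p is a non-NUL member of the skipset,
         so one test covers both termination conditions.  */
      const unsigned char *p = (const unsigned char *) s;
      while ((*p & table[*p]) != 0)
        ++p;
      return (size_t) (p - (const unsigned char *) s);
    }
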
diff --git a/REORG.TODO/sysdeps/i386/sub_n.S b/REORG.TODO/sysdeps/i386/sub_n.S
new file mode 100644
index 0000000000..3649da29e7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/sub_n.S
@@ -0,0 +1,111 @@
+/* i80386 __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+   and store difference in a third limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+8 /* space for 2 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define S2 S1+4
+#define SIZE S2+4
+
+ .text
+ENTRY (__mpn_sub_n)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+
+ movl RES(%esp),%edi
+ cfi_rel_offset (edi, 4)
+ movl S1(%esp),%esi
+ cfi_rel_offset (esi, 0)
+ movl S2(%esp),%edx
+ movl SIZE(%esp),%ecx
+ movl %ecx,%eax
+ shrl $3,%ecx /* compute count for unrolled loop */
+ negl %eax
+ andl $7,%eax /* get index where to start loop */
+ jz L(oop) /* necessary special case for 0 */
+ incl %ecx /* adjust loop count */
+ shll $2,%eax /* adjustment for pointers... */
+ subl %eax,%edi /* ... since they are offset ... */
+ subl %eax,%esi /* ... by a constant when we ... */
+ subl %eax,%edx /* ... enter the loop */
+ shrl $2,%eax /* restore previous value */
+#ifdef PIC
+/* Calculate start address in loop for PIC.  Due to limitations in some
+   assemblers, L(oop)-L(0)-3 cannot be put into the leal */
+ call L(0)
+ cfi_adjust_cfa_offset (4)
+L(0): leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $(L(oop)-L(0)-3),%eax
+ addl $4,%esp
+ cfi_adjust_cfa_offset (-4)
+#else
+/* Calculate start address in loop for non-PIC. */
+ leal (L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+ jmp *%eax /* jump into loop */
+ ALIGN (3)
+L(oop): movl (%esi),%eax
+ sbbl (%edx),%eax
+ movl %eax,(%edi)
+ movl 4(%esi),%eax
+ sbbl 4(%edx),%eax
+ movl %eax,4(%edi)
+ movl 8(%esi),%eax
+ sbbl 8(%edx),%eax
+ movl %eax,8(%edi)
+ movl 12(%esi),%eax
+ sbbl 12(%edx),%eax
+ movl %eax,12(%edi)
+ movl 16(%esi),%eax
+ sbbl 16(%edx),%eax
+ movl %eax,16(%edi)
+ movl 20(%esi),%eax
+ sbbl 20(%edx),%eax
+ movl %eax,20(%edi)
+ movl 24(%esi),%eax
+ sbbl 24(%edx),%eax
+ movl %eax,24(%edi)
+ movl 28(%esi),%eax
+ sbbl 28(%edx),%eax
+ movl %eax,28(%edi)
+ leal 32(%edi),%edi
+ leal 32(%esi),%esi
+ leal 32(%edx),%edx
+ decl %ecx
+ jnz L(oop)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_sub_n)
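
In C terms, the unrolled sbbl chain above computes the following.  This
is a reference sketch with an illustrative name; the computed jump into
the middle of the unrolled loop is purely a performance device and does
not change the result.

    #include <stddef.h>
    #include <stdint.h>

    /* dst[] = s1[] - s2[] over SIZE 32-bit limbs; returns the final
       borrow, which the sbbl %eax,%eax / negl %eax epilogue above
       materializes as 0 or 1.  */
    uint32_t
    mpn_sub_n_sketch (uint32_t *dst, const uint32_t *s1,
                      const uint32_t *s2, size_t size)
    {
      uint32_t borrow = 0;
      for (size_t i = 0; i < size; i++)
        {
          uint64_t d = (uint64_t) s1[i] - s2[i] - borrow;
          dst[i] = (uint32_t) d;
          borrow = (uint32_t) (d >> 32) & 1;  /* 1 iff the limb wrapped */
        }
      return borrow;
    }
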
diff --git a/REORG.TODO/sysdeps/i386/submul_1.S b/REORG.TODO/sysdeps/i386/submul_1.S
new file mode 100644
index 0000000000..c765e8dd79
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/submul_1.S
@@ -0,0 +1,86 @@
+/* i80386 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+ the result from a second limb vector.
+ Copyright (C) 1992-2017 Free Software Foundation, Inc.
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not,
+ see <http://www.gnu.org/licenses/>. */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS 4+16 /* space for 4 saved regs */
+#define RES PARMS
+#define S1 RES+4
+#define SIZE S1+4
+#define S2LIMB SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define sizeP ecx
+#define s2_limb ebx
+
+ .text
+ENTRY (__mpn_submul_1)
+
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (edi, 12)
+ cfi_rel_offset (esi, 8)
+ cfi_rel_offset (ebp, 4)
+ cfi_rel_offset (ebx, 0)
+
+ movl RES(%esp), %res_ptr
+ movl S1(%esp), %s1_ptr
+ movl SIZE(%esp), %sizeP
+ movl S2LIMB(%esp), %s2_limb
+ leal (%res_ptr,%sizeP,4), %res_ptr
+ leal (%s1_ptr,%sizeP,4), %s1_ptr
+ negl %sizeP
+ xorl %ebp, %ebp
+ ALIGN (3)
+L(oop):
+ movl (%s1_ptr,%sizeP,4), %eax
+ mull %s2_limb
+ addl %ebp, %eax
+ adcl $0, %edx
+ subl %eax, (%res_ptr,%sizeP,4)
+ adcl $0, %edx
+ movl %edx, %ebp
+
+ incl %sizeP
+ jnz L(oop)
+ movl %ebp, %eax
+
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ popl %ebp
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebp)
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edi)
+
+ ret
+END (__mpn_submul_1)
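
A C model of the mull/add/adc/sub/adc sequence in the loop above
(illustrative name; 64-bit arithmetic stands in for the %edx:%eax
register pair):

    #include <stddef.h>
    #include <stdint.h>

    /* res[] -= s1[] * limb over SIZE 32-bit limbs; returns the final
       carry limb that the assembly leaves in %ebp and then %eax.  */
    uint32_t
    mpn_submul_1_sketch (uint32_t *res, const uint32_t *s1,
                         size_t size, uint32_t limb)
    {
      uint32_t carry = 0;
      for (size_t i = 0; i < size; i++)
        {
          uint64_t prod = (uint64_t) s1[i] * limb + carry; /* mull; addl/adcl */
          uint32_t lo = (uint32_t) prod;
          uint32_t hi = (uint32_t) (prod >> 32);
          uint32_t r = res[i];
          res[i] = r - lo;                                 /* subl */
          carry = hi + (r < lo);                           /* adcl $0, %edx */
        }
      return carry;
    }
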
diff --git a/REORG.TODO/sysdeps/i386/symbol-hacks.h b/REORG.TODO/sysdeps/i386/symbol-hacks.h
new file mode 100644
index 0000000000..36a13c83f7
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/symbol-hacks.h
@@ -0,0 +1,21 @@
+/* Hacks needed for symbol manipulation. i386 version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdeps/wordsize-32/divdi3-symbol-hacks.h>
+
+#include_next "symbol-hacks.h"
diff --git a/REORG.TODO/sysdeps/i386/sys/ucontext.h b/REORG.TODO/sysdeps/i386/sys/ucontext.h
new file mode 100644
index 0000000000..fb5df11965
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/sys/ucontext.h
@@ -0,0 +1,139 @@
+/* Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* System V/i386 ABI compliant context switching support. */
+
+#ifndef _SYS_UCONTEXT_H
+#define _SYS_UCONTEXT_H 1
+
+#include <features.h>
+
+#include <bits/types/sigset_t.h>
+#include <bits/sigcontext.h>
+#include <bits/types/stack_t.h>
+
+
+/* Type for general register. */
+typedef int greg_t;
+
+/* Number of general registers. */
+#define __NGREG 19
+#ifdef __USE_MISC
+# define NGREG __NGREG
+#endif
+
+/* Container for all general registers. */
+typedef greg_t gregset_t[__NGREG];
+
+#ifdef __USE_MISC
+/* Number of each register in the `gregset_t' array.  */
+enum
+{
+ REG_GS = 0,
+# define REG_GS REG_GS
+ REG_FS,
+# define REG_FS REG_FS
+ REG_ES,
+# define REG_ES REG_ES
+ REG_DS,
+# define REG_DS REG_DS
+ REG_EDI,
+# define REG_EDI REG_EDI
+ REG_ESI,
+# define REG_ESI REG_ESI
+ REG_EBP,
+# define REG_EBP REG_EBP
+ REG_ESP,
+# define REG_ESP REG_ESP
+ REG_EBX,
+# define REG_EBX REG_EBX
+ REG_EDX,
+# define REG_EDX REG_EDX
+ REG_ECX,
+# define REG_ECX REG_ECX
+ REG_EAX,
+# define REG_EAX REG_EAX
+ REG_TRAPNO,
+# define REG_TRAPNO REG_TRAPNO
+ REG_ERR,
+# define REG_ERR REG_ERR
+ REG_EIP,
+# define REG_EIP REG_EIP
+ REG_CS,
+# define REG_CS REG_CS
+ REG_EFL,
+# define REG_EFL REG_EFL
+ REG_UESP,
+# define REG_UESP REG_UESP
+ REG_SS
+# define REG_SS REG_SS
+};
+#endif
+
+#ifdef __USE_MISC
+# define __ctx(fld) fld
+# define __ctxt(tag) tag
+#else
+# define __ctx(fld) __ ## fld
+# define __ctxt(tag) /* Empty. */
+#endif
+
+/* Structure to describe FPU registers. */
+typedef struct fpregset
+ {
+ union
+ {
+ struct __ctxt(fpchip_state)
+ {
+ int __ctx(state)[27];
+ int __ctx(status);
+ } __ctx(fpchip_state);
+
+ struct __ctxt(fp_emul_space)
+ {
+ char __ctx(fp_emul)[246];
+ char __ctx(fp_epad)[2];
+ } __ctx(fp_emul_space);
+
+ int __ctx(f_fpregs)[62];
+ } __ctx(fp_reg_set);
+
+ long int __ctx(f_wregs)[33];
+ } fpregset_t;
+
+/* Context to describe whole processor state. */
+typedef struct
+ {
+ gregset_t __ctx(gregs);
+ fpregset_t __ctx(fpregs);
+ } mcontext_t;
+
+#undef __ctx
+#undef __ctxt
+
+/* Userlevel context. */
+typedef struct ucontext
+ {
+ unsigned long int uc_flags;
+ struct ucontext *uc_link;
+ sigset_t uc_sigmask;
+ stack_t uc_stack;
+ mcontext_t uc_mcontext;
+ long int uc_filler[5];
+ } ucontext_t;
+
+#endif /* sys/ucontext.h */
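
A hedged usage sketch: on Linux/i386 these definitions are what a
SA_SIGINFO handler sees through its third argument.  REG_EIP and friends
need a feature-test macro that enables __USE_MISC (for example
_GNU_SOURCE), and the fprintf call is for illustration only; it is not
async-signal-safe.

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <ucontext.h>
    #include <unistd.h>

    /* Print the interrupted instruction pointer from the saved context.  */
    static void
    segv_handler (int sig, siginfo_t *info, void *ctx)
    {
      ucontext_t *uc = ctx;
      greg_t eip = uc->uc_mcontext.gregs[REG_EIP];
      fprintf (stderr, "signal %d at eip=%#lx\n", sig, (unsigned long) eip);
      _exit (1);
    }

    int
    main (void)
    {
      struct sigaction sa;
      memset (&sa, 0, sizeof sa);
      sa.sa_sigaction = segv_handler;
      sa.sa_flags = SA_SIGINFO;
      sigaction (SIGSEGV, &sa, NULL);
      raise (SIGSEGV);   /* exercise the handler */
      return 0;
    }
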
diff --git a/REORG.TODO/sysdeps/i386/sysdep.h b/REORG.TODO/sysdeps/i386/sysdep.h
new file mode 100644
index 0000000000..d2b0860b99
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/sysdep.h
@@ -0,0 +1,159 @@
+/* Assembler macros for i386.
+ Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdeps/generic/sysdep.h>
+
+#include <features.h> /* For __GNUC_PREREQ. */
+
+/* It is desirable that the names of PIC thunks match those used by
+ GCC so that multiple copies are eliminated by the linker. Because
+ GCC 4.6 and earlier use __i686 in the names, it is necessary to
+ override that predefined macro. */
+#if defined __i686 && defined __ASSEMBLER__
+#undef __i686
+#define __i686 __i686
+#endif
+
+#ifdef __ASSEMBLER__
+# define GET_PC_THUNK(reg) __x86.get_pc_thunk.reg
+#else
+# define GET_PC_THUNK_STR(reg) "__x86.get_pc_thunk." #reg
+#endif
+
+#ifdef __ASSEMBLER__
+
+/* Syntactic details of assembler. */
+
+/* ELF uses byte-counts for .align, most others use log2 of count of bytes. */
+#define ALIGNARG(log2) 1<<log2
+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
+
+
+/* Define an entry point visible from C.
+
+ There is currently a bug in gdb which prevents us from specifying
+ incomplete stabs information. Fake some entries here which specify
+ the current source file. */
+#define ENTRY(name) \
+ .globl C_SYMBOL_NAME(name); \
+ .type C_SYMBOL_NAME(name),@function; \
+ .align ALIGNARG(4); \
+ C_LABEL(name) \
+ cfi_startproc; \
+ CALL_MCOUNT
+
+#undef END
+#define END(name) \
+ cfi_endproc; \
+ ASM_SIZE_DIRECTIVE(name)
+
+#define ENTRY_CHK(name) ENTRY (name)
+#define END_CHK(name) END (name)
+
+/* If compiled for profiling, call `mcount' at the start of each function. */
+#ifdef PROF
+/* The mcount code relies on a normal frame pointer being on the stack
+ to locate our caller, so push one just for its benefit. */
+#define CALL_MCOUNT \
+ pushl %ebp; cfi_adjust_cfa_offset (4); movl %esp, %ebp; \
+ cfi_def_cfa_register (ebp); call JUMPTARGET(mcount); \
+ popl %ebp; cfi_def_cfa (esp, 4);
+#else
+#define CALL_MCOUNT /* Do nothing. */
+#endif
+
+/* Since C identifiers are not normally prefixed with an underscore
+ on this system, the asm identifier `syscall_error' intrudes on the
+ C name space. Make sure we use an innocuous name. */
+#define syscall_error __syscall_error
+#define mcount _mcount
+
+#define PSEUDO(name, syscall_name, args) \
+ .globl syscall_error; \
+lose: SYSCALL_PIC_SETUP \
+ jmp JUMPTARGET(syscall_error); \
+ ENTRY (name) \
+ DO_CALL (syscall_name, args); \
+ jb lose
+
+#undef PSEUDO_END
+#define PSEUDO_END(name) \
+ END (name)
+
+# define SETUP_PIC_REG(reg) \
+ .ifndef GET_PC_THUNK(reg); \
+ .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \
+ .globl GET_PC_THUNK(reg); \
+ .hidden GET_PC_THUNK(reg); \
+ .p2align 4; \
+ .type GET_PC_THUNK(reg),@function; \
+GET_PC_THUNK(reg): \
+ movl (%esp), %e##reg; \
+ ret; \
+ .size GET_PC_THUNK(reg), . - GET_PC_THUNK(reg); \
+ .previous; \
+ .endif; \
+ call GET_PC_THUNK(reg)
+
+# define LOAD_PIC_REG(reg) \
+ SETUP_PIC_REG(reg); addl $_GLOBAL_OFFSET_TABLE_, %e##reg
+
+#undef JUMPTARGET
+#ifdef PIC
+#define JUMPTARGET(name) name##@PLT
+#define SYSCALL_PIC_SETUP \
+ pushl %ebx; \
+ cfi_adjust_cfa_offset (4); \
+ call 0f; \
+0: popl %ebx; \
+ cfi_adjust_cfa_offset (-4); \
+ addl $_GLOBAL_OFFSET_TABLE_+[.-0b], %ebx;
+
+#else
+#define JUMPTARGET(name) name
+#define SYSCALL_PIC_SETUP /* Nothing. */
+#endif
+
+/* Local label name for asm code. */
+#ifndef L
+#define L(name) .L##name
+#endif
+
+#define atom_text_section .section ".text.atom", "ax"
+
+#else /* __ASSEMBLER__ */
+
+# define SETUP_PIC_REG_STR(reg) \
+ ".ifndef " GET_PC_THUNK_STR (reg) "\n" \
+ ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \
+ ".globl " GET_PC_THUNK_STR (reg) "\n" \
+ ".hidden " GET_PC_THUNK_STR (reg) "\n" \
+ ".p2align 4\n" \
+ ".type " GET_PC_THUNK_STR (reg) ",@function\n" \
+GET_PC_THUNK_STR (reg) ":" \
+ "movl (%%esp), %%e" #reg "\n" \
+ "ret\n" \
+ ".size " GET_PC_THUNK_STR (reg) ", . - " GET_PC_THUNK_STR (reg) "\n" \
+ ".previous\n" \
+ ".endif\n" \
+ "call " GET_PC_THUNK_STR (reg)
+
+# define LOAD_PIC_REG_STR(reg) \
+ SETUP_PIC_REG_STR (reg) "\naddl $_GLOBAL_OFFSET_TABLE_, %%e" #reg
+
+#endif /* __ASSEMBLER__ */
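
The SETUP_PIC_REG/SYSCALL_PIC_SETUP machinery reduces to one idiom: i386
has no PC-relative load, so a call/pop pair fetches the current
instruction pointer, and a link-time displacement turns it into the GOT
address.  A sketch of the idiom as inline assembly; this is i386-only and
for illustration, since real code goes through the shared thunks so the
linker can merge them.

    /* Compiles only for i386; mirrors the call/pop in SYSCALL_PIC_SETUP.  */
    static void *
    got_address (void)
    {
      void *got;
      asm ("call 1f\n"
           "1:\tpopl %0\n\t"
           "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %0"
           : "=r" (got));
      return got;
    }
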
diff --git a/REORG.TODO/sysdeps/i386/tls-macros.h b/REORG.TODO/sysdeps/i386/tls-macros.h
new file mode 100644
index 0000000000..053cba05d1
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tls-macros.h
@@ -0,0 +1,78 @@
+#include <features.h> /* For __GNUC_PREREQ. */
+
+#define TLS_LE(x) \
+ ({ int *__l; \
+ asm ("movl %%gs:0,%0\n\t" \
+ "subl $" #x "@tpoff,%0" \
+ : "=r" (__l)); \
+ __l; })
+
+#if defined PIC && !__GNUC_PREREQ (5,0)
+# define TLS_IE(x) \
+ ({ int *__l; \
+ asm ("movl %%gs:0,%0\n\t" \
+ "subl " #x "@gottpoff(%%ebx),%0" \
+ : "=r" (__l)); \
+ __l; })
+#else
+# define TLS_IE(x) \
+ ({ int *__l, __b; \
+ asm ("call 1f\n\t" \
+ ".subsection 1\n" \
+ "1:\tmovl (%%esp), %%ebx\n\t" \
+ "ret\n\t" \
+ ".previous\n\t" \
+ "addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t" \
+ "movl %%gs:0,%0\n\t" \
+ "subl " #x "@gottpoff(%%ebx),%0" \
+ : "=r" (__l), "=&b" (__b)); \
+ __l; })
+#endif
+
+#if defined PIC && !__GNUC_PREREQ (5,0)
+# define TLS_LD(x) \
+ ({ int *__l, __c, __d; \
+ asm ("leal " #x "@tlsldm(%%ebx),%%eax\n\t" \
+ "call ___tls_get_addr@plt\n\t" \
+ "leal " #x "@dtpoff(%%eax), %%eax" \
+ : "=a" (__l), "=&c" (__c), "=&d" (__d)); \
+ __l; })
+#else
+# define TLS_LD(x) \
+ ({ int *__l, __b, __c, __d; \
+ asm ("call 1f\n\t" \
+ ".subsection 1\n" \
+ "1:\tmovl (%%esp), %%ebx\n\t" \
+ "ret\n\t" \
+ ".previous\n\t" \
+ "addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t" \
+ "leal " #x "@tlsldm(%%ebx),%%eax\n\t" \
+ "call ___tls_get_addr@plt\n\t" \
+ "leal " #x "@dtpoff(%%eax), %%eax" \
+ : "=a" (__l), "=&b" (__b), "=&c" (__c), "=&d" (__d)); \
+ __l; })
+#endif
+
+#if defined PIC && !__GNUC_PREREQ (5,0)
+# define TLS_GD(x) \
+ ({ int *__l, __c, __d; \
+ asm ("leal " #x "@tlsgd(%%ebx),%%eax\n\t" \
+ "call ___tls_get_addr@plt\n\t" \
+ "nop" \
+ : "=a" (__l), "=&c" (__c), "=&d" (__d)); \
+ __l; })
+#else
+# define TLS_GD(x) \
+ ({ int *__l, __b, __c, __d; \
+ asm ("call 1f\n\t" \
+ ".subsection 1\n" \
+ "1:\tmovl (%%esp), %%ebx\n\t" \
+ "ret\n\t" \
+ ".previous\n\t" \
+ "addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t" \
+ "leal " #x "@tlsgd(%%ebx),%%eax\n\t" \
+ "call ___tls_get_addr@plt\n\t" \
+ "nop" \
+ : "=a" (__l), "=&b" (__b), "=&c" (__c), "=&d" (__d)); \
+ __l; })
+#endif
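
A usage sketch for these macros (hypothetical test code): each expands to
the named TLS access model and yields the variable's address.  TLS_LE is
only valid for TLS variables defined in the main executable; TLS_GD is
the fully general model and works from shared objects as well.

    #include "tls-macros.h"

    __thread int counter;

    /* Address via the local-exec model (constant offset from %gs:0).  */
    int *
    counter_addr_le (void)
    {
      return TLS_LE (counter);
    }

    /* Address via the global-dynamic model (call to ___tls_get_addr).  */
    int *
    counter_addr_gd (void)
    {
      return TLS_GD (counter);
    }
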
diff --git a/REORG.TODO/sysdeps/i386/tlsdesc.c b/REORG.TODO/sysdeps/i386/tlsdesc.c
new file mode 100644
index 0000000000..90de2bb05e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tlsdesc.c
@@ -0,0 +1,268 @@
+/* Manage TLS descriptors. i386 version.
+ Copyright (C) 2005-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <ldsodefs.h>
+#include <elf/dynamic-link.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-unmap-segments.h>
+#include <tlsdeschtab.h>
+
+/* The following 4 functions take an entry_check_offset argument.
+ It's computed by the caller as an offset between its entry point
+ and the call site, such that by adding the built-in return address
+ that is implicitly passed to the function with this offset, we can
+ easily obtain the caller's entry point to compare with the entry
+ point given in the TLS descriptor. If it's changed, we want to
+ return immediately. */
+
+/* This function is used to lazily resolve TLS_DESC REL relocations
+ that reference the *ABS* segment in their own link maps. The
+ argument is the addend originally stored there. */
+
+void
+__attribute__ ((regparm (3))) attribute_hidden
+_dl_tlsdesc_resolve_abs_plus_addend_fixup (struct tlsdesc volatile *td,
+ struct link_map *l,
+ ptrdiff_t entry_check_offset)
+{
+ ptrdiff_t addend = (ptrdiff_t) td->arg;
+
+ if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0)
+ - entry_check_offset))
+ return;
+
+#ifndef SHARED
+ CHECK_STATIC_TLS (l, l);
+#else
+ if (!TRY_STATIC_TLS (l, l))
+ {
+ td->arg = _dl_make_tlsdesc_dynamic (l, addend);
+ td->entry = _dl_tlsdesc_dynamic;
+ }
+ else
+#endif
+ {
+ td->arg = (void*) (addend - l->l_tls_offset);
+ td->entry = _dl_tlsdesc_return;
+ }
+
+ _dl_tlsdesc_wake_up_held_fixups ();
+}
+
+/* This function is used to lazily resolve TLS_DESC REL relocations
+ that originally had zero addends. The argument location, that
+ originally held the addend, is used to hold a pointer to the
+ relocation, but it has to be restored before we call the function
+ that applies relocations. */
+
+void
+__attribute__ ((regparm (3))) attribute_hidden
+_dl_tlsdesc_resolve_rel_fixup (struct tlsdesc volatile *td,
+ struct link_map *l,
+ ptrdiff_t entry_check_offset)
+{
+ const ElfW(Rel) *reloc = td->arg;
+
+ if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0)
+ - entry_check_offset))
+ return;
+
+ /* The code below was borrowed from _dl_fixup(),
+ except for checking for STB_LOCAL. */
+ const ElfW(Sym) *const symtab
+ = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
+ const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
+ const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
+ lookup_t result;
+
+ /* Look up the target symbol. If the normal lookup rules are not
+ used don't look in the global scope. */
+ if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
+ && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
+ {
+ const struct r_found_version *version = NULL;
+
+ if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW(Half) *vernum =
+ (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
+ ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
+ version = &l->l_versions[ndx];
+ if (version->hash == 0)
+ version = NULL;
+ }
+
+ result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
+ l->l_scope, version, ELF_RTYPE_CLASS_PLT,
+ DL_LOOKUP_ADD_DEPENDENCY, NULL);
+ }
+ else
+ {
+ /* We already found the symbol. The module (and therefore its load
+ address) is also known. */
+ result = l;
+ }
+
+ if (!sym)
+ {
+ td->arg = 0;
+ td->entry = _dl_tlsdesc_undefweak;
+ }
+ else
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (l, result);
+# else
+ if (!TRY_STATIC_TLS (l, result))
+ {
+ td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value);
+ td->entry = _dl_tlsdesc_dynamic;
+ }
+ else
+# endif
+ {
+ td->arg = (void*)(sym->st_value - result->l_tls_offset);
+ td->entry = _dl_tlsdesc_return;
+ }
+ }
+
+ _dl_tlsdesc_wake_up_held_fixups ();
+}
+
+/* This function is used to lazily resolve TLS_DESC RELA relocations.
+ The argument location is used to hold a pointer to the relocation. */
+
+void
+__attribute__ ((regparm (3))) attribute_hidden
+_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
+ struct link_map *l,
+ ptrdiff_t entry_check_offset)
+{
+ const ElfW(Rela) *reloc = td->arg;
+
+ if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0)
+ - entry_check_offset))
+ return;
+
+ /* The code below was borrowed from _dl_fixup(),
+ except for checking for STB_LOCAL. */
+ const ElfW(Sym) *const symtab
+ = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
+ const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
+ const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
+ lookup_t result;
+
+ /* Look up the target symbol. If the normal lookup rules are not
+ used don't look in the global scope. */
+ if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
+ && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
+ {
+ const struct r_found_version *version = NULL;
+
+ if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW(Half) *vernum =
+ (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
+ ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
+ version = &l->l_versions[ndx];
+ if (version->hash == 0)
+ version = NULL;
+ }
+
+ result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
+ l->l_scope, version, ELF_RTYPE_CLASS_PLT,
+ DL_LOOKUP_ADD_DEPENDENCY, NULL);
+ }
+ else
+ {
+ /* We already found the symbol. The module (and therefore its load
+ address) is also known. */
+ result = l;
+ }
+
+ if (!sym)
+ {
+ td->arg = (void*) reloc->r_addend;
+ td->entry = _dl_tlsdesc_undefweak;
+ }
+ else
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (l, result);
+# else
+ if (!TRY_STATIC_TLS (l, result))
+ {
+ td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
+ + reloc->r_addend);
+ td->entry = _dl_tlsdesc_dynamic;
+ }
+ else
+# endif
+ {
+ td->arg = (void*) (sym->st_value - result->l_tls_offset
+ + reloc->r_addend);
+ td->entry = _dl_tlsdesc_return;
+ }
+ }
+
+ _dl_tlsdesc_wake_up_held_fixups ();
+}
+
+/* This function is used to avoid busy waiting for other threads to
+ complete the lazy relocation. Once another thread wins the race to
+ relocate a TLS descriptor, it sets the descriptor up such that this
+ function is called to wait until the resolver releases the
+ lock. */
+
+void
+__attribute__ ((regparm (3))) attribute_hidden
+_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
+ struct link_map *l __attribute__((__unused__)),
+ ptrdiff_t entry_check_offset)
+{
+ /* Maybe we're lucky and can return early. */
+ if (__builtin_return_address (0) - entry_check_offset != td->entry)
+ return;
+
+ /* Locking here will stop execution until the running resolver runs
+ _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
+
+ FIXME: We'd be better off waiting on a condition variable, such
+ that we didn't have to hold the lock throughout the relocation
+ processing. */
+ __rtld_lock_lock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_lock));
+}
+
+
+/* Unmap the dynamic object, but also release its TLS descriptor table
+ if there is one. */
+
+void
+internal_function
+_dl_unmap (struct link_map *map)
+{
+ _dl_unmap_segments (map);
+
+#ifdef SHARED
+ if (map->l_mach.tlsdesc_table)
+ htab_delete (map->l_mach.tlsdesc_table);
+#endif
+}
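
For orientation, a simplified model of the object these fixups operate on
(field names follow dl-tlsdesc.h, but the types here are illustrative and
ignore the custom regparm calling convention): TLS access code calls
through `entry', and the functions above retarget `entry'/`arg' from the
lazy resolver to either the static-offset fast path (_dl_tlsdesc_return)
or the dynamic lookup path (_dl_tlsdesc_dynamic).

    /* Simplified sketch of a TLS descriptor; the real struct tlsdesc
       lives in dl-tlsdesc.h.  */
    struct tlsdesc_sketch
    {
      void *(*entry) (struct tlsdesc_sketch *); /* resolver or fast path */
      void *arg;                                /* addend, reloc, or offset */
    };

    /* What a TLS access compiles down to, conceptually.  */
    static void *
    tls_get (struct tlsdesc_sketch *td)
    {
      return td->entry (td);
    }
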
diff --git a/REORG.TODO/sysdeps/i386/tlsdesc.sym b/REORG.TODO/sysdeps/i386/tlsdesc.sym
new file mode 100644
index 0000000000..33854975d0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tlsdesc.sym
@@ -0,0 +1,17 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+--
+
+-- Abuse tls.h macros to derive offsets relative to the thread register.
+
+DTV_OFFSET offsetof(struct pthread, header.dtv)
+
+TLSDESC_ARG offsetof(struct tlsdesc, arg)
+
+TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
+TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
+TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
diff --git a/REORG.TODO/sysdeps/i386/tst-audit.h b/REORG.TODO/sysdeps/i386/tst-audit.h
new file mode 100644
index 0000000000..87bf199c85
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tst-audit.h
@@ -0,0 +1,25 @@
+/* Definitions for testing PLT entry/exit auditing. i386 version.
+
+ Copyright (C) 2012-2017 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define pltenter la_i86_gnu_pltenter
+#define pltexit la_i86_gnu_pltexit
+#define La_regs La_i86_regs
+#define La_retval La_i86_retval
+#define int_retval lrv_eax
diff --git a/REORG.TODO/sysdeps/i386/tst-audit3.c b/REORG.TODO/sysdeps/i386/tst-audit3.c
new file mode 100644
index 0000000000..b67a59d733
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tst-audit3.c
@@ -0,0 +1,37 @@
+/* Test case for i386 preserved registers in dynamic linker.
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include "tst-audit3.h"
+
+static int
+do_test (void)
+{
+ long long ll = audit1_test (1, 2, 3);
+ if (ll != 30)
+ abort ();
+
+ float f = audit2_test (1, 2, 3);
+ if (f != 30)
+ abort ();
+
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/REORG.TODO/sysdeps/i386/tst-audit3.h b/REORG.TODO/sysdeps/i386/tst-audit3.h
new file mode 100644
index 0000000000..f6d3b9181e
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tst-audit3.h
@@ -0,0 +1,20 @@
+/* Test case for i386 preserved registers in dynamic linker.
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern long long audit1_test (int, int, int) __attribute__ ((regparm(3)));
+extern float audit2_test (int, int, int) __attribute__ ((regparm(3)));
diff --git a/REORG.TODO/sysdeps/i386/tst-auditmod3a.c b/REORG.TODO/sysdeps/i386/tst-auditmod3a.c
new file mode 100644
index 0000000000..a333cdcff9
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tst-auditmod3a.c
@@ -0,0 +1,38 @@
+/* Test case for i386 preserved registers in dynamic linker.
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include "tst-audit3.h"
+
+long long
+__attribute__ ((regparm(3)))
+audit1_test (int i, int j, int k)
+{
+ if (i != 1 || j != 2 || k != 3)
+ abort ();
+ return 30;
+}
+
+float
+__attribute__ ((regparm(3)))
+audit2_test (int i, int j, int k)
+{
+ if (i != 1 || j != 2 || k != 3)
+ abort ();
+ return 30;
+}
diff --git a/REORG.TODO/sysdeps/i386/tst-auditmod3b.c b/REORG.TODO/sysdeps/i386/tst-auditmod3b.c
new file mode 100644
index 0000000000..523f3cec90
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tst-auditmod3b.c
@@ -0,0 +1,186 @@
+/* Test case for i386 preserved registers in dynamic linker.
+ Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <link.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ const char *flagstr;
+ switch (flag)
+ {
+ case LA_ACT_CONSISTENT:
+ flagstr = "consistent";
+ break;
+ case LA_ACT_ADD:
+ flagstr = "add";
+ break;
+ case LA_ACT_DELETE:
+ flagstr = "delete";
+ break;
+ default:
+ printf ("activity: unknown activity %u\n", flag);
+ return;
+ }
+ printf ("activity: %s\n", flagstr);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ const char *flagstr;
+ switch (flag)
+ {
+ case LA_SER_ORIG:
+ flagstr = "LA_SER_ORIG";
+ break;
+ case LA_SER_LIBPATH:
+ flagstr = "LA_SER_LIBPATH";
+ break;
+ case LA_SER_RUNPATH:
+ flagstr = "LA_SER_RUNPATH";
+ break;
+ case LA_SER_CONFIG:
+ flagstr = "LA_SER_CONFIG";
+ break;
+ case LA_SER_DEFAULT:
+ flagstr = "LA_SER_DEFAULT";
+ break;
+ case LA_SER_SECURE:
+ flagstr = "LA_SER_SECURE";
+ break;
+ default:
+ printf ("objsearch: %s, unknown flag %d\n", name, flag);
+ return (char *) name;
+ }
+
+ printf ("objsearch: %s, %s\n", name, flagstr);
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind32: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#include "tst-audit.h"
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ if (strcmp (symname, "audit1_test") == 0
+ || strcmp (symname, "audit2_test") == 0)
+ {
+ if (regs->lr_eax != 1
+ || regs->lr_edx != 2
+ || regs->lr_ecx != 3)
+ abort ();
+
+ *framesizep = 200;
+ }
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx,
+ (ptrdiff_t) outregs->int_retval);
+
+ if (strcmp (symname, "audit1_test") == 0
+ || strcmp (symname, "audit2_test") == 0)
+ {
+ if (inregs->lr_eax != 1
+ || inregs->lr_edx != 2
+ || inregs->lr_ecx != 3)
+ abort ();
+
+ if (strcmp (symname, "audit1_test") == 0)
+ {
+ long long x = ((unsigned long long) outregs->lrv_eax
+ | (unsigned long long) outregs->lrv_edx << 32);
+
+ if (x != 30)
+ abort ();
+ }
+ else if (strcmp (symname, "audit2_test") == 0)
+ {
+ if (outregs->lrv_st0 != 30)
+ abort ();
+ }
+ }
+
+ return 0;
+}
diff --git a/REORG.TODO/sysdeps/i386/tst-ld-sse-use.sh b/REORG.TODO/sysdeps/i386/tst-ld-sse-use.sh
new file mode 100755
index 0000000000..83a1dc59fb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tst-ld-sse-use.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# Make sure no code in ld.so uses xmm/ymm/zmm registers on i386.
+# Copyright (C) 2009-2017 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+set -e
+
+objpfx="$1"
+NM="$2"
+OBJDUMP="$3"
+READELF="$4"
+
+tmp=$(mktemp ${objpfx}tst-ld-sse-use.XXXXXX)
+trap 'rm -f "$tmp"' 1 2 3 15
+
+# List of object files we have to test
+rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os |
+ awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' |
+ sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,')
+rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)"
+
+# OBJECT symbols can be ignored.
+$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a |
+egrep " OBJECT *GLOBAL " |
+awk '{if ($7 != "ABS") print $8 }' |
+sort -u > "$tmp"
+declare -a objects
+objects=($(cat "$tmp"))
+
+objs="dl-runtime.os"
+tocheck="dl-runtime.os"
+
+while test -n "$objs"; do
+ this="$objs"
+ objs=""
+
+ for f in $this; do
+ undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}')
+ if test -n "$undef"; then
+ for s in $undef; do
+ for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do
+ if test "$obj" = "$s"; then
+ continue 2
+ fi
+ done
+ for o in $rtldobjs; do
+ ro=$(echo "$objpfx"../*/"$o")
+ if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then
+ if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then
+ echo "$o needed for $s"
+ objs="$objs $o"
+ fi
+ break;
+ fi
+ done
+ done
+ fi
+ done
+ tocheck="$tocheck$objs"
+done
+
+echo
+echo
+echo "object files needed: $tocheck"
+
+cp /dev/null "$tmp"
+for f in $tocheck; do
+ $OBJDUMP -d "$objpfx"../*/"$f" |
+ awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xyz]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
+ while read fct; do
+ if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
+ continue;
+ fi
+ echo "function $fct in $f modifies xmm/ymm/zmm" >> "$tmp"
+ result=1
+ done
+done
+
+if test -s "$tmp"; then
+ echo
+ echo
+ cat "$tmp"
+ result=1
+else
+ result=0
+fi
+
+rm "$tmp"
+exit $result
diff --git a/REORG.TODO/sysdeps/i386/tst-stack-align.h b/REORG.TODO/sysdeps/i386/tst-stack-align.h
new file mode 100644
index 0000000000..76276d4a28
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/tst-stack-align.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stdint.h>
+
+typedef struct { int i[4]; } int_al16 __attribute__((aligned (16)));
+
+#define TEST_STACK_ALIGN() \
+ ({ \
+ int_al16 _m; \
+ double _d = 12.0; \
+ long double _ld = 15.0; \
+ int _ret = 0; \
+ printf ("int_al16: %p %zu\n", &_m, __alignof (int_al16)); \
+ if ((((uintptr_t) &_m) & (__alignof (int_al16) - 1)) != 0) \
+ _ret = 1; \
+ \
+ printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
+ if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
+ _ret = 1; \
+ \
+ printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
+ if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
+ _ret = 1; \
+ _ret; \
+ })
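
The check TEST_STACK_ALIGN repeats three times is the standard
power-of-two alignment test, extracted here as a small sketch:

    #include <stdint.h>

    /* Nonzero iff P is aligned to ALIGN, which must be a power of two:
       all address bits below log2(ALIGN) must be zero.  */
    static int
    is_aligned (const void *p, uintptr_t align)
    {
      return ((uintptr_t) p & (align - 1)) == 0;
    }
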