diff options
author | Johann <johannkoenig@google.com> | 2017-08-03 10:22:07 -0700 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2017-08-08 12:22:14 -0700 |
commit | d52cb5972930b68b0d0ce15b45d1a788dcd78322 (patch) | |
tree | ce9561fd1dcc68e820a3e826aa277994e3df6466 /vpx_dsp/vpx_dsp_rtcd_defs.pl | |
parent | cbb83ba4aa99b40b0b4a2a407bfd6d0d8be87d1f (diff) | |
download | libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar.gz libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar.bz2 libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.zip |
quantize: copy ssse3 optimizations to intrinsics
Fairly minor differences from sse2. pabsw and psignw are the big gains.
Also re-uses some values in eob calculation to avoid an extra pcmp.
Fixes test failures in HBD and OS X builds.
Allows using it in 32bit builds, where it is about 40% faster than sse2.
Substantially faster than the assembly for skip_block. 10-20% faster than
the assembly the rest of the time.
Change-Id: If783bb3567e561e47667e10133b9c84414a334e2
Diffstat (limited to 'vpx_dsp/vpx_dsp_rtcd_defs.pl')
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 99ef262b1..a78b1cff7 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -671,7 +671,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
   add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b neon sse2/, "$ssse3_x86_64", "$avx_x86_64";
+  specialize qw/vpx_quantize_b neon sse2 ssse3/, "$avx_x86_64";
 
   add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";