quantize: copy ssse3 optimizations to intrinsics

Fairly minor differences from sse2; pabsw and psignw are the big gains. Also re-uses some values in the eob calculation to avoid an extra pcmp. Fixes test failures in HBD and OS X builds. Allows using it in 32-bit builds, where it is about 40% faster than sse2. Substantially faster than the assembly for skip_block, and 10-20% faster the rest of the time. Change-Id: If783bb3567e561e47667e10133b9c84414a334e2
author: Johann <johannkoenig@google.com> 2017-08-03 10:22:07 -0700
committer: Johann <johannkoenig@google.com> 2017-08-08 12:22:14 -0700
commit: d52cb5972930b68b0d0ce15b45d1a788dcd78322 (patch)
tree: ce9561fd1dcc68e820a3e826aa277994e3df6466 /vpx_dsp/vpx_dsp_rtcd_defs.pl
parent: cbb83ba4aa99b40b0b4a2a407bfd6d0d8be87d1f (diff)
download: libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar
libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar.gz
libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar.bz2
libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.zip
1 file changed, 1 insertion, 1 deletion
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 99ef262b1..a78b1cff7 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -671,7 +671,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
   add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b neon sse2/, "$ssse3_x86_64", "$avx_x86_64";
+  specialize qw/vpx_quantize_b neon sse2 ssse3/, "$avx_x86_64";
 
   add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
author	Johann <johannkoenig@google.com>	2017-08-03 10:22:07 -0700
committer	Johann <johannkoenig@google.com>	2017-08-08 12:22:14 -0700
commit	d52cb5972930b68b0d0ce15b45d1a788dcd78322 (patch)
tree	ce9561fd1dcc68e820a3e826aa277994e3df6466 /vpx_dsp/vpx_dsp_rtcd_defs.pl
parent	cbb83ba4aa99b40b0b4a2a407bfd6d0d8be87d1f (diff)
download	libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar.gz libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.tar.bz2 libvpx-d52cb5972930b68b0d0ce15b45d1a788dcd78322.zip