Revert "Revert "Removal of legacy zbin_extra / zbin_oq_value.""

This reverts commit 9946ee23e0a4c158e26a505b162a072f81b8a3be.

Fix the ssse3 asm function.

Change-Id: I07f77a63aa98087626e45c4e87aa5dcafc0b0b07
author: Jingning Han <jingning@google.com> 2014-12-22 09:35:29 -0800
committer: Jingning Han <jingning@google.com> 2014-12-22 10:09:25 -0800
commit: d0f237702745c4bfc0297d24f9465f960fb988ed (patch)
tree: 57183a417ecc6d3552170c14227a6f37608c7833 /vp9/encoder/x86
parent: f4eab151c57fe7e3b608249ce0be53675e271143 (diff)
download: libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar
libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.gz
libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.bz2
libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.zip
5 files changed, 20 insertions, 32 deletions
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index e671f3998..ae22a0b32 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -254,7 +254,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
                             const int16_t* round_ptr, const int16_t* quant_ptr,
                             const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
                             int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
-                            int zbin_oq_value, uint16_t* eob_ptr,
+                            uint16_t* eob_ptr,
                             const int16_t* scan_ptr,
                             const int16_t* iscan_ptr) {
   __m128i zero;
@@ -287,7 +287,6 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
   (void)scan_ptr;
   (void)zbin_ptr;
   (void)quant_shift_ptr;
-  (void)zbin_oq_value;
   (void)coeff_ptr;
 
   // Pre-condition input (shift by two)
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c
index 237c5e278..5c0ad7892 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -23,7 +23,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
                              const int16_t* quant_shift_ptr,
                              int16_t* qcoeff_ptr,
                              int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
-                             int zbin_oq_value, uint16_t* eob_ptr,
+                             uint16_t* eob_ptr,
                              const int16_t* scan_ptr,
                              const int16_t* iscan_ptr) {
   __m128i zero;
@@ -57,7 +57,6 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
   (void)scan_ptr;
   (void)zbin_ptr;
   (void)quant_shift_ptr;
-  (void)zbin_oq_value;
   (void)coeff_ptr;
 
   // Pre-condition input (shift by two)
diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
index 55c6ed71f..0bce9c321 100644
--- a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
@@ -24,7 +24,6 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr,
                                 tran_low_t *qcoeff_ptr,
                                 tran_low_t *dqcoeff_ptr,
                                 const int16_t *dequant_ptr,
-                                int zbin_oq_value,
                                 uint16_t *eob_ptr,
                                 const int16_t *scan,
                                 const int16_t *iscan) {
@@ -32,11 +31,11 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr,
   __m128i zbins[2];
   __m128i nzbins[2];
 
-  zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value),
-                           (int)(zbin_ptr[1] + zbin_oq_value),
-                           (int)(zbin_ptr[1] + zbin_oq_value),
-                           (int)(zbin_ptr[0] + zbin_oq_value));
-  zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value));
+  zbins[0] = _mm_set_epi32((int)zbin_ptr[1],
+                           (int)zbin_ptr[1],
+                           (int)zbin_ptr[1],
+                           (int)zbin_ptr[0]);
+  zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);
 
   nzbins[0] = _mm_setzero_si128();
   nzbins[1] = _mm_setzero_si128();
@@ -111,7 +110,6 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr,
                                       tran_low_t *qcoeff_ptr,
                                       tran_low_t *dqcoeff_ptr,
                                       const int16_t *dequant_ptr,
-                                      int zbin_oq_value,
                                       uint16_t *eob_ptr,
                                       const int16_t *scan,
                                       const int16_t *iscan) {
@@ -120,14 +118,14 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr,
   int idx = 0;
   int idx_arr[1024];
   int i, eob = -1;
-  const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
-  const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
+  const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
+  const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
   (void)scan;
-  zbins[0] = _mm_set_epi32((zbin1_tmp + zbin_oq_value),
-                           (zbin1_tmp + zbin_oq_value),
-                           (zbin1_tmp + zbin_oq_value),
-                           (zbin0_tmp + zbin_oq_value));
-  zbins[1] = _mm_set1_epi32((zbin1_tmp + zbin_oq_value));
+  zbins[0] = _mm_set_epi32(zbin1_tmp,
+                           zbin1_tmp,
+                           zbin1_tmp,
+                           zbin0_tmp);
+  zbins[1] = _mm_set1_epi32(zbin1_tmp);
 
   nzbins[0] = _mm_setzero_si128();
   nzbins[1] = _mm_setzero_si128();
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c
index e06eb2f15..679c66e30 100644
--- a/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -18,7 +18,7 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
                          const int16_t* round_ptr, const int16_t* quant_ptr,
                          const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
                          int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
-                         int zbin_oq_value, uint16_t* eob_ptr,
+                         uint16_t* eob_ptr,
                          const int16_t* scan_ptr,
                          const int16_t* iscan_ptr) {
   __m128i zero;
@@ -39,13 +39,10 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
 
       // Setup global values
       {
-        __m128i zbin_oq;
         __m128i pw_1;
-        zbin_oq = _mm_set1_epi16(zbin_oq_value);
         zbin = _mm_load_si128((const __m128i*)zbin_ptr);
         round = _mm_load_si128((const __m128i*)round_ptr);
         quant = _mm_load_si128((const __m128i*)quant_ptr);
-        zbin = _mm_add_epi16(zbin, zbin_oq);
         pw_1 = _mm_set1_epi16(1);
         zbin = _mm_sub_epi16(zbin, pw_1);
         dequant = _mm_load_si128((const __m128i*)dequant_ptr);
@@ -229,14 +226,13 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
                           const int16_t* round_ptr, const int16_t* quant_ptr,
                           const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
                           int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
-                          int zbin_oq_value, uint16_t* eob_ptr,
+                          uint16_t* eob_ptr,
                           const int16_t* scan_ptr,
                           const int16_t* iscan_ptr) {
   __m128i zero;
   (void)scan_ptr;
   (void)zbin_ptr;
   (void)quant_shift_ptr;
-  (void)zbin_oq_value;
 
   coeff_ptr += n_coeffs;
   iscan_ptr += n_coeffs;
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index f5f05e799..72e01d646 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -17,7 +17,7 @@ SECTION .text
 
 %macro QUANTIZE_FN 2
 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
-                                shift, qcoeff, dqcoeff, dequant, zbin_oq, \
+                                shift, qcoeff, dqcoeff, dequant, \
                                 eob, scan, iscan
   cmp                    dword skipm, 0
   jne .blank
@@ -29,13 +29,9 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   movifnidn                    zbinq, zbinmp
   movifnidn                   roundq, roundmp
   movifnidn                   quantq, quantmp
-  movd                            m4, dword zbin_oqm       ; m4 = zbin_oq
   mova                            m0, [zbinq]              ; m0 = zbin
-  punpcklwd                       m4, m4
   mova                            m1, [roundq]             ; m1 = round
-  pshufd                          m4, m4, 0
   mova                            m2, [quantq]             ; m2 = quant
-  paddw                           m0, m4                   ; m0 = zbin + zbin_oq
 %ifidn %1, b_32x32
   pcmpeqw                         m5, m5
   psrlw                           m5, 15
@@ -55,7 +51,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   psllw                           m4, 1
 %endif
   pxor                            m5, m5                   ; m5 = dedicated zero
-  DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
+  DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
   lea                         coeffq, [  coeffq+ncoeffq*2]
   lea                         iscanq, [  iscanq+ncoeffq*2]
   lea                        qcoeffq, [ qcoeffq+ncoeffq*2]
@@ -220,7 +216,7 @@ QUANTIZE_FN b_32x32, 7
 
 %macro QUANTIZE_FP 2
 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
-                                shift, qcoeff, dqcoeff, dequant, zbin_oq, \
+                                shift, qcoeff, dqcoeff, dequant, \
                                 eob, scan, iscan
   cmp                    dword skipm, 0
   jne .blank
@@ -248,7 +244,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   psllw                           m2, 1
 %endif
   pxor                            m5, m5                   ; m5 = dedicated zero
-  DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
+  DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
   lea                         coeffq, [  coeffq+ncoeffq*2]
   lea                         iscanq, [  iscanq+ncoeffq*2]
   lea                        qcoeffq, [ qcoeffq+ncoeffq*2]
author	Jingning Han <jingning@google.com>	2014-12-22 09:35:29 -0800
committer	Jingning Han <jingning@google.com>	2014-12-22 10:09:25 -0800
commit	d0f237702745c4bfc0297d24f9465f960fb988ed (patch)
tree	57183a417ecc6d3552170c14227a6f37608c7833 /vp9/encoder/x86
parent	f4eab151c57fe7e3b608249ce0be53675e271143 (diff)
download	libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.gz libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.bz2 libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.zip