diff options
author | Jingning Han <jingning@google.com> | 2014-12-22 09:35:29 -0800 |
---|---|---|
committer | Jingning Han <jingning@google.com> | 2014-12-22 10:09:25 -0800 |
commit | d0f237702745c4bfc0297d24f9465f960fb988ed (patch) | |
tree | 57183a417ecc6d3552170c14227a6f37608c7833 /vp9/encoder/x86 | |
parent | f4eab151c57fe7e3b608249ce0be53675e271143 (diff) | |
download | libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.gz libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.bz2 libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.zip |
Revert "Revert "Removal of legacy zbin_extra / zbin_oq_value.""
This reverts commit 9946ee23e0a4c158e26a505b162a072f81b8a3be.
Fix the ssse3 asm function.
Change-Id: I07f77a63aa98087626e45c4e87aa5dcafc0b0b07
Diffstat (limited to 'vp9/encoder/x86')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2.c | 3 |
| -rw-r--r-- | vp9/encoder/x86/vp9_dct_ssse3.c | 3 |
| -rw-r--r-- | vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c | 26 |
| -rw-r--r-- | vp9/encoder/x86/vp9_quantize_sse2.c | 8 |
| -rw-r--r-- | vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm | 12 |
5 files changed, 20 insertions, 32 deletions
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index e671f3998..ae22a0b32 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -254,7 +254,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - int zbin_oq_value, uint16_t* eob_ptr, + uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; @@ -287,7 +287,6 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, (void)scan_ptr; (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; (void)coeff_ptr; // Pre-condition input (shift by two) diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c index 237c5e278..5c0ad7892 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/vp9/encoder/x86/vp9_dct_ssse3.c @@ -23,7 +23,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - int zbin_oq_value, uint16_t* eob_ptr, + uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; @@ -57,7 +57,6 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, (void)scan_ptr; (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; (void)coeff_ptr; // Pre-condition input (shift by two) diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c index 55c6ed71f..0bce9c321 100644 --- a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c +++ b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c @@ -24,7 +24,6 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { @@ -32,11 +31,11 
@@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, __m128i zbins[2]; __m128i nzbins[2]; - zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value), - (int)(zbin_ptr[1] + zbin_oq_value), - (int)(zbin_ptr[1] + zbin_oq_value), - (int)(zbin_ptr[0] + zbin_oq_value)); - zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value)); + zbins[0] = _mm_set_epi32((int)zbin_ptr[1], + (int)zbin_ptr[1], + (int)zbin_ptr[1], + (int)zbin_ptr[0]); + zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]); nzbins[0] = _mm_setzero_si128(); nzbins[1] = _mm_setzero_si128(); @@ -111,7 +110,6 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { @@ -120,14 +118,14 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, int idx = 0; int idx_arr[1024]; int i, eob = -1; - const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1); - const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1); + const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1); + const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1); (void)scan; - zbins[0] = _mm_set_epi32((zbin1_tmp + zbin_oq_value), - (zbin1_tmp + zbin_oq_value), - (zbin1_tmp + zbin_oq_value), - (zbin0_tmp + zbin_oq_value)); - zbins[1] = _mm_set1_epi32((zbin1_tmp + zbin_oq_value)); + zbins[0] = _mm_set_epi32(zbin1_tmp, + zbin1_tmp, + zbin1_tmp, + zbin0_tmp); + zbins[1] = _mm_set1_epi32(zbin1_tmp); nzbins[0] = _mm_setzero_si128(); nzbins[1] = _mm_setzero_si128(); diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c index e06eb2f15..679c66e30 100644 --- a/vp9/encoder/x86/vp9_quantize_sse2.c +++ b/vp9/encoder/x86/vp9_quantize_sse2.c @@ -18,7 +18,7 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* 
quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - int zbin_oq_value, uint16_t* eob_ptr, + uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; @@ -39,13 +39,10 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, // Setup global values { - __m128i zbin_oq; __m128i pw_1; - zbin_oq = _mm_set1_epi16(zbin_oq_value); zbin = _mm_load_si128((const __m128i*)zbin_ptr); round = _mm_load_si128((const __m128i*)round_ptr); quant = _mm_load_si128((const __m128i*)quant_ptr); - zbin = _mm_add_epi16(zbin, zbin_oq); pw_1 = _mm_set1_epi16(1); zbin = _mm_sub_epi16(zbin, pw_1); dequant = _mm_load_si128((const __m128i*)dequant_ptr); @@ -229,14 +226,13 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - int zbin_oq_value, uint16_t* eob_ptr, + uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; (void)scan_ptr; (void)zbin_ptr; (void)quant_shift_ptr; - (void)zbin_oq_value; coeff_ptr += n_coeffs; iscan_ptr += n_coeffs; diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm index f5f05e799..72e01d646 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm @@ -17,7 +17,7 @@ SECTION .text %macro QUANTIZE_FN 2 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ - shift, qcoeff, dqcoeff, dequant, zbin_oq, \ + shift, qcoeff, dqcoeff, dequant, \ eob, scan, iscan cmp dword skipm, 0 jne .blank @@ -29,13 +29,9 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ movifnidn zbinq, zbinmp movifnidn roundq, roundmp movifnidn quantq, quantmp - movd m4, dword zbin_oqm ; m4 = zbin_oq mova m0, [zbinq] ; m0 = zbin - punpcklwd m4, m4 mova m1, [roundq] ; m1 = 
round - pshufd m4, m4, 0 mova m2, [quantq] ; m2 = quant - paddw m0, m4 ; m0 = zbin + zbin_oq %ifidn %1, b_32x32 pcmpeqw m5, m5 psrlw m5, 15 @@ -55,7 +51,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psllw m4, 1 %endif pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob + DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob lea coeffq, [ coeffq+ncoeffq*2] lea iscanq, [ iscanq+ncoeffq*2] lea qcoeffq, [ qcoeffq+ncoeffq*2] @@ -220,7 +216,7 @@ QUANTIZE_FN b_32x32, 7 %macro QUANTIZE_FP 2 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ - shift, qcoeff, dqcoeff, dequant, zbin_oq, \ + shift, qcoeff, dqcoeff, dequant, \ eob, scan, iscan cmp dword skipm, 0 jne .blank @@ -248,7 +244,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psllw m2, 1 %endif pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob + DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob lea coeffq, [ coeffq+ncoeffq*2] lea iscanq, [ iscanq+ncoeffq*2] lea qcoeffq, [ qcoeffq+ncoeffq*2] |