summary | refs | log | tree | commit | diff
path: root/vp9/encoder/x86
diff options
context:
space:
mode:
author: Jingning Han <jingning@google.com> 2014-12-22 09:35:29 -0800
committer: Jingning Han <jingning@google.com> 2014-12-22 10:09:25 -0800
commit: d0f237702745c4bfc0297d24f9465f960fb988ed (patch)
tree: 57183a417ecc6d3552170c14227a6f37608c7833 /vp9/encoder/x86
parent: f4eab151c57fe7e3b608249ce0be53675e271143 (diff)
download: libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar
libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.gz
libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.tar.bz2
libvpx-d0f237702745c4bfc0297d24f9465f960fb988ed.zip
Revert "Revert "Removal of legacy zbin_extra / zbin_oq_value.""
This reverts commit 9946ee23e0a4c158e26a505b162a072f81b8a3be.
Fix the ssse3 asm function.
Change-Id: I07f77a63aa98087626e45c4e87aa5dcafc0b0b07
Diffstat (limited to 'vp9/encoder/x86')
-rw-r--r-- vp9/encoder/x86/vp9_dct_sse2.c 3
-rw-r--r-- vp9/encoder/x86/vp9_dct_ssse3.c 3
-rw-r--r-- vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c 26
-rw-r--r-- vp9/encoder/x86/vp9_quantize_sse2.c 8
-rw-r--r-- vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm 12
5 files changed, 20 insertions, 32 deletions
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index e671f3998..ae22a0b32 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -254,7 +254,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
- int zbin_oq_value, uint16_t* eob_ptr,
+ uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
__m128i zero;
@@ -287,7 +287,6 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
- (void)zbin_oq_value;
(void)coeff_ptr;
// Pre-condition input (shift by two)
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c
index 237c5e278..5c0ad7892 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -23,7 +23,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
const int16_t* quant_shift_ptr,
int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
- int zbin_oq_value, uint16_t* eob_ptr,
+ uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
__m128i zero;
@@ -57,7 +57,6 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
- (void)zbin_oq_value;
(void)coeff_ptr;
// Pre-condition input (shift by two)
diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
index 55c6ed71f..0bce9c321 100644
--- a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
@@ -24,7 +24,6 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
- int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
@@ -32,11 +31,11 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr,
__m128i zbins[2];
__m128i nzbins[2];
- zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value),
- (int)(zbin_ptr[1] + zbin_oq_value),
- (int)(zbin_ptr[1] + zbin_oq_value),
- (int)(zbin_ptr[0] + zbin_oq_value));
- zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value));
+ zbins[0] = _mm_set_epi32((int)zbin_ptr[1],
+ (int)zbin_ptr[1],
+ (int)zbin_ptr[1],
+ (int)zbin_ptr[0]);
+ zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);
nzbins[0] = _mm_setzero_si128();
nzbins[1] = _mm_setzero_si128();
@@ -111,7 +110,6 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
- int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
@@ -120,14 +118,14 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr,
int idx = 0;
int idx_arr[1024];
int i, eob = -1;
- const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
- const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
+ const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
+ const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
(void)scan;
- zbins[0] = _mm_set_epi32((zbin1_tmp + zbin_oq_value),
- (zbin1_tmp + zbin_oq_value),
- (zbin1_tmp + zbin_oq_value),
- (zbin0_tmp + zbin_oq_value));
- zbins[1] = _mm_set1_epi32((zbin1_tmp + zbin_oq_value));
+ zbins[0] = _mm_set_epi32(zbin1_tmp,
+ zbin1_tmp,
+ zbin1_tmp,
+ zbin0_tmp);
+ zbins[1] = _mm_set1_epi32(zbin1_tmp);
nzbins[0] = _mm_setzero_si128();
nzbins[1] = _mm_setzero_si128();
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c
index e06eb2f15..679c66e30 100644
--- a/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -18,7 +18,7 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
- int zbin_oq_value, uint16_t* eob_ptr,
+ uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
__m128i zero;
@@ -39,13 +39,10 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
// Setup global values
{
- __m128i zbin_oq;
__m128i pw_1;
- zbin_oq = _mm_set1_epi16(zbin_oq_value);
zbin = _mm_load_si128((const __m128i*)zbin_ptr);
round = _mm_load_si128((const __m128i*)round_ptr);
quant = _mm_load_si128((const __m128i*)quant_ptr);
- zbin = _mm_add_epi16(zbin, zbin_oq);
pw_1 = _mm_set1_epi16(1);
zbin = _mm_sub_epi16(zbin, pw_1);
dequant = _mm_load_si128((const __m128i*)dequant_ptr);
@@ -229,14 +226,13 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
- int zbin_oq_value, uint16_t* eob_ptr,
+ uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
__m128i zero;
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
- (void)zbin_oq_value;
coeff_ptr += n_coeffs;
iscan_ptr += n_coeffs;
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index f5f05e799..72e01d646 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -17,7 +17,7 @@ SECTION .text
%macro QUANTIZE_FN 2
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
- shift, qcoeff, dqcoeff, dequant, zbin_oq, \
+ shift, qcoeff, dqcoeff, dequant, \
eob, scan, iscan
cmp dword skipm, 0
jne .blank
@@ -29,13 +29,9 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
movifnidn zbinq, zbinmp
movifnidn roundq, roundmp
movifnidn quantq, quantmp
- movd m4, dword zbin_oqm ; m4 = zbin_oq
mova m0, [zbinq] ; m0 = zbin
- punpcklwd m4, m4
mova m1, [roundq] ; m1 = round
- pshufd m4, m4, 0
mova m2, [quantq] ; m2 = quant
- paddw m0, m4 ; m0 = zbin + zbin_oq
%ifidn %1, b_32x32
pcmpeqw m5, m5
psrlw m5, 15
@@ -55,7 +51,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
psllw m4, 1
%endif
pxor m5, m5 ; m5 = dedicated zero
- DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
+ DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
lea coeffq, [ coeffq+ncoeffq*2]
lea iscanq, [ iscanq+ncoeffq*2]
lea qcoeffq, [ qcoeffq+ncoeffq*2]
@@ -220,7 +216,7 @@ QUANTIZE_FN b_32x32, 7
%macro QUANTIZE_FP 2
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
- shift, qcoeff, dqcoeff, dequant, zbin_oq, \
+ shift, qcoeff, dqcoeff, dequant, \
eob, scan, iscan
cmp dword skipm, 0
jne .blank
@@ -248,7 +244,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
psllw m2, 1
%endif
pxor m5, m5 ; m5 = dedicated zero
- DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
+ DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
lea coeffq, [ coeffq+ncoeffq*2]
lea iscanq, [ iscanq+ncoeffq*2]
lea qcoeffq, [ qcoeffq+ncoeffq*2]