diff options
Diffstat (limited to 'vpx_dsp/x86/quantize_sse2.c')
-rw-r--r-- | vpx_dsp/x86/quantize_sse2.c | 59 |
1 file changed, 17 insertions, 42 deletions
diff --git a/vpx_dsp/x86/quantize_sse2.c b/vpx_dsp/x86/quantize_sse2.c index 2c7e431c7..0580a7bd7 100644 --- a/vpx_dsp/x86/quantize_sse2.c +++ b/vpx_dsp/x86/quantize_sse2.c @@ -13,32 +13,7 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" - -static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) { -#if CONFIG_VP9_HIGHBITDEPTH - return _mm_setr_epi16((int16_t)coeff_ptr[0], (int16_t)coeff_ptr[1], - (int16_t)coeff_ptr[2], (int16_t)coeff_ptr[3], - (int16_t)coeff_ptr[4], (int16_t)coeff_ptr[5], - (int16_t)coeff_ptr[6], (int16_t)coeff_ptr[7]); -#else - return _mm_load_si128((const __m128i *)coeff_ptr); -#endif -} - -static INLINE void store_coefficients(__m128i coeff_vals, - tran_low_t *coeff_ptr) { -#if CONFIG_VP9_HIGHBITDEPTH - __m128i one = _mm_set1_epi16(1); - __m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one); - __m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one); - __m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi); - __m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi); - _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals_1); - _mm_store_si128((__m128i *)(coeff_ptr + 4), coeff_vals_2); -#else - _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals); -#endif -} +#include "vpx_dsp/x86/fdct.h" void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, @@ -81,8 +56,8 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, __m128i qtmp0, qtmp1; __m128i cmp_mask0, cmp_mask1; // Do DC and first 15 AC - coeff0 = load_coefficients(coeff_ptr + n_coeffs); - coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8); + coeff0 = load_tran_low(coeff_ptr + n_coeffs); + coeff1 = load_tran_low(coeff_ptr + n_coeffs + 8); // Poor man's sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); @@ -117,15 +92,15 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = 
_mm_and_si128(qcoeff1, cmp_mask1); - store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs); - store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8); + store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs); + store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8); coeff0 = _mm_mullo_epi16(qcoeff0, dequant); dequant = _mm_unpackhi_epi64(dequant, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - store_coefficients(coeff0, dqcoeff_ptr + n_coeffs); - store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8); + store_tran_low(coeff0, dqcoeff_ptr + n_coeffs); + store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8); } { @@ -159,8 +134,8 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, __m128i qtmp0, qtmp1; __m128i cmp_mask0, cmp_mask1; - coeff0 = load_coefficients(coeff_ptr + n_coeffs); - coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8); + coeff0 = load_tran_low(coeff_ptr + n_coeffs); + coeff1 = load_tran_low(coeff_ptr + n_coeffs + 8); // Poor man's sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); @@ -191,14 +166,14 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); - store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs); - store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8); + store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs); + store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8); coeff0 = _mm_mullo_epi16(qcoeff0, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - store_coefficients(coeff0, dqcoeff_ptr + n_coeffs); - store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8); + store_tran_low(coeff0, dqcoeff_ptr + n_coeffs); + store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8); } { @@ -237,10 +212,10 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, } } else { do { - store_coefficients(zero, dqcoeff_ptr + n_coeffs); - store_coefficients(zero, dqcoeff_ptr + n_coeffs + 8); - store_coefficients(zero, qcoeff_ptr + 
n_coeffs); - store_coefficients(zero, qcoeff_ptr + n_coeffs + 8); + store_tran_low(zero, dqcoeff_ptr + n_coeffs); + store_tran_low(zero, dqcoeff_ptr + n_coeffs + 8); + store_tran_low(zero, qcoeff_ptr + n_coeffs); + store_tran_low(zero, qcoeff_ptr + n_coeffs + 8); n_coeffs += 8 * 2; } while (n_coeffs < 0); *eob_ptr = 0; |