summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
author: Jingning Han <jingning@google.com> 2013-09-05 14:09:40 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-09-05 14:09:40 -0700
commit: 1c263d69188ea5b707093b6e1f099ab2aca9dfae (patch)
tree: 824348b2a9d5dfd5dbeaaf6f7322a257ee0db7d7 /vp9
parent: 2156ccaa4a7a6fb25be1d34ad611f6e9b9ad3867 (diff)
parent: 458c2833c00de1d52f466eea3fb8f29067274006 (diff)
download:
libvpx-1c263d69188ea5b707093b6e1f099ab2aca9dfae.tar
libvpx-1c263d69188ea5b707093b6e1f099ab2aca9dfae.tar.gz
libvpx-1c263d69188ea5b707093b6e1f099ab2aca9dfae.tar.bz2
libvpx-1c263d69188ea5b707093b6e1f099ab2aca9dfae.zip
Merge "Use saturated addition in SSSE3 of 32x32 quant"
Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/vp9_rtcd_defs.sh             2
-rw-r--r--  vp9/encoder/vp9_quantize.c              1
-rw-r--r--  vp9/encoder/x86/vp9_quantize_ssse3.asm  11
3 files changed, 12 insertions, 2 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index c2777aa51..f5eeb2c51 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b
specialize vp9_quantize_b $ssse3_x86_64
prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b_32x32 # $ssse3_x86_64 FIXME(jingning): need a unit test on thisbefore enabled
+specialize vp9_quantize_b_32x32 $ssse3_x86_64
#
# Structured Similarity (SSIM)
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index fb0e4707a..96abeff38 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -135,6 +135,7 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
if (x >= zbin) {
x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ x = clamp(x, INT16_MIN, INT16_MAX);
y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
quant_shift_ptr[rc != 0]) >> 15; // quantize (x)
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index 7deb9815a..ae0d6cd3c 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -70,9 +70,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
punpckhqdq m0, m0
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+%ifidn %1, b_32x32
+ paddsw m6, m1
+ punpckhqdq m1, m1
+ paddsw m11, m1
+%else
paddw m6, m1 ; m6 += round
punpckhqdq m1, m1
paddw m11, m1 ; m11 += round
+%endif
pmulhw m8, m6, m2 ; m8 = m6*q>>16
punpckhqdq m2, m2
pmulhw m13, m11, m2 ; m13 = m11*q>>16
@@ -126,9 +132,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pmovmskb r2, m12
or r6, r2
jz .skip_iter
-%endif
+ paddsw m6, m1
+ paddsw m11, m1
+%else
paddw m6, m1 ; m6 += round
paddw m11, m1 ; m11 += round
+%endif
pmulhw m14, m6, m2 ; m14 = m6*q>>16
pmulhw m13, m11, m2 ; m13 = m11*q>>16
paddw m14, m6 ; m14 += m6