summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJingning Han <jingning@google.com>2013-09-04 12:03:28 -0700
committerJingning Han <jingning@google.com>2013-09-05 12:49:12 -0700
commit458c2833c00de1d52f466eea3fb8f29067274006 (patch)
tree935df83ce8d420b0a8a211597b3cc7d7a175c632
parent1cf2272347f8a07b29a0ee2d9d6445826f2f7e6f (diff)
downloadlibvpx-458c2833c00de1d52f466eea3fb8f29067274006.tar
libvpx-458c2833c00de1d52f466eea3fb8f29067274006.tar.gz
libvpx-458c2833c00de1d52f466eea3fb8f29067274006.tar.bz2
libvpx-458c2833c00de1d52f466eea3fb8f29067274006.zip
Use saturated addition in SSSE3 of 32x32 quant
The 32x32 forward transform can potentially reach peak coefficient value close to 32700, while the rounding factor can go upto 610. This could cause overflow issue in the SSSE3 implementation of 32x32 quantization process. This commit resolves this issue by replacing the addition operations with saturated addition operations in 32x32 block quantization. Change-Id: Id6b98996458e16c5b6241338ca113c332bef6e70
-rw-r--r--vp9/common/vp9_rtcd_defs.sh2
-rw-r--r--vp9/encoder/vp9_quantize.c1
-rw-r--r--vp9/encoder/x86/vp9_quantize_ssse3.asm11
3 files changed, 12 insertions, 2 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 104db6aeb..615e07ba9 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b
specialize vp9_quantize_b $ssse3_x86_64
prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b_32x32 # $ssse3_x86_64 FIXME(jingning): need a unit test on thisbefore enabled
+specialize vp9_quantize_b_32x32 $ssse3_x86_64
#
# Structured Similarity (SSIM)
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index fb0e4707a..96abeff38 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -135,6 +135,7 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
if (x >= zbin) {
x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ x = clamp(x, INT16_MIN, INT16_MAX);
y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
quant_shift_ptr[rc != 0]) >> 15; // quantize (x)
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index 7deb9815a..ae0d6cd3c 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -70,9 +70,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
punpckhqdq m0, m0
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+%ifidn %1, b_32x32
+ paddsw m6, m1
+ punpckhqdq m1, m1
+ paddsw m11, m1
+%else
paddw m6, m1 ; m6 += round
punpckhqdq m1, m1
paddw m11, m1 ; m11 += round
+%endif
pmulhw m8, m6, m2 ; m8 = m6*q>>16
punpckhqdq m2, m2
pmulhw m13, m11, m2 ; m13 = m11*q>>16
@@ -126,9 +132,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pmovmskb r2, m12
or r6, r2
jz .skip_iter
-%endif
+ paddsw m6, m1
+ paddsw m11, m1
+%else
paddw m6, m1 ; m6 += round
paddw m11, m1 ; m11 += round
+%endif
pmulhw m14, m6, m2 ; m14 = m6*q>>16
pmulhw m13, m11, m2 ; m13 = m11*q>>16
paddw m14, m6 ; m14 += m6