diff options
author | Yunqing Wang <yunqingwang@google.com> | 2015-02-25 10:10:10 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2015-02-25 10:10:10 -0800 |
commit | 419ff1352e5002a4ea3ef78a0bd78c7396eb8977 (patch) | |
tree | d886ca82c17df00f033dfb2d0c83f251d5d6e95d /vp9/encoder | |
parent | 0f57d0a682f21d27747935fe80d75cb21769f576 (diff) | |
parent | 58e0159c80789da2257463624f56935988e1e7c1 (diff) | |
download | libvpx-419ff1352e5002a4ea3ef78a0bd78c7396eb8977.tar libvpx-419ff1352e5002a4ea3ef78a0bd78c7396eb8977.tar.gz libvpx-419ff1352e5002a4ea3ef78a0bd78c7396eb8977.tar.bz2 libvpx-419ff1352e5002a4ea3ef78a0bd78c7396eb8977.zip |
Merge "Fix ssse3 quantize_fp functions while skip=1"
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm | 67 |
1 files changed, 34 insertions, 33 deletions
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm index 72e01d646..c35eb3603 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm @@ -15,6 +15,7 @@ pw_1: times 8 dw 1 SECTION .text +; TODO(yunqingwang)fix quantize_b code for skip=1 case. %macro QUANTIZE_FN 2 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ shift, qcoeff, dqcoeff, dequant, \ @@ -244,11 +245,11 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psllw m2, 1 %endif pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob + lea coeffq, [ coeffq+ncoeffq*2] - lea iscanq, [ iscanq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] - lea dqcoeffq, [dqcoeffq+ncoeffq*2] + lea r5q, [ r5q+ncoeffq*2] + lea r3q, [ r3q+ncoeffq*2] + lea r4q, [r4q+ncoeffq*2] neg ncoeffq ; get DC and first 15 AC coeffs @@ -266,15 +267,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmulhw m13, m11, m2 ; m13 = m11*q>>16 psignw m8, m9 ; m8 = reinsert sign psignw m13, m10 ; m13 = reinsert sign - mova [qcoeffq+ncoeffq*2+ 0], m8 - mova [qcoeffq+ncoeffq*2+16], m13 + mova [r3q+ncoeffq*2+ 0], m8 + mova [r3q+ncoeffq*2+16], m13 %ifidn %1, fp_32x32 pabsw m8, m8 pabsw m13, m13 %endif - pmullw m8, m3 ; dqc[i] = qc[i] * q + pmullw m8, m3 ; r4[i] = r3[i] * q punpckhqdq m3, m3 - pmullw m13, m3 ; dqc[i] = qc[i] * q + pmullw m13, m3 ; r4[i] = r3[i] * q %ifidn %1, fp_32x32 psrlw m8, 1 psrlw m13, 1 @@ -282,12 +283,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psignw m13, m10 psrlw m0, m3, 2 %endif - mova [dqcoeffq+ncoeffq*2+ 0], m8 - mova [dqcoeffq+ncoeffq*2+16], m13 + mova [r4q+ncoeffq*2+ 0], m8 + mova [r4q+ncoeffq*2+16], m13 pcmpeqw m8, m5 ; m8 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] + mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] + mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] psubw m6, m7 ; m6 = scan[i] + 1 psubw m11, m7 ; m11 = scan[i] + 1 pandn m8, m6 ; m8 = max(eob) @@ -318,26 +319,26 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmulhw m13, m11, m2 ; m13 = m11*q>>16 psignw m14, m9 ; m14 = reinsert sign psignw m13, m10 ; m13 = reinsert sign - mova [qcoeffq+ncoeffq*2+ 0], m14 - mova [qcoeffq+ncoeffq*2+16], m13 + mova [r3q+ncoeffq*2+ 0], m14 + mova [r3q+ncoeffq*2+16], m13 %ifidn %1, fp_32x32 pabsw m14, m14 pabsw m13, m13 %endif - pmullw m14, m3 ; dqc[i] = qc[i] * q - pmullw m13, m3 ; dqc[i] = qc[i] * q + pmullw m14, m3 ; r4[i] = r3[i] * q + pmullw m13, m3 ; r4[i] = r3[i] * q %ifidn %1, fp_32x32 psrlw m14, 1 psrlw m13, 1 psignw m14, m9 psignw m13, m10 %endif - mova [dqcoeffq+ncoeffq*2+ 0], m14 - mova [dqcoeffq+ncoeffq*2+16], m13 + mova [r4q+ncoeffq*2+ 0], m14 + mova [r4q+ncoeffq*2+16], m13 pcmpeqw m14, m5 ; m14 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] + mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] + mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] psubw m6, m7 ; m6 = scan[i] + 1 psubw m11, m7 ; m11 = scan[i] + 1 pandn m14, m6 ; m14 = max(eob) @@ -350,10 +351,10 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ %ifidn %1, fp_32x32 jmp .accumulate_eob .skip_iter: - mova [qcoeffq+ncoeffq*2+ 0], m5 - mova [qcoeffq+ncoeffq*2+16], m5 - mova [dqcoeffq+ncoeffq*2+ 0], m5 - mova [dqcoeffq+ncoeffq*2+16], m5 + mova [r3q+ncoeffq*2+ 0], m5 + mova [r3q+ncoeffq*2+16], m5 + mova [r4q+ncoeffq*2+ 0], m5 + mova [r4q+ncoeffq*2+16], m5 add ncoeffq, mmsize jl .ac_only_loop %endif @@ -368,7 +369,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pshuflw m7, m8, 0x1 pmaxsw m8, m7 pextrw r6, m8, 0 - mov [r2], r6 + mov [r2], r6 RET ; skip-block, i.e. just write all zeroes @@ -377,19 +378,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ movifnidn ncoeffq, ncoeffmp mov r2, qcoeffmp mov r3, eobmp - DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob - lea dqcoeffq, [dqcoeffq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] + + lea r0q, [r0q+ncoeffq*2] + lea r2q, [r2q+ncoeffq*2] neg ncoeffq pxor m7, m7 .blank_loop: - mova [dqcoeffq+ncoeffq*2+ 0], m7 - mova [dqcoeffq+ncoeffq*2+16], m7 - mova [qcoeffq+ncoeffq*2+ 0], m7 - mova [qcoeffq+ncoeffq*2+16], m7 + mova [r0q+ncoeffq*2+ 0], m7 + mova [r0q+ncoeffq*2+16], m7 + mova [r2q+ncoeffq*2+ 0], m7 + mova [r2q+ncoeffq*2+16], m7 add ncoeffq, mmsize jl .blank_loop - mov word [eobq], 0 + mov word [r3q], 0 RET %endmacro |