diff options
author | Ronald S. Bultje <rbultje@google.com> | 2013-07-03 09:05:48 -0700 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2013-07-03 09:05:48 -0700 |
commit | 61fe678f36f901e61cabfb1513b504db9bf375b6 (patch) | |
tree | 550392db221c3844b34aa467fb04fec325dbd646 /vp9/encoder/x86/vp9_quantize_ssse3.asm | |
parent | 98c493a1c0fea6ce5b0e4e56f169ab2b772704c2 (diff) | |
parent | e5fb4b61b66d188b3afed56f1e2548dd6e1a2074 (diff) | |
download | libvpx-61fe678f36f901e61cabfb1513b504db9bf375b6.tar libvpx-61fe678f36f901e61cabfb1513b504db9bf375b6.tar.gz libvpx-61fe678f36f901e61cabfb1513b504db9bf375b6.tar.bz2 libvpx-61fe678f36f901e61cabfb1513b504db9bf375b6.zip |
Merge "Use pmovmskb to skip quantize loops over empty coefficients."
Diffstat (limited to 'vp9/encoder/x86/vp9_quantize_ssse3.asm')
-rw-r--r-- | vp9/encoder/x86/vp9_quantize_ssse3.asm | 34 |
1 files changed, 25 insertions, 9 deletions
```diff
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index b666abbd9..60f799195 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -15,10 +15,10 @@ pw_1: times 8 dw 1
 
 SECTION .text
 
-%macro QUANTIZE_FN 1
-cglobal quantize_%1, 0, 6, 15, coeff, ncoeff, skip, zbin, round, quant, \
-                               shift, qcoeff, dqcoeff, dequant, zbin_oq, \
-                               eob, scan, iscan
+%macro QUANTIZE_FN 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+                                shift, qcoeff, dqcoeff, dequant, zbin_oq, \
+                                eob, scan, iscan
   cmp dword skipm, 0
   jne .blank
 
@@ -43,9 +43,8 @@ cglobal quantize_%1, 0, 6, 15, coeff, ncoeff, skip, zbin, round, quant, \
   mova m4, [r2] ; m4 = shift
   mov r4, dqcoeffmp
   mov r5, iscanmp
-  mov r2, eobmp
   pxor m5, m5 ; m5 = dedicated zero
-  DEFINE_ARGS coeff, ncoeff, eob, qcoeff, dqcoeff, iscan
+  DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
   lea coeffq, [  coeffq+ncoeffq*2]
   lea iscanq, [  iscanq+ncoeffq*2]
   lea qcoeffq, [ qcoeffq+ncoeffq*2]
@@ -119,6 +118,12 @@ cglobal quantize_%1, 0, 6, 15, coeff, ncoeff, skip, zbin, round, quant, \
 %endif
   pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
   pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+%ifidn %1, b_32x32
+  pmovmskb r6, m7
+  pmovmskb r2, m12
+  or r6, r2
+  jz .skip_iter
+%endif
   paddw m6, m1 ; m6 += round
   paddw m11, m1 ; m11 += round
   pmulhw m14, m6, m2 ; m14 = m6*q>>16
@@ -159,16 +164,27 @@ cglobal quantize_%1, 0, 6, 15, coeff, ncoeff, skip, zbin, round, quant, \
   pmaxsw m8, m13
   add ncoeffq, mmsize
   jl .ac_only_loop
+%ifidn %1, b_32x32
+  jmp .accumulate_eob
+.skip_iter:
+  mova [qcoeffq+ncoeffq*2+ 0], m5
+  mova [qcoeffq+ncoeffq*2+16], m5
+  mova [dqcoeffq+ncoeffq*2+ 0], m5
+  mova [dqcoeffq+ncoeffq*2+16], m5
+  add ncoeffq, mmsize
+  jl .ac_only_loop
+%endif
 
 .accumulate_eob:
   ; horizontally accumulate/max eobs and write into [eob] memory pointer
+  mov r2, eobmp
   pshufd m7, m8, 0xe
   pmaxsw m8, m7
   pshuflw m7, m8, 0xe
   pmaxsw m8, m7
   pshuflw m7, m8, 0x1
   pmaxsw m8, m7
-  pextrw [eobq], m8, 0
+  pextrw [r2], m8, 0
   RET
 
   ; skip-block, i.e. just write all zeroes
@@ -194,5 +210,5 @@ cglobal quantize_%1, 0, 6, 15, coeff, ncoeff, skip, zbin, round, quant, \
 %endmacro
 
 INIT_XMM ssse3
-QUANTIZE_FN b
-QUANTIZE_FN b_32x32
+QUANTIZE_FN b, 6
+QUANTIZE_FN b_32x32, 7
```