diff options
author | Jingning Han <jingning@google.com> | 2015-04-03 11:24:28 -0700 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2015-04-03 11:24:28 -0700 |
commit | 30e9c091c08e570b484fb0bc4c443ddd17fd7443 (patch) | |
tree | a6e3720d9425837dda1ad9b4228812f8ccaecfa5 | |
parent | d72ed35374db9956a274a816c1228a8d99adedbd (diff) | |
parent | 657cabe0f754a9da11a05670969234dae2520a6d (diff) | |
download | libvpx-30e9c091c08e570b484fb0bc4c443ddd17fd7443.tar libvpx-30e9c091c08e570b484fb0bc4c443ddd17fd7443.tar.gz libvpx-30e9c091c08e570b484fb0bc4c443ddd17fd7443.tar.bz2 libvpx-30e9c091c08e570b484fb0bc4c443ddd17fd7443.zip |
Merge "Tune SSSE3 assembly implementation to improve quantization speed"
-rw-r--r-- | vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm | 8 |
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index c35eb3603..449d52b22 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -282,6 +282,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   psignw m8, m9
   psignw m13, m10
   psrlw m0, m3, 2
+%else
+  psrlw m0, m3, 1
 %endif
   mova [r4q+ncoeffq*2+ 0], m8
   mova [r4q+ncoeffq*2+16], m13
@@ -302,7 +304,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
   pabsw m6, m9 ; m6 = abs(m9)
   pabsw m11, m10 ; m11 = abs(m10)
-%ifidn %1, fp_32x32
+
   pcmpgtw m7, m6, m0
   pcmpgtw m12, m11, m0
   pmovmskb r6d, m7
@@ -310,7 +312,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
 
   or r6, r2
   jz .skip_iter
-%endif
+
   pcmpeqw m7, m7
 
   paddsw m6, m1 ; m6 += round
@@ -348,7 +350,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   add ncoeffq, mmsize
   jl .ac_only_loop
 
-%ifidn %1, fp_32x32
   jmp .accumulate_eob
 .skip_iter:
   mova [r3q+ncoeffq*2+ 0], m5
@@ -357,7 +358,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   mova [r4q+ncoeffq*2+16], m5
   add ncoeffq, mmsize
   jl .ac_only_loop
-%endif
 
 .accumulate_eob:
   ; horizontally accumulate/max eobs and write into [eob] memory pointer
```