diff options
author | Johann <johannkoenig@google.com> | 2012-01-05 10:09:39 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2012-01-05 10:09:39 -0800 |
commit | 0780f258da950e6d0a600ede35968ced93f407c5 (patch) | |
tree | 7c61948e09b75e80c9928dff3931ce1e532ed3ec /vp8/encoder | |
parent | b2c8dff727e9020baed12feb40268f555cd4c8fc (diff) | |
parent | 2b2c0c9bda036f3f284ff476d3292af94f406b83 (diff) | |
download | libvpx-0780f258da950e6d0a600ede35968ced93f407c5.tar libvpx-0780f258da950e6d0a600ede35968ced93f407c5.tar.gz libvpx-0780f258da950e6d0a600ede35968ced93f407c5.tar.bz2 libvpx-0780f258da950e6d0a600ede35968ced93f407c5.zip |
Merge "Improve SSSE3 fast quantizer function"
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/x86/quantize_ssse3.asm | 42 |
1 file changed, 20 insertions, 22 deletions
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index 34cc9c3bb..e698e904c 100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -80,6 +80,9 @@ sym(vp8_fast_quantize_b_ssse3): mov rdi, [rsi + vp8_blockd_dequant] mov rcx, [rsi + vp8_blockd_dqcoeff] + movdqa xmm2, xmm1 ;store y for getting eob + movdqa xmm3, xmm5 + pxor xmm1, xmm0 pxor xmm5, xmm4 psubw xmm1, xmm0 @@ -88,35 +91,30 @@ sym(vp8_fast_quantize_b_ssse3): movdqa [rax], xmm1 movdqa [rax + 16], xmm5 - movdqa xmm2, [rdi] - movdqa xmm3, [rdi + 16] - - pxor xmm4, xmm4 - pmullw xmm2, xmm1 - pmullw xmm3, xmm5 - - pcmpeqw xmm1, xmm4 ;non zero mask - pcmpeqw xmm5, xmm4 ;non zero mask - packsswb xmm1, xmm5 - pshufb xmm1, [GLOBAL(zz_shuf)] + movdqa xmm0, [rdi] + movdqa xmm4, [rdi + 16] - pmovmskb edx, xmm1 + pmullw xmm0, xmm1 + pmullw xmm4, xmm5 + pxor xmm1, xmm1 - xor rdi, rdi - mov eax, -1 - xor dx, ax ;flip the bits for bsr - bsr eax, edx + pcmpgtw xmm2, xmm1 ;calculate eob + pcmpgtw xmm3, xmm1 + packsswb xmm2, xmm3 + pshufb xmm2, [GLOBAL(zz_shuf)] - movdqa [rcx], xmm2 ;store dqcoeff - movdqa [rcx + 16], xmm3 ;store dqcoeff + pmovmskb edx, xmm2 + movdqa [rcx], xmm0 ;store dqcoeff + movdqa [rcx + 16], xmm4 ;store dqcoeff mov rcx, [rsi + vp8_blockd_eob] - sub edi, edx ;check for all zeros in bit mask - sar edi, 31 ;0 or -1 + bsr eax, edx ;count 0 add eax, 1 - and eax, edi ;if the bit mask was all zero, - ;then eob = 0 + + cmp edx, 0 ;if all 0, eob=0 + cmove eax, edx + mov BYTE PTR [rcx], al ;store eob ; begin epilog |