summary | refs | log | tree | commit | diff
path: root/vp8/encoder
diff options
context:
space:
mode:
author: Johann <johannkoenig@google.com> 2012-01-05 10:09:39 -0800
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2012-01-05 10:09:39 -0800
commit: 0780f258da950e6d0a600ede35968ced93f407c5 (patch)
tree: 7c61948e09b75e80c9928dff3931ce1e532ed3ec /vp8/encoder
parent: b2c8dff727e9020baed12feb40268f555cd4c8fc (diff)
parent: 2b2c0c9bda036f3f284ff476d3292af94f406b83 (diff)
download: libvpx-0780f258da950e6d0a600ede35968ced93f407c5.tar
libvpx-0780f258da950e6d0a600ede35968ced93f407c5.tar.gz
libvpx-0780f258da950e6d0a600ede35968ced93f407c5.tar.bz2
libvpx-0780f258da950e6d0a600ede35968ced93f407c5.zip
Merge "Improve SSSE3 fast quantizer function"
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- vp8/encoder/x86/quantize_ssse3.asm 42
1 files changed, 20 insertions, 22 deletions
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index 34cc9c3bb..e698e904c 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -80,6 +80,9 @@ sym(vp8_fast_quantize_b_ssse3):
mov rdi, [rsi + vp8_blockd_dequant]
mov rcx, [rsi + vp8_blockd_dqcoeff]
+ movdqa xmm2, xmm1 ;store y for getting eob
+ movdqa xmm3, xmm5
+
pxor xmm1, xmm0
pxor xmm5, xmm4
psubw xmm1, xmm0
@@ -88,35 +91,30 @@ sym(vp8_fast_quantize_b_ssse3):
movdqa [rax], xmm1
movdqa [rax + 16], xmm5
- movdqa xmm2, [rdi]
- movdqa xmm3, [rdi + 16]
-
- pxor xmm4, xmm4
- pmullw xmm2, xmm1
- pmullw xmm3, xmm5
-
- pcmpeqw xmm1, xmm4 ;non zero mask
- pcmpeqw xmm5, xmm4 ;non zero mask
- packsswb xmm1, xmm5
- pshufb xmm1, [GLOBAL(zz_shuf)]
+ movdqa xmm0, [rdi]
+ movdqa xmm4, [rdi + 16]
- pmovmskb edx, xmm1
+ pmullw xmm0, xmm1
+ pmullw xmm4, xmm5
+ pxor xmm1, xmm1
- xor rdi, rdi
- mov eax, -1
- xor dx, ax ;flip the bits for bsr
- bsr eax, edx
+ pcmpgtw xmm2, xmm1 ;calculate eob
+ pcmpgtw xmm3, xmm1
+ packsswb xmm2, xmm3
+ pshufb xmm2, [GLOBAL(zz_shuf)]
- movdqa [rcx], xmm2 ;store dqcoeff
- movdqa [rcx + 16], xmm3 ;store dqcoeff
+ pmovmskb edx, xmm2
+ movdqa [rcx], xmm0 ;store dqcoeff
+ movdqa [rcx + 16], xmm4 ;store dqcoeff
mov rcx, [rsi + vp8_blockd_eob]
- sub edi, edx ;check for all zeros in bit mask
- sar edi, 31 ;0 or -1
+ bsr eax, edx ;count 0
add eax, 1
- and eax, edi ;if the bit mask was all zero,
- ;then eob = 0
+
+ cmp edx, 0 ;if all 0, eob=0
+ cmove eax, edx
+
mov BYTE PTR [rcx], al ;store eob
; begin epilog