diff options
author | James Zern <jzern@google.com> | 2016-07-02 03:08:32 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2016-07-02 03:08:33 +0000 |
commit | 5afa3b91502e3e7113466c3e1717eee5fe5662e6 (patch) | |
tree | 5337f85bd1d8d0e7b97a582118037e5bf2cb7d28 /vpx_dsp/x86 | |
parent | 3197172405b0ee892040c9cd67e096d36fc5f62c (diff) | |
parent | b2fb48cfcff0bfe1fc8ac699786557e34bc1019d (diff) | |
download | libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.tar libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.tar.gz libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.tar.bz2 libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.zip |
Merge "improve vpx_filter_block1d* based on replace paddsw+psrlw to pmulhrsw"
Diffstat (limited to 'vpx_dsp/x86')
-rw-r--r-- | vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm | 20 |
1 file changed, 8 insertions, 12 deletions
diff --git a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm index 3c8cfd225..538b2129d 100644 --- a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm +++ b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm @@ -14,14 +14,14 @@ mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 + mov ecx, 0x01000100 movdqa xmm3, [rdx] ;load filters psrldq xmm3, 6 packsswb xmm3, xmm3 pshuflw xmm3, xmm3, 0b ;k3_k4 - movq xmm2, rcx ;rounding + movd xmm2, ecx ;rounding_shift pshufd xmm2, xmm2, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line @@ -33,8 +33,7 @@ punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm3 - paddsw xmm0, xmm2 ;rounding - psraw xmm0, 7 ;shift + pmulhrsw xmm0, xmm2 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack to byte %if %1 @@ -51,7 +50,7 @@ mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 + mov ecx, 0x01000100 movdqa xmm7, [rdx] ;load filters psrldq xmm7, 6 @@ -59,7 +58,7 @@ pshuflw xmm7, xmm7, 0b ;k3_k4 punpcklwd xmm7, xmm7 - movq xmm6, rcx ;rounding + movd xmm6, ecx ;rounding_shift pshufd xmm6, xmm6, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line @@ -71,8 +70,7 @@ punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm7 - paddsw xmm0, xmm6 ;rounding - psraw xmm0, 7 ;shift + pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack back to byte %if %1 @@ -92,10 +90,8 @@ pmaddubsw xmm0, xmm7 pmaddubsw xmm2, xmm7 - paddsw xmm0, xmm6 ;rounding - paddsw xmm2, xmm6 - psraw xmm0, 7 ;shift - psraw xmm2, 7 + pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) + pmulhrsw xmm2, xmm6 packuswb xmm0, xmm2 ;pack back to byte %if %1 |