summary | refs | log | tree | commit | diff
path: root/vpx_dsp/x86
diff options
context:
space:
mode:
author: James Zern <jzern@google.com> 2016-07-02 03:08:32 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> 2016-07-02 03:08:33 +0000
commit: 5afa3b91502e3e7113466c3e1717eee5fe5662e6 (patch)
tree: 5337f85bd1d8d0e7b97a582118037e5bf2cb7d28 /vpx_dsp/x86
parent: 3197172405b0ee892040c9cd67e096d36fc5f62c (diff)
parent: b2fb48cfcff0bfe1fc8ac699786557e34bc1019d (diff)
download: libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.tar
libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.tar.gz
libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.tar.bz2
libvpx-5afa3b91502e3e7113466c3e1717eee5fe5662e6.zip
Merge "improve vpx_filter_block1d* based on replace paddsw+psrlw to pmulhrsw"
Diffstat (limited to 'vpx_dsp/x86')
-rw-r--r-- vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm | 20
1 files changed, 8 insertions, 12 deletions
diff --git a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
index 3c8cfd225..538b2129d 100644
--- a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
@@ -14,14 +14,14 @@
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
+ mov ecx, 0x01000100
movdqa xmm3, [rdx] ;load filters
psrldq xmm3, 6
packsswb xmm3, xmm3
pshuflw xmm3, xmm3, 0b ;k3_k4
- movq xmm2, rcx ;rounding
+ movd xmm2, ecx ;rounding_shift
pshufd xmm2, xmm2, 0
movsxd rax, DWORD PTR arg(1) ;pixels_per_line
@@ -33,8 +33,7 @@
punpcklbw xmm0, xmm1
pmaddubsw xmm0, xmm3
- paddsw xmm0, xmm2 ;rounding
- psraw xmm0, 7 ;shift
+ pmulhrsw xmm0, xmm2 ;rounding(+64)+shift(>>7)
packuswb xmm0, xmm0 ;pack to byte
%if %1
@@ -51,7 +50,7 @@
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
+ mov ecx, 0x01000100
movdqa xmm7, [rdx] ;load filters
psrldq xmm7, 6
@@ -59,7 +58,7 @@
pshuflw xmm7, xmm7, 0b ;k3_k4
punpcklwd xmm7, xmm7
- movq xmm6, rcx ;rounding
+ movd xmm6, ecx ;rounding_shift
pshufd xmm6, xmm6, 0
movsxd rax, DWORD PTR arg(1) ;pixels_per_line
@@ -71,8 +70,7 @@
punpcklbw xmm0, xmm1
pmaddubsw xmm0, xmm7
- paddsw xmm0, xmm6 ;rounding
- psraw xmm0, 7 ;shift
+ pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7)
packuswb xmm0, xmm0 ;pack back to byte
%if %1
@@ -92,10 +90,8 @@
pmaddubsw xmm0, xmm7
pmaddubsw xmm2, xmm7
- paddsw xmm0, xmm6 ;rounding
- paddsw xmm2, xmm6
- psraw xmm0, 7 ;shift
- psraw xmm2, 7
+ pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7)
+ pmulhrsw xmm2, xmm6
packuswb xmm0, xmm2 ;pack back to byte
%if %1