diff options
author | Yunqing Wang <yunqingwang@google.com> | 2013-11-21 09:40:02 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2013-11-21 09:40:02 -0800 |
commit | 6c4964602aa5fa4086f95ced5d10d3432ed07284 (patch) | |
tree | 7d4378cc2aabe1227da9518e5637e484faebbdc3 | |
parent | 44dd3274daf74a746c225ac2274a9fba378b5b09 (diff) | |
parent | 256cf7ee7d535ee26487a7e0352655d76932afa2 (diff) | |
download | libvpx-6c4964602aa5fa4086f95ced5d10d3432ed07284.tar libvpx-6c4964602aa5fa4086f95ced5d10d3432ed07284.tar.gz libvpx-6c4964602aa5fa4086f95ced5d10d3432ed07284.tar.bz2 libvpx-6c4964602aa5fa4086f95ced5d10d3432ed07284.zip |
Merge "Correct ssse3 8/16-pixel wide sub-pixel filter calculation"
-rw-r--r-- | vp9/common/x86/vp9_subpixel_8t_ssse3.asm | 31 |
1 file changed, 24 insertions, 7 deletions
```diff
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index 17881ed47..634fa7746 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -158,10 +158,13 @@
     pmaddubsw   xmm6, k6k7
 
     paddsw      xmm0, xmm6
-    paddsw      xmm0, xmm2
+    movdqa      xmm1, xmm2
+    pmaxsw      xmm2, xmm4
+    pminsw      xmm4, xmm1
     paddsw      xmm0, xmm4
-    paddsw      xmm0, krd
+    paddsw      xmm0, xmm2
 
+    paddsw      xmm0, krd
     psraw       xmm0, 7
     packuswb    xmm0, xmm0
 
@@ -243,10 +246,13 @@
     pmaddubsw   xmm6, k6k7
 
     paddsw      xmm0, xmm6
-    paddsw      xmm0, xmm2
+    movdqa      xmm1, xmm2
+    pmaxsw      xmm2, xmm4
+    pminsw      xmm4, xmm1
     paddsw      xmm0, xmm4
-    paddsw      xmm0, krd
+    paddsw      xmm0, xmm2
 
+    paddsw      xmm0, krd
     psraw       xmm0, 7
     packuswb    xmm0, xmm0
 %if %1
@@ -635,9 +641,13 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
     pmaddubsw   %3, k4k5
     pmaddubsw   %4, k6k7
 
-    paddsw      %1, %2
     paddsw      %1, %4
+    movdqa      %4, %2
+    pmaxsw      %2, %3
+    pminsw      %3, %4
     paddsw      %1, %3
+    paddsw      %1, %2
+
     paddsw      %1, krd
     psraw       %1, 7
     packuswb    %1, %1
@@ -783,12 +793,19 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
     pmaddubsw   xmm6, k4k5
     pmaddubsw   xmm7, k6k7
 
-    paddsw      xmm0, xmm1
     paddsw      xmm0, xmm3
+    movdqa      xmm3, xmm1
+    pmaxsw      xmm1, xmm2
+    pminsw      xmm2, xmm3
     paddsw      xmm0, xmm2
-    paddsw      xmm4, xmm5
+    paddsw      xmm0, xmm1
+
     paddsw      xmm4, xmm7
+    movdqa      xmm7, xmm5
+    pmaxsw      xmm5, xmm6
+    pminsw      xmm6, xmm7
     paddsw      xmm4, xmm6
+    paddsw      xmm4, xmm5
 
     paddsw      xmm0, krd
     paddsw      xmm4, krd
```