summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
authorYunqing Wang <yunqingwang@google.com>2013-11-20 12:52:56 -0800
committerYunqing Wang <yunqingwang@google.com>2013-11-20 12:52:56 -0800
commit256cf7ee7d535ee26487a7e0352655d76932afa2 (patch)
tree85a47df37ce88908bc28141d16ece42b6b133882 /vp9
parent25aae73a30916cfdc5a2343365531e7357227a51 (diff)
downloadlibvpx-256cf7ee7d535ee26487a7e0352655d76932afa2.tar
libvpx-256cf7ee7d535ee26487a7e0352655d76932afa2.tar.gz
libvpx-256cf7ee7d535ee26487a7e0352655d76932afa2.tar.bz2
libvpx-256cf7ee7d535ee26487a7e0352655d76932afa2.zip
Correct ssse3 8/16-pixel wide sub-pixel filter calculation
Although no mismatch was indicated for 8/16 wide sub-pixel filters in issue 661, they had similar problems that could cause mismatch potentially. This patch fixed calculations in HORIZx8/16 and VERTx8/16. Change-Id: I169961c9d40a20340995b7d22aafc89ccf30bfca
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/x86/vp9_subpixel_8t_ssse3.asm31
1 files changed, 24 insertions, 7 deletions
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index 17881ed47..634fa7746 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -158,10 +158,13 @@
pmaddubsw xmm6, k6k7
paddsw xmm0, xmm6
- paddsw xmm0, xmm2
+ movdqa xmm1, xmm2
+ pmaxsw xmm2, xmm4
+ pminsw xmm4, xmm1
paddsw xmm0, xmm4
- paddsw xmm0, krd
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -243,10 +246,13 @@
pmaddubsw xmm6, k6k7
paddsw xmm0, xmm6
- paddsw xmm0, xmm2
+ movdqa xmm1, xmm2
+ pmaxsw xmm2, xmm4
+ pminsw xmm4, xmm1
paddsw xmm0, xmm4
- paddsw xmm0, krd
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
%if %1
@@ -635,9 +641,13 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
pmaddubsw %3, k4k5
pmaddubsw %4, k6k7
- paddsw %1, %2
paddsw %1, %4
+ movdqa %4, %2
+ pmaxsw %2, %3
+ pminsw %3, %4
paddsw %1, %3
+ paddsw %1, %2
+
paddsw %1, krd
psraw %1, 7
packuswb %1, %1
@@ -783,12 +793,19 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
pmaddubsw xmm6, k4k5
pmaddubsw xmm7, k6k7
- paddsw xmm0, xmm1
paddsw xmm0, xmm3
+ movdqa xmm3, xmm1
+ pmaxsw xmm1, xmm2
+ pminsw xmm2, xmm3
paddsw xmm0, xmm2
- paddsw xmm4, xmm5
+ paddsw xmm0, xmm1
+
paddsw xmm4, xmm7
+ movdqa xmm7, xmm5
+ pmaxsw xmm5, xmm6
+ pminsw xmm6, xmm7
paddsw xmm4, xmm6
+ paddsw xmm4, xmm5
paddsw xmm0, krd
paddsw xmm4, krd