From e813c2b416508784a89bb5ef466625c80f0ac188 Mon Sep 17 00:00:00 2001 From: Johann Date: Thu, 8 Sep 2016 18:46:14 -0700 Subject: Enable ssse3 bilinear tests The code only has issues when xoffset == 0 and yoffset == 0 which represents a simple copy. Presumably this case does not need to be handled because the issue has existed since 2010. BUG=webm:1287 Change-Id: Ic47e2653f3b729e99b40e53d8d2d8d1501edaaa9 --- test/predict_test.cc | 13 +++++++++++-- vp8/common/filter.c | 31 +++++++++++-------------------- vp8/common/x86/subpixel_ssse3.asm | 6 ++++++ 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/test/predict_test.cc b/test/predict_test.cc index dd797c61a..5672ca937 100644 --- a/test/predict_test.cc +++ b/test/predict_test.cc @@ -146,9 +146,15 @@ class PredictTestBase : public ::testing::TestWithParam { void TestWithRandomData(PredictFunc reference) { ACMRandom rnd(ACMRandom::DeterministicSeed()); - // Run tests for all possible offsets. + // Run tests for almost all possible offsets. for (int xoffset = 0; xoffset < 8; ++xoffset) { for (int yoffset = 0; yoffset < 8; ++yoffset) { + if (xoffset == 0 && yoffset == 0) { + // This represents a copy which is not required to be handled by this + // module. + continue; + } + for (int i = 0; i < kSrcSize; ++i) { src_[i] = rnd.Rand8(); } @@ -172,6 +178,9 @@ class PredictTestBase : public ::testing::TestWithParam { if (width_ == 4 && height_ == 4) { for (int xoffset = 0; xoffset < 8; ++xoffset) { for (int yoffset = 0; yoffset < 8; ++yoffset) { + if (xoffset == 0 && yoffset == 0) { + continue; + } for (int i = 0; i < kSrcSize; ++i) { src_[i] = rnd.Rand8(); } @@ -354,7 +363,7 @@ INSTANTIATE_TEST_CASE_P( #endif #if HAVE_SSSE3 INSTANTIATE_TEST_CASE_P( - DISABLED_SSSE3, BilinearPredictTest, + SSSE3, BilinearPredictTest, ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_ssse3), make_tuple(8, 8, &vp8_bilinear_predict8x8_ssse3))); #endif diff --git a/vp8/common/filter.c b/vp8/common/filter.c index a312efb6c..267498335 100644 --- a/vp8/common/filter.c +++ b/vp8/common/filter.c @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "filter.h" +#include #include "./vp8_rtcd.h" +#include "vp8/common/filter.h" DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, @@ -324,27 +325,11 @@ void vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line, const short *HFilter; const short *VFilter; + // This represents a copy and is not required to be handled by optimizations. + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; -#if 0 - { - int i; - unsigned char temp1[16]; - unsigned char temp2[16]; - - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - - for (i = 0; i < 16; ++i) - { - if (temp1[i] != temp2[i]) - { - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - } - } - } -#endif filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); } @@ -355,6 +340,8 @@ void vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line, const short *HFilter; const short *VFilter; + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; @@ -368,6 +355,8 @@ void vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line, const short *HFilter; const short *VFilter; + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; @@ -382,6 +371,8 @@ void vp8_bilinear_predict16x16_c(unsigned char *src_ptr, const short *HFilter; const short *VFilter; + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index c06f24556..1f6cbd1d1 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -1291,6 +1291,8 @@ sym(vp8_bilinear_predict8x8_ssse3): movq xmm7, XMMWORD PTR [rsp+96] punpcklbw xmm5, xmm6 + ; Because the source register (xmm0) is always treated as signed by + ; pmaddubsw, the constant '128' is treated as '-128'. pmaddubsw xmm1, xmm0 pmaddubsw xmm2, xmm0 @@ -1319,6 +1321,10 @@ sym(vp8_bilinear_predict8x8_ssse3): psraw xmm5, VP8_FILTER_SHIFT psraw xmm6, VP8_FILTER_SHIFT + + ; Having multiplied everything by '-128' and obtained negative + ; numbers, the unsigned saturation truncates those values to 0, + ; resulting in incorrect handling of xoffset == 0 && yoffset == 0 packuswb xmm1, xmm1 packuswb xmm2, xmm2 -- cgit v1.2.3