summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohann <johannkoenig@google.com>2016-09-08 18:46:14 -0700
committerJohann <johannkoenig@google.com>2016-09-15 23:16:26 -0700
commite813c2b416508784a89bb5ef466625c80f0ac188 (patch)
tree826e27f66fd0961af445be8a873832f6e1480234
parentcaf9a7841eff9e9931f6b85cb4226a4e247b8466 (diff)
downloadlibvpx-e813c2b416508784a89bb5ef466625c80f0ac188.tar
libvpx-e813c2b416508784a89bb5ef466625c80f0ac188.tar.gz
libvpx-e813c2b416508784a89bb5ef466625c80f0ac188.tar.bz2
libvpx-e813c2b416508784a89bb5ef466625c80f0ac188.zip
Enable ssse3 bilinear tests
The code only has issues when xoffset == 0 and yoffset == 0 which represents a simple copy. Presumably this case does not need to be handled because the issue has existed since 2010. BUG=webm:1287 Change-Id: Ic47e2653f3b729e99b40e53d8d2d8d1501edaaa9
-rw-r--r--test/predict_test.cc13
-rw-r--r--vp8/common/filter.c31
-rw-r--r--vp8/common/x86/subpixel_ssse3.asm6
3 files changed, 28 insertions, 22 deletions
diff --git a/test/predict_test.cc b/test/predict_test.cc
index dd797c61a..5672ca937 100644
--- a/test/predict_test.cc
+++ b/test/predict_test.cc
@@ -146,9 +146,15 @@ class PredictTestBase : public ::testing::TestWithParam<PredictParam> {
void TestWithRandomData(PredictFunc reference) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- // Run tests for all possible offsets.
+ // Run tests for almost all possible offsets.
for (int xoffset = 0; xoffset < 8; ++xoffset) {
for (int yoffset = 0; yoffset < 8; ++yoffset) {
+ if (xoffset == 0 && yoffset == 0) {
+ // This represents a copy which is not required to be handled by this
+ // module.
+ continue;
+ }
+
for (int i = 0; i < kSrcSize; ++i) {
src_[i] = rnd.Rand8();
}
@@ -172,6 +178,9 @@ class PredictTestBase : public ::testing::TestWithParam<PredictParam> {
if (width_ == 4 && height_ == 4) {
for (int xoffset = 0; xoffset < 8; ++xoffset) {
for (int yoffset = 0; yoffset < 8; ++yoffset) {
+ if (xoffset == 0 && yoffset == 0) {
+ continue;
+ }
for (int i = 0; i < kSrcSize; ++i) {
src_[i] = rnd.Rand8();
}
@@ -354,7 +363,7 @@ INSTANTIATE_TEST_CASE_P(
#endif
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(
- DISABLED_SSSE3, BilinearPredictTest,
+ SSSE3, BilinearPredictTest,
::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_ssse3),
make_tuple(8, 8, &vp8_bilinear_predict8x8_ssse3)));
#endif
diff --git a/vp8/common/filter.c b/vp8/common/filter.c
index a312efb6c..267498335 100644
--- a/vp8/common/filter.c
+++ b/vp8/common/filter.c
@@ -8,8 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "filter.h"
+#include <assert.h>
#include "./vp8_rtcd.h"
+#include "vp8/common/filter.h"
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = {
{ 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
@@ -324,27 +325,11 @@ void vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
const short *HFilter;
const short *VFilter;
+ // This represents a copy and is not required to be handled by optimizations.
+ assert((xoffset | yoffset) != 0);
+
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
-#if 0
- {
- int i;
- unsigned char temp1[16];
- unsigned char temp2[16];
-
- bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
- filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
-
- for (i = 0; i < 16; ++i)
- {
- if (temp1[i] != temp2[i])
- {
- bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
- filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
- }
- }
- }
-#endif
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
VFilter, 4, 4);
}
@@ -355,6 +340,8 @@ void vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
const short *HFilter;
const short *VFilter;
+ assert((xoffset | yoffset) != 0);
+
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
@@ -368,6 +355,8 @@ void vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
const short *HFilter;
const short *VFilter;
+ assert((xoffset | yoffset) != 0);
+
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
@@ -382,6 +371,8 @@ void vp8_bilinear_predict16x16_c(unsigned char *src_ptr,
const short *HFilter;
const short *VFilter;
+ assert((xoffset | yoffset) != 0);
+
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index c06f24556..1f6cbd1d1 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -1291,6 +1291,8 @@ sym(vp8_bilinear_predict8x8_ssse3):
movq xmm7, XMMWORD PTR [rsp+96]
punpcklbw xmm5, xmm6
+ ; Because the source register (xmm0) is always treated as signed by
+ ; pmaddubsw, the constant '128' is treated as '-128'.
pmaddubsw xmm1, xmm0
pmaddubsw xmm2, xmm0
@@ -1319,6 +1321,10 @@ sym(vp8_bilinear_predict8x8_ssse3):
psraw xmm5, VP8_FILTER_SHIFT
psraw xmm6, VP8_FILTER_SHIFT
+
+ ; Having multiplied everything by '-128' and obtained negative
+ ; numbers, the unsigned saturation truncates those values to 0,
+ ; resulting in incorrect handling of xoffset == 0 && yoffset == 0
packuswb xmm1, xmm1
packuswb xmm2, xmm2