diff options
author | Johann <johannkoenig@google.com> | 2016-09-08 18:46:14 -0700 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2016-09-15 23:16:26 -0700 |
commit | e813c2b416508784a89bb5ef466625c80f0ac188 (patch) | |
tree | 826e27f66fd0961af445be8a873832f6e1480234 /vp8 | |
parent | caf9a7841eff9e9931f6b85cb4226a4e247b8466 (diff) | |
download | libvpx-e813c2b416508784a89bb5ef466625c80f0ac188.tar libvpx-e813c2b416508784a89bb5ef466625c80f0ac188.tar.gz libvpx-e813c2b416508784a89bb5ef466625c80f0ac188.tar.bz2 libvpx-e813c2b416508784a89bb5ef466625c80f0ac188.zip |
Enable ssse3 bilinear tests
The code only has issues when xoffset == 0 and yoffset == 0 which
represents a simple copy. Presumably this case does not need to be
handled because the issue has existed since 2010.
BUG=webm:1287
Change-Id: Ic47e2653f3b729e99b40e53d8d2d8d1501edaaa9
Diffstat (limited to 'vp8')
-rw-r--r-- | vp8/common/filter.c | 31 | ||||
-rw-r--r-- | vp8/common/x86/subpixel_ssse3.asm | 6 |
2 files changed, 17 insertions, 20 deletions
diff --git a/vp8/common/filter.c b/vp8/common/filter.c index a312efb6c..267498335 100644 --- a/vp8/common/filter.c +++ b/vp8/common/filter.c @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "filter.h" +#include <assert.h> #include "./vp8_rtcd.h" +#include "vp8/common/filter.h" DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, @@ -324,27 +325,11 @@ void vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line, const short *HFilter; const short *VFilter; + // This represents a copy and is not required to be handled by optimizations. + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; -#if 0 - { - int i; - unsigned char temp1[16]; - unsigned char temp2[16]; - - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - - for (i = 0; i < 16; ++i) - { - if (temp1[i] != temp2[i]) - { - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - } - } - } -#endif filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); } @@ -355,6 +340,8 @@ void vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line, const short *HFilter; const short *VFilter; + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; @@ -368,6 +355,8 @@ void vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line, const short *HFilter; const short *VFilter; + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; @@ -382,6 +371,8 @@ void vp8_bilinear_predict16x16_c(unsigned char *src_ptr, const short *HFilter; const short *VFilter; + assert((xoffset | yoffset) != 0); + HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index c06f24556..1f6cbd1d1 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -1291,6 +1291,8 @@ sym(vp8_bilinear_predict8x8_ssse3): movq xmm7, XMMWORD PTR [rsp+96] punpcklbw xmm5, xmm6 + ; Because the source register (xmm0) is always treated as signed by + ; pmaddubsw, the constant '128' is treated as '-128'. pmaddubsw xmm1, xmm0 pmaddubsw xmm2, xmm0 @@ -1319,6 +1321,10 @@ sym(vp8_bilinear_predict8x8_ssse3): psraw xmm5, VP8_FILTER_SHIFT psraw xmm6, VP8_FILTER_SHIFT + + ; Having multiplied everything by '-128' and obtained negative + ; numbers, the unsigned saturation truncates those values to 0, + ; resulting in incorrect handling of xoffset == 0 && yoffset == 0 packuswb xmm1, xmm1 packuswb xmm2, xmm2 |