diff options
author | Scott LaVarnway <slavarnway@google.com> | 2013-02-12 08:45:24 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2013-02-12 08:45:24 -0800 |
commit | ff024f812b8d37ad74429e80b9730cfef9046e8f (patch) | |
tree | 39604f38cc833fa7896c8321ee4e71e45aa9f12e /vp9/common/x86/vp9_asm_stubs.c | |
parent | 094e2572df23fd849688c2f95380b184c071efec (diff) | |
parent | eda30b410e878c56ded32321d2c70765caa6e4b8 (diff) | |
download | libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.tar libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.tar.gz libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.tar.bz2 libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.zip |
Merge "Bug fix: ssse3 version of subpixel did not match C code" into experimental
Diffstat (limited to 'vp9/common/x86/vp9_asm_stubs.c')
-rw-r--r-- | vp9/common/x86/vp9_asm_stubs.c | 39 |
1 files changed, 39 insertions, 0 deletions
```diff
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 3e2346f29..fbc95b6ce 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -65,6 +65,20 @@ void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
                                   unsigned int output_height,
                                   const short *filter);
 
+void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,
+                                  const unsigned int src_pitch,
+                                  unsigned char *output_ptr,
+                                  unsigned int out_pitch,
+                                  unsigned int output_height,
+                                  const short *filter);
+
+void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,
+                                  const unsigned int src_pitch,
+                                  unsigned char *output_ptr,
+                                  unsigned int out_pitch,
+                                  unsigned int output_height,
+                                  const short *filter);
+
 void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
                                uint8_t *dst, int dst_stride,
                                const int16_t *filter_x, int x_step_q4,
@@ -87,6 +101,14 @@ void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
       dst += 8;
       w -= 8;
     }
+    while (w >= 4) {
+      vp9_filter_block1d4_h8_ssse3(src, src_stride,
+                                   dst, dst_stride,
+                                   h, filter_x);
+      src += 4;
+      dst += 4;
+      w -= 4;
+    }
   }
   if (w) {
     vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
@@ -117,6 +139,14 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride,
       dst += 8;
       w -= 8;
     }
+    while (w >= 4) {
+      vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
+                                   dst, dst_stride,
+                                   h, filter_y);
+      src += 4;
+      dst += 4;
+      w -= 4;
+    }
   }
   if (w) {
     vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
@@ -156,6 +186,15 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
                                    h, filter_y);
       return;
     }
+    if (w == 4) {
+      vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
+                                   fdata2, 16,
+                                   h + 7, filter_x);
+      vp9_filter_block1d4_v8_ssse3(fdata2, 16,
+                                   dst, dst_stride,
+                                   h, filter_y);
+      return;
+    }
   }
   vp9_convolve8_c(src, src_stride, dst, dst_stride,
                   filter_x, x_step_q4, filter_y, y_step_q4,
```