summary refs log tree commit diff
path: root/vp9/common/x86/vp9_asm_stubs.c
diff options
context:
space:
mode:
author Scott LaVarnway <slavarnway@google.com> 2013-02-12 08:45:24 -0800
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-02-12 08:45:24 -0800
commit ff024f812b8d37ad74429e80b9730cfef9046e8f (patch)
tree 39604f38cc833fa7896c8321ee4e71e45aa9f12e /vp9/common/x86/vp9_asm_stubs.c
parent 094e2572df23fd849688c2f95380b184c071efec (diff)
parent eda30b410e878c56ded32321d2c70765caa6e4b8 (diff)
download libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.tar
libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.tar.gz
libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.tar.bz2
libvpx-ff024f812b8d37ad74429e80b9730cfef9046e8f.zip
Merge "Bug fix: ssse3 version of subpixel did not match C code" into experimental
Diffstat (limited to 'vp9/common/x86/vp9_asm_stubs.c')
-rw-r--r-- vp9/common/x86/vp9_asm_stubs.c 39
1 files changed, 39 insertions, 0 deletions
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 3e2346f29..fbc95b6ce 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -65,6 +65,20 @@ void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
unsigned int output_height,
const short *filter);
+void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
@@ -87,6 +101,14 @@ void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
dst += 8;
w -= 8;
}
+ while (w >= 4) {
+ vp9_filter_block1d4_h8_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
if (w) {
vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
@@ -117,6 +139,14 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride,
dst += 8;
w -= 8;
}
+ while (w >= 4) {
+ vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
if (w) {
vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
@@ -156,6 +186,15 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
h, filter_y);
return;
}
+ if (w == 4) {
+ vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d4_v8_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
+ }
}
vp9_convolve8_c(src, src_stride, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,