diff options
Diffstat (limited to 'vp9/common/x86/vp9_asm_stubs.c')
-rw-r--r-- | vp9/common/x86/vp9_asm_stubs.c | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index 3e2346f29..fbc95b6ce 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -65,6 +65,20 @@ void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr, unsigned int output_height, const short *filter); +void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + +void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr, + const unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + const short *filter); + void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -87,6 +101,14 @@ void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride, dst += 8; w -= 8; } + while (w >= 4) { + vp9_filter_block1d4_h8_ssse3(src, src_stride, + dst, dst_stride, + h, filter_x); + src += 4; + dst += 4; + w -= 4; + } } if (w) { vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride, @@ -117,6 +139,14 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride, dst += 8; w -= 8; } + while (w >= 4) { + vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride, + dst, dst_stride, + h, filter_y); + src += 4; + dst += 4; + w -= 4; + } } if (w) { vp9_convolve8_vert_c(src, src_stride, dst, dst_stride, @@ -156,6 +186,15 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride, h, filter_y); return; } + if (w == 4) { + vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride, + fdata2, 16, + h + 7, filter_x); + vp9_filter_block1d4_v8_ssse3(fdata2, 16, + dst, dst_stride, + h, filter_y); + return; + } } vp9_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, |