diff options
author | Deb Mukherjee <debargha@google.com> | 2014-09-16 12:47:18 -0700 |
---|---|---|
committer | Deb Mukherjee <debargha@google.com> | 2014-09-18 07:26:17 -0700 |
commit | 0d3c3d3ce74c3a1c30c8bc17f3389ac19e1ace5b (patch) | |
tree | 2b115665b5f3eeaa43c0ee2901b2497c4d714c4c /vp9/common/x86/vp9_asm_stubs.c | |
parent | d3a7e677e64f0b8a99b30b522005d7fd657fc257 (diff) | |
download | libvpx-0d3c3d3ce74c3a1c30c8bc17f3389ac19e1ace5b.tar libvpx-0d3c3d3ce74c3a1c30c8bc17f3389ac19e1ace5b.tar.gz libvpx-0d3c3d3ce74c3a1c30c8bc17f3389ac19e1ace5b.tar.bz2 libvpx-0d3c3d3ce74c3a1c30c8bc17f3389ac19e1ace5b.zip |
Adds high bitdepth convolve, interpred & scaling
Change-Id: Ie51c352a6b250547207cbc1ebba833a01ed053e3
Diffstat (limited to 'vp9/common/x86/vp9_asm_stubs.c')
-rw-r--r-- | vp9/common/x86/vp9_asm_stubs.c | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index b6847b92e..407573aee 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -139,6 +139,185 @@ void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                           filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
   } \
 }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+
+// Prototype shared by the 1-D high-bitdepth filter kernels declared later in
+// this file (vp9_high_filter_block1d{16,8,4}_{h,v}{8,2}_[avg_]sse2).  Source
+// and destination samples are uint16_t.  HIGH_FUN_CONV_1D calls these with its
+// block height as output_height, filter_x or filter_y as filter, and its own
+// bd argument as bd (presumably the bit depth -- confirm against the kernels).
+typedef void high_filter8_1dfunction (
+  const uint16_t *src_ptr,
+  const ptrdiff_t src_pitch,
+  uint16_t *output_ptr,
+  ptrdiff_t out_pitch,
+  unsigned int output_height,
+  const int16_t *filter,
+  int bd
+);
+
+// Defines vp9_high_convolve8_<name>_<opt>(), a 1-D convolve entry point.
+//   name      - suffix of the generated function and of its C fallback,
+//               vp9_high_convolve8_<name>_c().
+//   step_q4   - the step argument to test (x_step_q4 or y_step_q4).
+//   filter    - the coefficient argument to use (filter_x or filter_y).
+//   dir       - h or v; selects vp9_high_filter_block1d*_<dir>{8,2}_ kernels.
+//   src_start - source expression passed to the <dir>8 kernels; it is
+//               evaluated with the local pointer `src` in scope.
+//   avg       - empty or avg_; selects the kernel variant.
+//   opt       - suffix of the kernel set, e.g. sse2.
+// The kernels run only when step_q4 == 16 and filter[3] != 128.  Columns are
+// then consumed in strips of 16, 8 and 4, using the <dir>8 kernels when any of
+// filter[0..2] is non-zero and the <dir>2 kernels otherwise.  Any width still
+// left over (the whole block when the kernels do not run) is handed to the C
+// version, starting at the first column that has not been produced yet.
+#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+  void vp9_high_convolve8_##name##_##opt(const uint8_t *src8, \
+                                         ptrdiff_t src_stride, \
+                                         uint8_t *dst8, ptrdiff_t dst_stride, \
+                                         const int16_t *filter_x, \
+                                         int x_step_q4, \
+                                         const int16_t *filter_y, \
+                                         int y_step_q4, \
+                                         int w, int h, int bd) { \
+  const int w0 = w;  /* width on entry; w counts the columns still to do */ \
+  if (step_q4 == 16 && filter[3] != 128) { \
+    uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+    if (filter[0] || filter[1] || filter[2]) { \
+      while (w >= 16) { \
+        vp9_high_filter_block1d16_##dir##8_##avg##opt(src_start, \
+                                                      src_stride, \
+                                                      dst, \
+                                                      dst_stride, \
+                                                      h, \
+                                                      filter, \
+                                                      bd); \
+        src += 16; \
+        dst += 16; \
+        w -= 16; \
+      } \
+      while (w >= 8) { \
+        vp9_high_filter_block1d8_##dir##8_##avg##opt(src_start, \
+                                                     src_stride, \
+                                                     dst, \
+                                                     dst_stride, \
+                                                     h, \
+                                                     filter, \
+                                                     bd); \
+        src += 8; \
+        dst += 8; \
+        w -= 8; \
+      } \
+      while (w >= 4) { \
+        vp9_high_filter_block1d4_##dir##8_##avg##opt(src_start, \
+                                                     src_stride, \
+                                                     dst, \
+                                                     dst_stride, \
+                                                     h, \
+                                                     filter, \
+                                                     bd); \
+        src += 4; \
+        dst += 4; \
+        w -= 4; \
+      } \
+    } else { \
+      while (w >= 16) { \
+        vp9_high_filter_block1d16_##dir##2_##avg##opt(src, \
+                                                      src_stride, \
+                                                      dst, \
+                                                      dst_stride, \
+                                                      h, \
+                                                      filter, \
+                                                      bd); \
+        src += 16; \
+        dst += 16; \
+        w -= 16; \
+      } \
+      while (w >= 8) { \
+        vp9_high_filter_block1d8_##dir##2_##avg##opt(src, \
+                                                     src_stride, \
+                                                     dst, \
+                                                     dst_stride, \
+                                                     h, \
+                                                     filter, \
+                                                     bd); \
+        src += 8; \
+        dst += 8; \
+        w -= 8; \
+      } \
+      while (w >= 4) { \
+        vp9_high_filter_block1d4_##dir##2_##avg##opt(src, \
+                                                     src_stride, \
+                                                     dst, \
+                                                     dst_stride, \
+                                                     h, \
+                                                     filter, \
+                                                     bd); \
+        src += 4; \
+        dst += 4; \
+        w -= 4; \
+      } \
+    } \
+  } \
+  if (w) { \
+    /* Skip the w0 - w columns that the kernels above already produced. */ \
+    vp9_high_convolve8_##name##_c(src8 + (w0 - w), src_stride, \
+                                  dst8 + (w0 - w), dst_stride, \
+                                  filter_x, x_step_q4, filter_y, y_step_q4, \
+                                  w, h, bd); \
+  } \
+}
+
+// Defines vp9_high_convolve8_<avg><opt>(), the 2-D convolve.  w and h are
+// asserted to be at most 64.  When both steps are 16 the work is split into
+// vp9_high_convolve8_horiz_<opt>() writing a temporary buffer with a stride
+// of 64, followed by vp9_high_convolve8_<avg>vert_<opt>() writing dst; any
+// other step goes straight to vp9_high_convolve8_<avg>c().  If any of
+// filter_x[0..2] / filter_y[0..2] is non-zero, or either [3] entry is 128,
+// the horizontal pass starts 3 rows above src and produces h + 7 rows, and
+// the vertical pass starts at buffer offset 192 (3 rows of 64).  Otherwise
+// the horizontal pass starts at src and produces h + 1 rows.
+#define HIGH_FUN_CONV_2D(avg, opt) \
+void vp9_high_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+                                   uint8_t *dst, ptrdiff_t dst_stride, \
+                                   const int16_t *filter_x, int x_step_q4, \
+                                   const int16_t *filter_y, int y_step_q4, \
+                                   int w, int h, int bd) { \
+  assert(w <= 64); \
+  assert(h <= 64); \
+  if (x_step_q4 == 16 && y_step_q4 == 16) { \
+    if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
+        filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 71); \
+      vp9_high_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
+                                     CONVERT_TO_BYTEPTR(fdata2), 64, \
+                                     filter_x, x_step_q4, filter_y, y_step_q4, \
+                                     w, h + 7, bd); \
+      vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
+                                           64, dst, dst_stride, \
+                                           filter_x, x_step_q4, filter_y, \
+                                           y_step_q4, w, h, bd); \
+    } else { \
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 65); \
+      vp9_high_convolve8_horiz_##opt(src, src_stride, \
+                                     CONVERT_TO_BYTEPTR(fdata2), 64, \
+                                     filter_x, x_step_q4, filter_y, y_step_q4, \
+                                     w, h + 1, bd); \
+      vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
+                                           dst, dst_stride, \
+                                           filter_x, x_step_q4, filter_y, \
+                                           y_step_q4, w, h, bd); \
+    } \
+  } else { \
+    vp9_high_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+                                filter_x, x_step_q4, filter_y, y_step_q4, w, \
+                                h, bd); \
+  } \
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 #if HAVE_AVX2 && HAVE_SSSE3
 filter8_1dfunction vp9_filter_block1d16_v8_avx2;
 filter8_1dfunction vp9_filter_block1d16_h8_avx2;
@@ -336,4 +515,78 @@ FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
 //                              int w, int h);
 FUN_CONV_2D(, sse2);
 FUN_CONV_2D(avg_ , sse2);
+
+#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+// SSE2 kernels called by the HIGH_FUN_CONV_1D expansions below.  They are only
+// declared here; their definitions live elsewhere (presumably in assembly --
+// confirm).  The *8 group runs when any of filter[0..2] is non-zero.
+high_filter8_1dfunction vp9_high_filter_block1d16_v8_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d16_h8_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_v8_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_h8_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_v8_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_h8_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d16_v8_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d16_h8_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_v8_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_h8_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_v8_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_h8_avg_sse2;
+
+high_filter8_1dfunction vp9_high_filter_block1d16_v2_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d16_h2_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_v2_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_h2_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_v2_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_h2_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d16_v2_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d16_h2_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_v2_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d8_h2_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_v2_avg_sse2;
+high_filter8_1dfunction vp9_high_filter_block1d4_h2_avg_sse2;
+
+// void vp9_high_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                                    uint8_t *dst, ptrdiff_t dst_stride,
+//                                    const int16_t *filter_x, int x_step_q4,
+//                                    const int16_t *filter_y, int y_step_q4,
+//                                    int w, int h, int bd);
+// void vp9_high_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                                   uint8_t *dst, ptrdiff_t dst_stride,
+//                                   const int16_t *filter_x, int x_step_q4,
+//                                   const int16_t *filter_y, int y_step_q4,
+//                                   int w, int h, int bd);
+// void vp9_high_convolve8_avg_horiz_sse2(const uint8_t *src,
+//                                        ptrdiff_t src_stride,
+//                                        uint8_t *dst, ptrdiff_t dst_stride,
+//                                        const int16_t *filter_x,
+//                                        int x_step_q4,
+//                                        const int16_t *filter_y,
+//                                        int y_step_q4,
+//                                        int w, int h, int bd);
+// void vp9_high_convolve8_avg_vert_sse2(const uint8_t *src,
+//                                       ptrdiff_t src_stride,
+//                                       uint8_t *dst, ptrdiff_t dst_stride,
+//                                       const int16_t *filter_x, int x_step_q4,
+//                                       const int16_t *filter_y, int y_step_q4,
+//                                       int w, int h, int bd);
+HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
+HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
+HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
+HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
+                 sse2);
+
+// void vp9_high_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                              uint8_t *dst, ptrdiff_t dst_stride,
+//                              const int16_t *filter_x, int x_step_q4,
+//                              const int16_t *filter_y, int y_step_q4,
+//                              int w, int h, int bd);
+// void vp9_high_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+//                                  uint8_t *dst, ptrdiff_t dst_stride,
+//                                  const int16_t *filter_x, int x_step_q4,
+//                                  const int16_t *filter_y, int y_step_q4,
+//                                  int w, int h, int bd);
+HIGH_FUN_CONV_2D(, sse2);
+HIGH_FUN_CONV_2D(avg_ , sse2);
+#endif  // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
 #endif  // HAVE_SSE2