diff options
author | Linfeng Zhang <linfengz@google.com> | 2017-08-28 10:35:43 -0700 |
---|---|---|
committer | Linfeng Zhang <linfengz@google.com> | 2017-09-05 15:22:36 -0700 |
commit | d331e7a1c0c59d4055a3bfacd051268ec0832b48 (patch) | |
tree | 7309a0384415188af55340224c91d633ac608fbc /vpx_dsp/x86/convolve.h | |
parent | d49a1a5329ea43968faaf295f7da5f72b28f971e (diff) | |
download | libvpx-d331e7a1c0c59d4055a3bfacd051268ec0832b48.tar libvpx-d331e7a1c0c59d4055a3bfacd051268ec0832b48.tar.gz libvpx-d331e7a1c0c59d4055a3bfacd051268ec0832b48.tar.bz2 libvpx-d331e7a1c0c59d4055a3bfacd051268ec0832b48.zip |
Remove get_filter_base() and get_filter_offset() in convolve
so that the convolve functions are independent of table alignment.
Change-Id: Ieab132a30d72c6e75bbe9473544fbe2cf51541ee
Diffstat (limited to 'vpx_dsp/x86/convolve.h')
-rw-r--r-- | vpx_dsp/x86/convolve.h | 239 |
1 files changed, 123 insertions, 116 deletions
diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h index e69d6c617..68d7589d4 100644 --- a/vpx_dsp/x86/convolve.h +++ b/vpx_dsp/x86/convolve.h @@ -20,14 +20,15 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr, ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter); -#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ +#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \ void vpx_convolve8_##name##_##opt( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ - ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, int w, int h) { \ - (void)filter_x; \ + ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ + int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ + const int16_t *filter = filter_kernel[offset]; \ + (void)x0_q4; \ (void)x_step_q4; \ - (void)filter_y; \ + (void)y0_q4; \ (void)y_step_q4; \ assert(filter[3] != 128); \ assert(step_q4 == 16); \ @@ -64,32 +65,36 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, } \ } -#define FUN_CONV_2D(avg, opt) \ - void vpx_convolve8_##avg##opt( \ - const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ - ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, int w, int h) { \ - assert(filter_x[3] != 128); \ - assert(filter_y[3] != 128); \ - assert(w <= 64); \ - assert(h <= 64); \ - assert(x_step_q4 == 16); \ - assert(y_step_q4 == 16); \ - if (filter_x[0] | filter_x[1] | filter_x[2]) { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ - vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, \ - h + 7); \ - vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } else { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ - vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter_x, \ - x_step_q4, filter_y, y_step_q4, w, h + 1); \ - vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter_x, \ - x_step_q4, filter_y, y_step_q4, w, h); \ - } \ +#define FUN_CONV_2D(avg, opt) \ + void vpx_convolve8_##avg##opt( \ + const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ + ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ + int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ + const int16_t *filter_x = filter[x0_q4]; \ + const int16_t *filter_y = filter[y0_q4]; \ + (void)filter_y; \ + assert(filter_x[3] != 128); \ + assert(filter_y[3] != 128); \ + assert(w <= 64); \ + assert(h <= 64); \ + assert(x_step_q4 == 16); \ + assert(y_step_q4 == 16); \ + if (filter_x[0] | filter_x[1] | filter_x[2]) { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ + filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ + h + 7); \ + vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ + filter, x0_q4, x_step_q4, y0_q4, \ + y_step_q4, w, h); \ + } else { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ + vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \ + x_step_q4, y0_q4, y_step_q4, w, h + 1); \ + vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \ + x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ + h); \ + } \ } #if CONFIG_VP9_HIGHBITDEPTH @@ -101,95 +106,97 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, unsigned int output_height, const int16_t *filter, int bd); -#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vpx_highbd_convolve8_##name##_##opt( \ - const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ - ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - if (filter[0] | filter[1] | filter[2]) { \ - while (w >= 16) { \ - vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \ - src_start, src_stride, dst, dst_stride, h, filter, bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \ - src_start, src_stride, dst, dst_stride, h, filter, bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \ - src_start, src_stride, dst, dst_stride, h, filter, bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \ - src, src_stride, dst, dst_stride, h, filter, bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \ - src, src_stride, dst, dst_stride, h, filter, bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \ - src, src_stride, dst, dst_stride, h, filter, bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h, bd); \ - } \ - } - -#define HIGH_FUN_CONV_2D(avg, opt) \ - void vpx_highbd_convolve8_##avg##opt( \ +#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \ + void vpx_highbd_convolve8_##name##_##opt( \ const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ - ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ - vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ - fdata2, 64, filter_x, x_step_q4, \ - filter_y, y_step_q4, w, h + 7, bd); \ - vpx_highbd_convolve8_##avg##vert_##opt( \ - fdata2 + 192, 64, dst, dst_stride, filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h, bd); \ + ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ + int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ + const int16_t *filter = filter_kernel[offset]; \ + if (step_q4 == 16 && filter[3] != 128) { \ + if (filter[0] | filter[1] | filter[2]) { \ + while (w >= 16) { \ + vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \ + src_start, src_stride, dst, dst_stride, h, filter, bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \ + src_start, src_stride, dst, dst_stride, h, filter, bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \ + src_start, src_stride, dst, dst_stride, h, filter, bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ - vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h + 1, bd); \ - vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h, bd); \ + while (w >= 16) { \ + vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \ + src, src_stride, dst, dst_stride, h, filter, bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \ + src, src_stride, dst, dst_stride, h, filter, bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \ + src, src_stride, dst, dst_stride, h, filter, bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ } \ - } else { \ - vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h, bd); \ } \ + if (w) { \ + vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ + filter_kernel, x0_q4, x_step_q4, y0_q4, \ + y_step_q4, w, h, bd); \ + } \ + } + +#define HIGH_FUN_CONV_2D(avg, opt) \ + void vpx_highbd_convolve8_##avg##opt( \ + const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ + ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ + int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ + const int16_t *filter_x = filter[x0_q4]; \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ + fdata2, 64, filter, x0_q4, x_step_q4, \ + y0_q4, y_step_q4, w, h + 7, bd); \ + vpx_highbd_convolve8_##avg##vert_##opt( \ + fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \ + y0_q4, y_step_q4, w, h, bd); \ + } else { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ + vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \ + x0_q4, x_step_q4, y0_q4, y_step_q4, \ + w, h + 1, bd); \ + vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ + filter, x0_q4, x_step_q4, \ + y0_q4, y_step_q4, w, h, bd); \ + } \ + } else { \ + vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter, \ + x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, \ + bd); \ + } \ } #endif // CONFIG_VP9_HIGHBITDEPTH |