diff options
author | James Zern <jzern@google.com> | 2015-05-20 20:14:30 -0700 |
---|---|---|
committer | James Zern <jzern@google.com> | 2015-05-22 20:12:16 -0700 |
commit | ef2b3cce50cf2c71cefb6bfe4e1aca8c5bd04e3b (patch) | |
tree | 6200098e6126c6bace58fb5fa549cf01fa9c89e9 /vp9/common/x86 | |
parent | 9e420c01da618e46856aa35ac1652850817ce183 (diff) | |
download | libvpx-ef2b3cce50cf2c71cefb6bfe4e1aca8c5bd04e3b.tar libvpx-ef2b3cce50cf2c71cefb6bfe4e1aca8c5bd04e3b.tar.gz libvpx-ef2b3cce50cf2c71cefb6bfe4e1aca8c5bd04e3b.tar.bz2 libvpx-ef2b3cce50cf2c71cefb6bfe4e1aca8c5bd04e3b.zip |
add vp9/common/x86/convolve.h
collect the vp9_convolve function definition macros there; this will
allow some relocation of functions from vp9_asm_stubs.c
Change-Id: Idadd117fa256dd48748379856973fd985b8204e8
Diffstat (limited to 'vp9/common/x86')
-rw-r--r-- | vp9/common/x86/convolve.h | 296 | ||||
-rw-r--r-- | vp9/common/x86/vp9_asm_stubs.c | 283 |
2 files changed, 298 insertions, 281 deletions
diff --git a/vp9/common/x86/convolve.h b/vp9/common/x86/convolve.h new file mode 100644 index 000000000..de2df47e5 --- /dev/null +++ b/vp9/common/x86/convolve.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VP9_COMMON_X86_CONVOLVE_H_ +#define VP9_COMMON_X86_CONVOLVE_H_ + +#include <assert.h> + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +typedef void filter8_1dfunction ( + const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter +); + +#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + if (filter[0] || filter[1] || filter[2]) { \ + while (w >= 16) { \ + vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } else { \ + while (w >= 16) { \ + vp9_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + } \ + if (w) { \ + vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h); \ + } \ +} + +#define FUN_CONV_2D(avg, opt) \ +void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ + filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 7); \ + vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } else { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ + vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 1); \ + vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } \ + } else { \ + vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ + } \ +} + +#if CONFIG_VP9_HIGHBITDEPTH + +typedef void highbd_filter8_1dfunction ( + const uint16_t *src_ptr, + const ptrdiff_t src_pitch, + uint16_t *output_ptr, + ptrdiff_t out_pitch, + unsigned int output_height, + const int16_t *filter, + int bd +); + +#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ + ptrdiff_t src_stride, \ + uint8_t *dst8, \ + ptrdiff_t dst_stride, \ + const int16_t *filter_x, \ + int x_step_q4, \ + const int16_t *filter_y, \ + int y_step_q4, \ + int w, int h, int bd) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + if (filter[0] || filter[1] || filter[2]) { \ + while (w >= 16) { \ + vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } else { \ + while (w >= 16) { \ + vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + } \ + if (w) { \ + vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h, bd); \ + } \ +} + +#define HIGH_FUN_CONV_2D(avg, opt) \ +void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h, int bd) { \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ + filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 7, bd); \ + vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ + 64, dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } else { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ + vp9_highbd_convolve8_horiz_##opt(src, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 1, bd); \ + vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ + dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } \ + } else { \ + vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, \ + h, bd); \ + } \ +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#endif // VP9_COMMON_X86_CONVOLVE_H_ diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index 963023c53..baec57bb8 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -8,288 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include <assert.h> - -#include "./vpx_config.h" #include "./vp9_rtcd.h" -#include "vpx_ports/mem.h" - -typedef void filter8_1dfunction ( - const unsigned char *src_ptr, - const ptrdiff_t src_pitch, - unsigned char *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const short *filter -); - -#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - if (filter[0] || filter[1] || filter[2]) { \ - while (w >= 16) { \ - vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vp9_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h); \ - } \ -} - -#define FUN_CONV_2D(avg, opt) \ -void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ - filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ - DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 71]); \ - vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 7); \ - vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } else { \ - DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 65]); \ - vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 1); \ - vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } \ - } else { \ - vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ - } \ -} - -#if CONFIG_VP9_HIGHBITDEPTH - -typedef void highbd_filter8_1dfunction ( - const uint16_t *src_ptr, - const ptrdiff_t src_pitch, - uint16_t *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const int16_t *filter, - int bd -); - -#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ - ptrdiff_t src_stride, \ - uint8_t *dst8, \ - ptrdiff_t dst_stride, \ - const int16_t *filter_x, \ - int x_step_q4, \ - const int16_t *filter_y, \ - int y_step_q4, \ - int w, int h, int bd) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ - if (filter[0] || filter[1] || filter[2]) { \ - while (w >= 16) { \ - vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h, bd); \ - } \ -} - -#define HIGH_FUN_CONV_2D(avg, opt) \ -void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h, int bd) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ - filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ - vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 7, bd); \ - vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ - 64, dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ - vp9_highbd_convolve8_horiz_##opt(src, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 1, bd); \ - vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ - dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } \ - } else { \ - vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, \ - h, bd); \ - } \ -} -#endif // CONFIG_VP9_HIGHBITDEPTH +#include "./vpx_config.h" +#include "vp9/common/x86/convolve.h" #if HAVE_AVX2 && HAVE_SSSE3 filter8_1dfunction vp9_filter_block1d16_v8_avx2; |