diff options
Diffstat (limited to 'vp9/common/vp9_convolve.c')
-rw-r--r-- | vp9/common/vp9_convolve.c | 474 |
1 files changed, 474 insertions, 0 deletions
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index b062e7dc7..3ab8bec7a 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -122,6 +122,78 @@ static void convolve_avg_horiz_c(const uint8_t *src, int src_stride, } } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT + +static inline uint8_t combine_qtr(uint8_t a, uint8_t b) { + return (((a) + (b) * 3 + 2) >> 2); +} + +static inline uint8_t combine_3qtr(uint8_t a, uint8_t b) { + return (((a) * 3 + (b) + 2) >> 2); +} + +static inline uint8_t combine_1by8(uint8_t a, uint8_t b) { + return (((a) * 1 + (b) * 7 + 4) >> 3); +} + +static inline uint8_t combine_3by8(uint8_t a, uint8_t b) { + return (((a) * 3 + (b) * 5 + 4) >> 3); +} + +static inline uint8_t combine_5by8(uint8_t a, uint8_t b) { + return (((a) * 5 + (b) * 3 + 4) >> 3); +} + +static inline uint8_t combine_7by8(uint8_t a, uint8_t b) { + return (((a) * 7 + (b) * 1 + 4) >> 3); +} + +// TODO(debargha): Implment with a separate weight parameter +static void convolve_wtd_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x0, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int taps, + uint8_t (*combine)(uint8_t a, uint8_t b)) { + int x, y, k, sum; + const int16_t *filter_x_base = filter_x0; + +#if ALIGN_FILTERS_256 + filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); +#endif + + /* Adjust base pointer address for this source line */ + src -= taps / 2 - 1; + + for (y = 0; y < h; ++y) { + /* Pointer to filter to use */ + const int16_t *filter_x = filter_x0; + + /* Initial phase offset */ + int x0_q4 = (filter_x - filter_x_base) / taps; + int x_q4 = x0_q4; + + for (x = 0; x < w; ++x) { + /* Per-pixel src offset */ + int src_x = (x_q4 - x0_q4) >> 4; + + for (sum = 0, k = 0; k < taps; ++k) { + sum += src[src_x + k] * filter_x[k]; + } + sum += (VP9_FILTER_WEIGHT >> 1); + dst[x] = combine(dst[x], clip_pixel(sum >> VP9_FILTER_SHIFT)); + + /* Adjust source and filter to use for the next pixel */ + x_q4 += x_step_q4; + filter_x = filter_x_base + (x_q4 & 0xf) * taps; + } + src += src_stride; + dst += dst_stride; + } +} + +#endif + static void convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -207,6 +279,52 @@ static void convolve_avg_vert_c(const uint8_t *src, int src_stride, } } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +static void convolve_wtd_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y0, int y_step_q4, + int w, int h, int taps, + uint8_t (*combine)(uint8_t a, uint8_t b)) { + int x, y, k, sum; + + const int16_t *filter_y_base = filter_y0; + +#if ALIGN_FILTERS_256 + filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); +#endif + + /* Adjust base pointer address for this source column */ + src -= src_stride * (taps / 2 - 1); + for (x = 0; x < w; ++x) { + /* Pointer to filter to use */ + const int16_t *filter_y = filter_y0; + + /* Initial phase offset */ + int y0_q4 = (filter_y - filter_y_base) / taps; + int y_q4 = y0_q4; + + for (y = 0; y < h; ++y) { + /* Per-pixel src offset */ + int src_y = (y_q4 - y0_q4) >> 4; + + for (sum = 0, k = 0; k < taps; ++k) { + sum += src[(src_y + k) * src_stride] * filter_y[k]; + } + sum += (VP9_FILTER_WEIGHT >> 1); + dst[y * dst_stride] = combine(dst[y * dst_stride], + clip_pixel(sum >> VP9_FILTER_SHIFT)); + + /* Adjust source and filter to use for the next pixel */ + y_q4 += y_step_q4; + filter_y = filter_y_base + (y_q4 & 0xf) * taps; + } + ++src; + ++dst; + } +} +#endif + static void convolve_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -285,6 +403,68 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride, w, h, 8); } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve8_1by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_1by8); +} + +void vp9_convolve8_qtr_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_qtr); +} + +void vp9_convolve8_3by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3by8); +} + +void vp9_convolve8_5by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_5by8); +} + +void vp9_convolve8_3qtr_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3qtr); +} + +void vp9_convolve8_7by8_horiz_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_7by8); +} +#endif + void vp9_convolve8_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -305,6 +485,68 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride, w, h, 8); } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve8_1by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_1by8); +} + +void vp9_convolve8_qtr_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_qtr); +} + +void vp9_convolve8_3by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3by8); +} + +void vp9_convolve8_5by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_5by8); +} + +void vp9_convolve8_3qtr_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_3qtr); +} + +void vp9_convolve8_7by8_vert_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + convolve_wtd_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h, 8, combine_7by8); +} +#endif + void vp9_convolve8_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, @@ -337,6 +579,140 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride, w, h); } +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve8_1by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_1by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_qtr_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_qtr(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_3by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_3by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_5by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_5by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_3qtr_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_3qtr(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} + +void vp9_convolve8_7by8_c(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16); + assert(w <= 16); + assert(h <= 16); + + vp9_convolve8(src, src_stride, + temp, 16, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + vp9_convolve_7by8(temp, 16, + dst, dst_stride, + NULL, 0, /* These unused parameter should be removed! */ + NULL, 0, /* These unused parameter should be removed! */ + w, h); +} +#endif + void vp9_convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int filter_x_stride, @@ -374,3 +750,101 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride, dst += dst_stride; } } + +#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +void vp9_convolve_1by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_1by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_qtr(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_qtr(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_3by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_3by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_5by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_5by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_3qtr(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_3qtr(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} + +void vp9_convolve_7by8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = combine_7by8(dst[x], src[x]); + } + src += src_stride; + dst += dst_stride; + } +} +#endif |