summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/convolve_test.cc490
-rw-r--r--test/test.mk1
-rw-r--r--vp9/common/vp9_convolve.c299
-rw-r--r--vp9/common/vp9_convolve.h43
-rw-r--r--vp9/common/vp9_rtcd_defs.sh18
-rw-r--r--vp9/vp9_common.mk2
6 files changed, 853 insertions, 0 deletions
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
new file mode 100644
index 000000000..354384063
--- /dev/null
+++ b/test/convolve_test.cc
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+extern "C" {
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+}
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h);
+
+struct ConvolveFunctions {
+ ConvolveFunctions(convolve_fn_t h8, convolve_fn_t h8_avg,
+ convolve_fn_t v8, convolve_fn_t v8_avg,
+ convolve_fn_t hv8, convolve_fn_t hv8_avg)
+ : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
+ hv8_avg_(hv8_avg) {}
+
+ convolve_fn_t h8_;
+ convolve_fn_t v8_;
+ convolve_fn_t hv8_;
+ convolve_fn_t h8_avg_;
+ convolve_fn_t v8_avg_;
+ convolve_fn_t hv8_avg_;
+};
+
+// Reference 8-tap subpixel filter, slightly modified to fit into this test.
+#define VP9_FILTER_WEIGHT 128
+#define VP9_FILTER_SHIFT 7
+static uint8_t clip_pixel(int x) {
+ return x < 0 ? 0 :
+ x > 255 ? 255 :
+ x;
+}
+
+static void filter_block2d_8_c(const uint8_t *src_ptr,
+ const unsigned int src_stride,
+ const int16_t *HFilter,
+ const int16_t *VFilter,
+ uint8_t *dst_ptr,
+ unsigned int dst_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
+ // Between passes, we use an intermediate buffer whose height is extended to
+ // have enough horizontally filtered values as input for the vertical pass.
+ // This buffer is allocated to be big enough for the largest block type we
+ // support.
+ const int kInterp_Extend = 4;
+ const unsigned int intermediate_height =
+ (kInterp_Extend - 1) + output_height + kInterp_Extend;
+
+ /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
+ * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
+ * + kInterp_Extend
+ * = 3 + 16 + 4
+ * = 23
+ * and filter_max_width = 16
+ */
+ uint8_t intermediate_buffer[23 * 16];
+ const int intermediate_next_stride = 1 - intermediate_height * output_width;
+
+ // Horizontal pass (src -> transposed intermediate).
+ {
+ uint8_t *output_ptr = intermediate_buffer;
+ const int src_next_row_stride = src_stride - output_width;
+ unsigned int i, j;
+ src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
+ for (i = 0; i < intermediate_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ // Apply filter...
+ int temp = ((int)src_ptr[0] * HFilter[0]) +
+ ((int)src_ptr[1] * HFilter[1]) +
+ ((int)src_ptr[2] * HFilter[2]) +
+ ((int)src_ptr[3] * HFilter[3]) +
+ ((int)src_ptr[4] * HFilter[4]) +
+ ((int)src_ptr[5] * HFilter[5]) +
+ ((int)src_ptr[6] * HFilter[6]) +
+ ((int)src_ptr[7] * HFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255...
+ *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
+ ++src_ptr;
+ output_ptr += intermediate_height;
+ }
+ src_ptr += src_next_row_stride;
+ output_ptr += intermediate_next_stride;
+ }
+ }
+
+ // Vertical pass (transposed intermediate -> dst).
+ {
+ uint8_t *src_ptr = intermediate_buffer;
+ const int dst_next_row_stride = dst_stride - output_width;
+ unsigned int i, j;
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ // Apply filter...
+ int temp = ((int)src_ptr[0] * VFilter[0]) +
+ ((int)src_ptr[1] * VFilter[1]) +
+ ((int)src_ptr[2] * VFilter[2]) +
+ ((int)src_ptr[3] * VFilter[3]) +
+ ((int)src_ptr[4] * VFilter[4]) +
+ ((int)src_ptr[5] * VFilter[5]) +
+ ((int)src_ptr[6] * VFilter[6]) +
+ ((int)src_ptr[7] * VFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255...
+ *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
+ src_ptr += intermediate_height;
+ }
+ src_ptr += intermediate_next_stride;
+ dst_ptr += dst_next_row_stride;
+ }
+ }
+}
+
+static void block2d_average_c(uint8_t *src,
+ unsigned int src_stride,
+ uint8_t *output_ptr,
+ unsigned int output_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
+ unsigned int i, j;
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
+ }
+ output_ptr += output_stride;
+ }
+}
+
+static void filter_average_block2d_8_c(const uint8_t *src_ptr,
+ const unsigned int src_stride,
+ const int16_t *HFilter,
+ const int16_t *VFilter,
+ uint8_t *dst_ptr,
+ unsigned int dst_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
+ uint8_t tmp[16*16];
+
+ assert(output_width <= 16);
+ assert(output_height <= 16);
+ filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 16,
+ output_width, output_height);
+ block2d_average_c(tmp, 16, dst_ptr, dst_stride,
+ output_width, output_height);
+}
+
+class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
+ protected:
+ static const int kOuterBlockSize = 32;
+ static const int kInputStride = kOuterBlockSize;
+ static const int kOutputStride = kOuterBlockSize;
+ static const int kMaxDimension = 16;
+
+ int Width() const { return GET_PARAM(0); }
+ int Height() const { return GET_PARAM(1); }
+ int BorderLeft() const { return (kOuterBlockSize - Width()) / 2; }
+ int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
+
+ bool IsIndexInBorder(int i) {
+ return (i < BorderTop() * kOuterBlockSize ||
+ i >= (BorderTop() + Height()) * kOuterBlockSize ||
+ i % kOuterBlockSize < BorderLeft() ||
+ i % kOuterBlockSize >= (BorderLeft() + Width()));
+ }
+
+ virtual void SetUp() {
+ UUT_ = GET_PARAM(2);
+ memset(input_, 0, sizeof(input_));
+ /* Set up guard blocks for an inner block centered in the outer block */
+ for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
+ if (IsIndexInBorder(i))
+ output_[i] = 255;
+ else
+ output_[i] = 0;
+ }
+
+ ::libvpx_test::ACMRandom prng;
+ for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)
+ input_[i] = prng.Rand8();
+ }
+
+ void CheckGuardBlocks() {
+ for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
+ if (IsIndexInBorder(i))
+ EXPECT_EQ(255, output_[i]);
+ }
+ }
+
+ uint8_t* input() {
+ return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ }
+
+ uint8_t* output() {
+ return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ }
+
+ const ConvolveFunctions* UUT_;
+ uint8_t input_[kOuterBlockSize * kOuterBlockSize];
+ uint8_t output_[kOuterBlockSize * kOuterBlockSize];
+};
+
+TEST_P(ConvolveTest, GuardBlocks) {
+ CheckGuardBlocks();
+}
+
+TEST_P(ConvolveTest, CopyHoriz) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};
+
+ REGISTER_STATE_CHECK(
+ UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x])
+ << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, CopyVert) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};
+
+ REGISTER_STATE_CHECK(
+ UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x])
+ << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, Copy2D) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};
+
+ REGISTER_STATE_CHECK(
+ UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x])
+ << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ uint8_t ref[kOutputStride * kMaxDimension];
+
+ const int16_t filters[][8] = {
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ { 0, 1, -5, 126, 8, -3, 1, 0},
+ { -1, 3, -10, 122, 18, -6, 2, 0},
+ { -1, 4, -13, 118, 27, -9, 3, -1},
+ { -1, 4, -16, 112, 37, -11, 4, -1},
+ { -1, 5, -18, 105, 48, -14, 4, -1},
+ { -1, 5, -19, 97, 58, -16, 5, -1},
+ { -1, 6, -19, 88, 68, -18, 5, -1},
+ { -1, 6, -19, 78, 78, -19, 6, -1},
+ { -1, 5, -18, 68, 88, -19, 6, -1},
+ { -1, 5, -16, 58, 97, -19, 5, -1},
+ { -1, 4, -14, 48, 105, -18, 5, -1},
+ { -1, 4, -11, 37, 112, -16, 4, -1},
+ { -1, 3, -9, 27, 118, -13, 4, -1},
+ { 0, 2, -6, 18, 122, -10, 3, -1},
+ { 0, 1, -3, 8, 126, -5, 1, 0}
+ };
+
+ const int kNumFilters = sizeof(filters) / sizeof(filters[0]);
+
+ for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+ for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+ filter_block2d_8_c(in, kInputStride,
+ filters[filter_x], filters[filter_y],
+ ref, kOutputStride,
+ Width(), Height());
+
+ if (filter_x && filter_y)
+ REGISTER_STATE_CHECK(
+ UUT_->hv8_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+ else if (filter_y)
+ REGISTER_STATE_CHECK(
+ UUT_->v8_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+ else
+ REGISTER_STATE_CHECK(
+ UUT_->h8_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x])
+ << "mismatch at (" << x << "," << y << "), "
+ << "filters (" << filter_x << "," << filter_y << ")";
+ }
+ }
+}
+
+TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ uint8_t ref[kOutputStride * kMaxDimension];
+
+ // Populate ref and out with some random data
+ ::libvpx_test::ACMRandom prng;
+ for (int y = 0; y < Height(); ++y) {
+ for (int x = 0; x < Width(); ++x) {
+ const uint8_t r = prng.Rand8();
+
+ out[y * kOutputStride + x] = r;
+ ref[y * kOutputStride + x] = r;
+ }
+ }
+
+ const int16_t filters[][8] = {
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ { 0, 1, -5, 126, 8, -3, 1, 0},
+ { -1, 3, -10, 122, 18, -6, 2, 0},
+ { -1, 4, -13, 118, 27, -9, 3, -1},
+ { -1, 4, -16, 112, 37, -11, 4, -1},
+ { -1, 5, -18, 105, 48, -14, 4, -1},
+ { -1, 5, -19, 97, 58, -16, 5, -1},
+ { -1, 6, -19, 88, 68, -18, 5, -1},
+ { -1, 6, -19, 78, 78, -19, 6, -1},
+ { -1, 5, -18, 68, 88, -19, 6, -1},
+ { -1, 5, -16, 58, 97, -19, 5, -1},
+ { -1, 4, -14, 48, 105, -18, 5, -1},
+ { -1, 4, -11, 37, 112, -16, 4, -1},
+ { -1, 3, -9, 27, 118, -13, 4, -1},
+ { 0, 2, -6, 18, 122, -10, 3, -1},
+ { 0, 1, -3, 8, 126, -5, 1, 0}
+ };
+
+ const int kNumFilters = sizeof(filters) / sizeof(filters[0]);
+
+ for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+ for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+ filter_average_block2d_8_c(in, kInputStride,
+ filters[filter_x], filters[filter_y],
+ ref, kOutputStride,
+ Width(), Height());
+
+ if (filter_x && filter_y)
+ REGISTER_STATE_CHECK(
+ UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+ else if (filter_y)
+ REGISTER_STATE_CHECK(
+ UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+ else
+ REGISTER_STATE_CHECK(
+ UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x])
+ << "mismatch at (" << x << "," << y << "), "
+ << "filters (" << filter_x << "," << filter_y << ")";
+ }
+ }
+}
+
+TEST_P(ConvolveTest, ChangeFilterWorks) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+
+ const int16_t filters[][8] = {
+ { 0, 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 128},
+ { 0, 0, 0, 128},
+ { 0, 0, 128},
+ { 0, 128},
+ { 128},
+ { 0, 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 128},
+ { 0, 0, 0, 128},
+ { 0, 0, 128},
+ { 0, 128},
+ { 128},
+ { 0, 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 128},
+ { 0, 0, 0, 128},
+ { 0, 0, 128},
+ { 0, 128},
+ { 128},
+ };
+
+ REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
+ filters[0], 17, filters[4], 16,
+ Width(), Height()));
+
+ for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) {
+ ASSERT_EQ(in[4], out[x]) << "x == " << x;
+ }
+
+ REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
+ filters[4], 16, filters[0], 17,
+ Width(), Height()));
+
+ for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) {
+ ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+ }
+
+ REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
+ filters[0], 17, filters[0], 17,
+ Width(), Height()));
+
+ for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) {
+ for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) {
+ ASSERT_EQ(in[4 * kInputStride + 4], out[y * kOutputStride + x])
+ << "x == " << x << ", y == " << y;
+ }
+ }
+}
+
+
+using std::tr1::make_tuple;
+
+const ConvolveFunctions convolve8_2d_only_c(
+ vp9_convolve8_c, vp9_convolve8_avg_c,
+ vp9_convolve8_c, vp9_convolve8_avg_c,
+ vp9_convolve8_c, vp9_convolve8_avg_c);
+
+const ConvolveFunctions convolve8_c(
+ vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
+ vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
+ vp9_convolve8_c, vp9_convolve8_avg_c);
+
+INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
+ make_tuple(4, 4, &convolve8_2d_only_c),
+ make_tuple(8, 4, &convolve8_2d_only_c),
+ make_tuple(8, 8, &convolve8_2d_only_c),
+ make_tuple(16, 16, &convolve8_2d_only_c),
+ make_tuple(4, 4, &convolve8_c),
+ make_tuple(8, 4, &convolve8_c),
+ make_tuple(8, 8, &convolve8_c),
+ make_tuple(16, 16, &convolve8_c)));
+}
diff --git a/test/test.mk b/test/test.mk
index f275a47f2..46b055e23 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -68,6 +68,7 @@ LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc
LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
endif
+LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
new file mode 100644
index 000000000..ed188c3f2
--- /dev/null
+++ b/vp9/common/vp9_convolve.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_common.h"
+#include "vpx/vpx_integer.h"
+
+#define VP9_FILTER_WEIGHT 128
+#define VP9_FILTER_SHIFT 7
+#define ALIGN_FILTERS_256 0
+
+/* Assume a bank of 16 filters to choose from. There are three possible
+ * implementations of filter wrapping behavior, since we want to be able to
+ * pick which filter to start with. We could:
+ *
+ * 1) make filter_ a pointer to the base of the filter array, and then add an
+ * additional offset parameter, to choose the starting filter.
+ * 2) use a pointer to 2 periods worth of filters, so that even if the original
+ * phase offset is at 15/16, we'll have valid data to read. The filter
+ * tables become [32][8], and the second half is duplicated.
+ * 3) fix the alignment of the filter tables, so that we know the 0/16 is
+ * always 256 byte aligned.
+ *
+ * Implementations 2 and 3 are likely preferable, as they avoid an extra 2
+ * parameters, and switching between them is trivial.
+ */
+static void convolve_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x0, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+ const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+ filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source line */
+ src -= taps / 2 - 1;
+
+ for (y = 0; y < h; ++y) {
+ /* Pointer to filter to use */
+ const int16_t *filter_x = filter_x0;
+
+ /* Initial phase offset */
+ int x_q4 = (filter_x - filter_x_base) / taps;
+
+ for (x = 0; x < w; ++x) {
+ /* Per-pixel src offset */
+ int src_x = x_q4 >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[src_x + k] * filter_x[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);
+
+ /* Adjust source and filter to use for the next pixel */
+ x_q4 += x_step_q4;
+ filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+static void convolve_avg_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x0, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+ const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+ filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source line */
+ src -= taps / 2 - 1;
+
+ for (y = 0; y < h; ++y) {
+ /* Pointer to filter to use */
+ const int16_t *filter_x = filter_x0;
+
+ /* Initial phase offset */
+ int x_q4 = (filter_x - filter_x_base) / taps;
+
+ for (x = 0; x < w; ++x) {
+ /* Per-pixel src offset */
+ int src_x = x_q4 >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[src_x + k] * filter_x[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
+
+ /* Adjust source and filter to use for the next pixel */
+ x_q4 += x_step_q4;
+ filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+static void convolve_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y0, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+
+ const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+ filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source column */
+ src -= src_stride * (taps / 2 - 1);
+ for (x = 0; x < w; ++x) {
+ /* Pointer to filter to use */
+ const int16_t *filter_y = filter_y0;
+
+ /* Initial phase offset */
+ int y_q4 = (filter_y - filter_y_base) / taps;
+
+ for (y = 0; y < h; ++y) {
+ /* Per-pixel src offset */
+ int src_y = y_q4 >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[(src_y + k) * src_stride] * filter_y[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);
+
+ /* Adjust source and filter to use for the next pixel */
+ y_q4 += y_step_q4;
+ filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+ }
+ ++src;
+ ++dst;
+ }
+}
+
+static void convolve_avg_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y0, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+
+ const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+ filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source column */
+ src -= src_stride * (taps / 2 - 1);
+ for (x = 0; x < w; ++x) {
+ /* Pointer to filter to use */
+ const int16_t *filter_y = filter_y0;
+
+ /* Initial phase offset */
+ int y_q4 = (filter_y - filter_y_base) / taps;
+
+ for (y = 0; y < h; ++y) {
+ /* Per-pixel src offset */
+ int src_y = y_q4 >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[(src_y + k) * src_stride] * filter_y[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[y * dst_stride] =
+ (dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
+
+ /* Adjust source and filter to use for the next pixel */
+ y_q4 += y_step_q4;
+ filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+ }
+ ++src;
+ ++dst;
+ }
+}
+
+static void convolve_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ uint8_t temp[16 * 23];
+ assert(w <= 16);
+ assert(h <= 16);
+ assert(taps <= 8);
+
+ convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
+ temp, 16,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h + taps - 1, taps);
+ convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, taps);
+}
+
+static void convolve_avg_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ uint8_t temp[16 * 23];
+ assert(w <= 16);
+ assert(h <= 16);
+ assert(taps <= 8);
+
+ convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
+ temp, 16,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h + taps - 1, taps);
+ convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, taps);
+}
+
+void vp9_convolve8_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_avg_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
new file mode 100644
index 000000000..46c935ab7
--- /dev/null
+++ b/vp9/common/vp9_convolve.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_COMMON_CONVOLVE_H_
+#define VP9_COMMON_CONVOLVE_H_
+
+#include "vpx/vpx_integer.h"
+
+typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block copy conforming to the convolution prototype
+void vp9_convolve_copy(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block average conforming to the convolution prototype
+void vp9_convolve_avg(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+struct subpix_fn_table {
+ convolve_fn_t predict[2][2][2]; // horiz, vert, avg
+ const int16_t (*filter_x)[8];
+ const int16_t (*filter_y)[8];
+ int x_step_q4;
+ int y_step_q4;
+};
+
+#endif // VP9_COMMON_CONVOLVE_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 39af2080a..762dd75c0 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -269,6 +269,24 @@ specialize vp9_sub_pixel_variance16x2 sse2
#
# Sub Pixel Filters
#
+prototype void vp9_convolve8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8
+
+prototype void vp9_convolve8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_horiz
+
+prototype void vp9_convolve8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_vert
+
+prototype void vp9_convolve8_avg "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_avg
+
+prototype void vp9_convolve8_avg_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_avg_horiz
+
+prototype void vp9_convolve8_avg_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_avg_vert
+
prototype void vp9_eighttap_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_eighttap_predict16x16
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 0d208e9a3..d1805be62 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -16,6 +16,8 @@ VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
VP9_COMMON_SRCS-yes += common/vp9_asm_com_offsets.c
VP9_COMMON_SRCS-yes += common/vp9_blockd.c
VP9_COMMON_SRCS-yes += common/vp9_coefupdateprobs.h
+VP9_COMMON_SRCS-yes += common/vp9_convolve.c
+VP9_COMMON_SRCS-yes += common/vp9_convolve.h
VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c
VP9_COMMON_SRCS-yes += common/vp9_default_coef_probs.h
VP9_COMMON_SRCS-yes += common/vp9_entropy.c