-rw-r--r--  test/test.mk                        1
-rw-r--r--  test/yuv_temporal_filter_test.cc  479
-rw-r--r--  vp9/common/vp9_rtcd_defs.pl         2
-rw-r--r--  vp9/encoder/vp9_temporal_filter.c  12
4 files changed, 488 insertions(+), 6 deletions(-)
diff --git a/test/test.mk b/test/test.mk
index 3cf3202b8..2b7636185 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -171,6 +171,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_scale_test.cc
ifneq ($(CONFIG_REALTIME_ONLY),yes)
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += temporal_filter_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += yuv_temporal_filter_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_block_error_test.cc
diff --git a/test/yuv_temporal_filter_test.cc b/test/yuv_temporal_filter_test.cc
new file mode 100644
index 000000000..733354754
--- /dev/null
+++ b/test/yuv_temporal_filter_test.cc
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "test/acm_random.h"
+#include "test/buffer.h"
+#include "test/register_state_check.h"
+#include "vpx_ports/vpx_timer.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+using ::libvpx_test::Buffer;
+
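+// Type of the function under test. It filters one block of Y, U and V
+// pixels given the source and predicted blocks with their strides, the
+// chroma subsampling factors (ss_x/ss_y), the filter strength, the block
+// filter weights (blk_fw), and per-plane accumulator/count output buffers.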
+typedef void (*YUVTemporalFilterFunc)(
+ const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
+ int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src,
+ int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre,
+ int uv_pre_stride, unsigned int block_width, unsigned int block_height,
+ int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32,
+ uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
+ uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count);
+
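+// Returns the filter weight for the pixel at (row, col): blk_fw[0] when a
+// single 32x32 weight is used, otherwise one of the four per-quadrant
+// (16x16) weights in blk_fw.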
+static INLINE int get_filter_weight(unsigned int row, unsigned int col,
+ unsigned int block_height,
+ unsigned int block_width,
+ const int *const blk_fw, int use_32x32) {
+ if (use_32x32) {
+ return blk_fw[0];
+ }
+
+ return blk_fw[2 * (row >= block_height / 2) + (col >= block_width / 2)];
+}
+
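+// Converts a sum of squared differences over 'index' contributing pixels
+// into a filter modifier: scale by 3 / index, apply the strength-based
+// rounding shift, clamp to 16, invert so that smaller differences give
+// larger weights, and scale by the block filter weight.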
+static INLINE int mod_index(int sum_dist, int index, int rounding, int strength,
+ int filter_weight) {
+ int mod = (sum_dist * 3) / index;
+ mod += rounding;
+ mod >>= strength;
+
+ mod = VPXMIN(16, mod);
+
+ mod = 16 - mod;
+ mod *= filter_weight;
+
+ return mod;
+}
+
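+// Unoptimized reference implementation of the YUV temporal filter. It
+// computes per-pixel squared differences between the source and predicted
+// blocks, derives a modifier for each pixel from its 3x3 neighborhood plus
+// the co-located pixels of the other planes, and accumulates the
+// modifier-weighted prediction into the accumulator/count buffers.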
+void reference_filter(
+ const Buffer<uint8_t> &y_src, const Buffer<uint8_t> &y_pre,
+ const Buffer<uint8_t> &u_src, const Buffer<uint8_t> &v_src,
+ const Buffer<uint8_t> &u_pre, const Buffer<uint8_t> &v_pre,
+ unsigned int block_width, unsigned int block_height, int ss_x, int ss_y,
+ int strength, const int *const blk_fw, int use_32x32,
+ Buffer<uint32_t> *y_accumulator, Buffer<uint16_t> *y_count,
+ Buffer<uint32_t> *u_accumulator, Buffer<uint16_t> *u_count,
+ Buffer<uint32_t> *v_accumulator, Buffer<uint16_t> *v_count) {
+  // blk_fw means block filter weight.
+  // Set up buffers to store the squared differences.
+ Buffer<int> y_dif = Buffer<int>(block_width, block_height, 0);
+ const int uv_block_width = block_width >> ss_x;
+ const int uv_block_height = block_height >> ss_y;
+ Buffer<int> u_dif = Buffer<int>(uv_block_width, uv_block_height, 0);
+ Buffer<int> v_dif = Buffer<int>(uv_block_width, uv_block_height, 0);
+ ASSERT_TRUE(y_dif.Init());
+ ASSERT_TRUE(u_dif.Init());
+ ASSERT_TRUE(v_dif.Init());
+ y_dif.Set(0);
+ u_dif.Set(0);
+ v_dif.Set(0);
+
+  // How many bits do we want to round?
+ ASSERT_GE(strength, 0);
+ ASSERT_LE(strength, 6);
+ int rounding = 0;
+ if (strength > 0) {
+ rounding = 1 << (strength - 1);
+ }
+
+ // Check that the buffers are valid
+ ASSERT_TRUE(y_src.TopLeftPixel() != NULL);
+ ASSERT_TRUE(y_pre.TopLeftPixel() != NULL);
+ ASSERT_TRUE(y_dif.TopLeftPixel() != NULL);
+ ASSERT_TRUE(u_src.TopLeftPixel() != NULL);
+ ASSERT_TRUE(u_pre.TopLeftPixel() != NULL);
+ ASSERT_TRUE(u_dif.TopLeftPixel() != NULL);
+ ASSERT_TRUE(v_src.TopLeftPixel() != NULL);
+ ASSERT_TRUE(v_pre.TopLeftPixel() != NULL);
+ ASSERT_TRUE(v_dif.TopLeftPixel() != NULL);
+
+  // Get the squared diffs
+ for (int row = 0; row < (int)block_height; row++) {
+ for (int col = 0; col < (int)block_width; col++) {
+ int diff = y_src.TopLeftPixel()[row * y_src.stride() + col] -
+ y_pre.TopLeftPixel()[row * y_pre.stride() + col];
+ y_dif.TopLeftPixel()[row * y_dif.stride() + col] = diff * diff;
+ }
+ }
+
+ for (int row = 0; row < uv_block_height; row++) {
+ for (int col = 0; col < uv_block_width; col++) {
+ int u_diff = u_src.TopLeftPixel()[row * u_src.stride() + col] -
+ u_pre.TopLeftPixel()[row * u_pre.stride() + col];
+ int v_diff = v_src.TopLeftPixel()[row * v_src.stride() + col] -
+ v_pre.TopLeftPixel()[row * v_pre.stride() + col];
+ u_dif.TopLeftPixel()[row * u_dif.stride() + col] = u_diff * u_diff;
+ v_dif.TopLeftPixel()[row * v_dif.stride() + col] = v_diff * v_diff;
+ }
+ }
+
+ // Apply the filter
+ for (int row = 0; row < (int)block_height; row++) {
+ for (int col = 0; col < (int)block_width; col++) {
+ const int uv_r = row >> ss_y;
+ const int uv_c = col >> ss_x;
+ int filter_weight = get_filter_weight(row, col, block_height, block_width,
+ blk_fw, use_32x32);
+
+ // First we get the modifier for the current y pixel
+ const int y_pixel = y_pre.TopLeftPixel()[row * y_pre.stride() + col];
+ int y_num_used = 0;
+ int y_mod = 0;
+
+ // Sum the neighboring 3x3 y pixels
+ for (int row_step = -1; row_step <= 1; row_step++) {
+ for (int col_step = -1; col_step <= 1; col_step++) {
+ const int sub_row = row + row_step;
+ const int sub_col = col + col_step;
+
+ if (sub_row >= 0 && sub_row < (int)block_height && sub_col >= 0 &&
+ sub_col < (int)block_width) {
+ y_mod += y_dif.TopLeftPixel()[sub_row * y_dif.stride() + sub_col];
+ y_num_used++;
+ }
+ }
+ }
+
+      ASSERT_GT(y_num_used, 0);
+
+ // Sum the corresponding uv pixels to the current y modifier
+ // Note we are rounding down instead of rounding to the nearest pixel.
+ y_mod += u_dif.TopLeftPixel()[uv_r * uv_block_width + uv_c];
+ y_mod += v_dif.TopLeftPixel()[uv_r * uv_block_width + uv_c];
+
+ y_num_used += 2;
+
+ // Set the modifier
+ y_mod = mod_index(y_mod, y_num_used, rounding, strength, filter_weight);
+
+ // Accumulate the result
+ y_count->TopLeftPixel()[row * y_count->stride() + col] += y_mod;
+ y_accumulator->TopLeftPixel()[row * y_accumulator->stride() + col] +=
+ y_mod * y_pixel;
+
+ // Get the modifier for chroma components
+ if (!(row & ss_y) && !(col & ss_x)) {
+ const int u_pixel = u_pre.TopLeftPixel()[uv_r * u_pre.stride() + uv_c];
+ const int v_pixel = v_pre.TopLeftPixel()[uv_r * v_pre.stride() + uv_c];
+
+ int uv_num_used = 0;
+ int u_mod = 0, v_mod = 0;
+
+        // Sum the neighboring 3x3 chroma pixels into the chroma modifier
+ for (int row_step = -1; row_step <= 1; row_step++) {
+ for (int col_step = -1; col_step <= 1; col_step++) {
+ const int sub_row = uv_r + row_step;
+ const int sub_col = uv_c + col_step;
+
+ if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 &&
+ sub_col < uv_block_width) {
+ u_mod += u_dif.TopLeftPixel()[sub_row * uv_block_width + sub_col];
+ v_mod += v_dif.TopLeftPixel()[sub_row * uv_block_width + sub_col];
+ uv_num_used++;
+ }
+ }
+ }
+
+      ASSERT_GT(uv_num_used, 0);
+
+        // Sum all the luma pixels associated with the current chroma pixel
+ for (int row_step = 0; row_step < 1 + ss_y; row_step++) {
+ for (int col_step = 0; col_step < 1 + ss_x; col_step++) {
+ const int sub_row = (uv_r << ss_y) + row_step;
+ const int sub_col = (uv_c << ss_x) + col_step;
+ const int y_diff =
+ y_dif.TopLeftPixel()[sub_row * y_dif.stride() + sub_col];
+
+ u_mod += y_diff;
+ v_mod += y_diff;
+ uv_num_used++;
+ }
+ }
+
+ // Set the modifier
+ u_mod =
+ mod_index(u_mod, uv_num_used, rounding, strength, filter_weight);
+ v_mod =
+ mod_index(v_mod, uv_num_used, rounding, strength, filter_weight);
+
+        // Accumulate the result
+ u_count->TopLeftPixel()[uv_r * u_count->stride() + uv_c] += u_mod;
+ u_accumulator->TopLeftPixel()[uv_r * u_accumulator->stride() + uv_c] +=
+ u_mod * u_pixel;
+      v_count->TopLeftPixel()[uv_r * v_count->stride() + uv_c] += v_mod;
+ v_accumulator->TopLeftPixel()[uv_r * v_accumulator->stride() + uv_c] +=
+ v_mod * v_pixel;
+ }
+ }
+ }
+}
+
+class YUVTemporalFilterTest
+ : public ::testing::TestWithParam<YUVTemporalFilterFunc> {
+ public:
+ virtual void SetUp() {
+ filter_func_ = GetParam();
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ }
+
+ protected:
+ YUVTemporalFilterFunc filter_func_;
+ ACMRandom rnd_;
+};
+
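+// Checks the single-weight (32x32) path: for every combination of chroma
+// subsampling, filter strength and filter weight, the tested function must
+// match reference_filter().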
+TEST_P(YUVTemporalFilterTest, USE_32X32) {
+ const int width = 32, height = 32;
+ Buffer<uint8_t> y_src = Buffer<uint8_t>(width, height, 8);
+ Buffer<uint8_t> y_pre = Buffer<uint8_t>(width, height, 0);
+ Buffer<uint16_t> y_count_ref = Buffer<uint16_t>(width, height, 0);
+ Buffer<uint32_t> y_accum_ref = Buffer<uint32_t>(width, height, 0);
+ Buffer<uint16_t> y_count_tst = Buffer<uint16_t>(width, height, 0);
+ Buffer<uint32_t> y_accum_tst = Buffer<uint32_t>(width, height, 0);
+ ASSERT_TRUE(y_src.Init());
+ ASSERT_TRUE(y_pre.Init());
+ ASSERT_TRUE(y_count_ref.Init());
+ ASSERT_TRUE(y_accum_ref.Init());
+ ASSERT_TRUE(y_count_tst.Init());
+ ASSERT_TRUE(y_accum_tst.Init());
+
+ const int use_32x32 = 1;
+
+ for (int ss_x = 0; ss_x <= 1; ss_x++) {
+ for (int ss_y = 0; ss_y <= 1; ss_y++) {
+ for (int filter_strength = 0; filter_strength <= 6;
+ filter_strength += 2) {
+ for (int filter_weight = 0; filter_weight <= 2; filter_weight++) {
+ const int uv_width = width >> ss_x, uv_height = height >> ss_y;
+ Buffer<uint8_t> u_src = Buffer<uint8_t>(uv_width, uv_height, 8);
+ Buffer<uint8_t> u_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> u_count_ref =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> u_accum_ref =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> u_count_tst =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> u_accum_tst =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ ASSERT_TRUE(u_src.Init());
+ ASSERT_TRUE(u_pre.Init());
+ ASSERT_TRUE(u_count_ref.Init());
+ ASSERT_TRUE(u_accum_ref.Init());
+ ASSERT_TRUE(u_count_tst.Init());
+ ASSERT_TRUE(u_accum_tst.Init());
+ Buffer<uint8_t> v_src = Buffer<uint8_t>(uv_width, uv_height, 8);
+ Buffer<uint8_t> v_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> v_count_ref =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> v_accum_ref =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> v_count_tst =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> v_accum_tst =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ ASSERT_TRUE(v_src.Init());
+ ASSERT_TRUE(v_pre.Init());
+ ASSERT_TRUE(v_count_ref.Init());
+ ASSERT_TRUE(v_accum_ref.Init());
+ ASSERT_TRUE(v_count_tst.Init());
+ ASSERT_TRUE(v_accum_tst.Init());
+
+ // The difference between the buffers must be small to pass the
+ // threshold to apply the filter.
+ y_src.Set(&rnd_, 0, 7);
+ y_pre.Set(&rnd_, 0, 7);
+ u_src.Set(&rnd_, 0, 7);
+ u_pre.Set(&rnd_, 0, 7);
+ v_src.Set(&rnd_, 0, 7);
+ v_pre.Set(&rnd_, 0, 7);
+
+ y_accum_ref.Set(rnd_.Rand8());
+ y_accum_tst.CopyFrom(y_accum_ref);
+ y_count_ref.Set(rnd_.Rand8());
+ y_count_tst.CopyFrom(y_count_ref);
+ u_accum_ref.Set(rnd_.Rand8());
+ u_accum_tst.CopyFrom(u_accum_ref);
+ u_count_ref.Set(rnd_.Rand8());
+ u_count_tst.CopyFrom(u_count_ref);
+ v_accum_ref.Set(rnd_.Rand8());
+ v_accum_tst.CopyFrom(v_accum_ref);
+ v_count_ref.Set(rnd_.Rand8());
+ v_count_tst.CopyFrom(v_count_ref);
+
+ reference_filter(y_src, y_pre, u_src, v_src, u_pre, v_pre, width,
+ height, ss_x, ss_y, filter_strength, &filter_weight,
+ use_32x32, &y_accum_ref, &y_count_ref, &u_accum_ref,
+ &u_count_ref, &v_accum_ref, &v_count_ref);
+ ASM_REGISTER_STATE_CHECK(filter_func_(
+ y_src.TopLeftPixel(), y_src.stride(), y_pre.TopLeftPixel(),
+ y_pre.stride(), u_src.TopLeftPixel(), v_src.TopLeftPixel(),
+ u_src.stride(), u_pre.TopLeftPixel(), v_pre.TopLeftPixel(),
+ u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
+ &filter_weight, use_32x32, y_accum_tst.TopLeftPixel(),
+ y_count_tst.TopLeftPixel(), u_accum_tst.TopLeftPixel(),
+ u_count_tst.TopLeftPixel(), v_accum_tst.TopLeftPixel(),
+ v_count_tst.TopLeftPixel()));
+
+ EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref));
+ EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref));
+ EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref));
+ EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref));
+ EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref));
+ EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref));
+
+ if (HasFailure()) {
+ printf("SS_X: %d, SS_Y: %d, Weight: %d, Strength: %d\n", ss_x, ss_y,
+ filter_weight, filter_strength);
+ y_accum_tst.PrintDifference(y_accum_ref);
+ y_count_tst.PrintDifference(y_count_ref);
+ u_accum_tst.PrintDifference(u_accum_ref);
+ u_count_tst.PrintDifference(u_count_ref);
+ v_accum_tst.PrintDifference(v_accum_ref);
+ v_count_tst.PrintDifference(v_count_ref);
+ return;
+ }
+ }
+ }
+ }
+ }
+}
+
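+// Checks the per-quadrant (16x16) path: enumerates all 3^4 combinations of
+// the four quadrant weights and compares against reference_filter().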
+TEST_P(YUVTemporalFilterTest, USE_16X16) {
+ const int width = 32, height = 32;
+ Buffer<uint8_t> y_src = Buffer<uint8_t>(width, height, 8);
+ Buffer<uint8_t> y_pre = Buffer<uint8_t>(width, height, 0);
+ Buffer<uint16_t> y_count_ref = Buffer<uint16_t>(width, height, 0);
+ Buffer<uint32_t> y_accum_ref = Buffer<uint32_t>(width, height, 0);
+ Buffer<uint16_t> y_count_tst = Buffer<uint16_t>(width, height, 0);
+ Buffer<uint32_t> y_accum_tst = Buffer<uint32_t>(width, height, 0);
+ ASSERT_TRUE(y_src.Init());
+ ASSERT_TRUE(y_pre.Init());
+ ASSERT_TRUE(y_count_ref.Init());
+ ASSERT_TRUE(y_accum_ref.Init());
+ ASSERT_TRUE(y_count_tst.Init());
+ ASSERT_TRUE(y_accum_tst.Init());
+
+ const int use_32x32 = 0;
+
+ for (int ss_x = 0; ss_x <= 1; ss_x++) {
+ for (int ss_y = 0; ss_y <= 1; ss_y++) {
+ for (int filter_idx = 0; filter_idx < 3 * 3 * 3 * 3; filter_idx++) {
+ // Set up the filter
+ int filter_weight[4];
+ int filter_idx_cp = filter_idx;
+ for (int idx = 0; idx < 4; idx++) {
+ filter_weight[idx] = filter_idx_cp % 3;
+ filter_idx_cp /= 3;
+ }
+
+ // Test each parameter
+ for (int filter_strength = 0; filter_strength <= 6;
+ filter_strength += 2) {
+ const int uv_width = width >> ss_x, uv_height = height >> ss_y;
+ Buffer<uint8_t> u_src = Buffer<uint8_t>(uv_width, uv_height, 8);
+ Buffer<uint8_t> u_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> u_count_ref =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> u_accum_ref =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> u_count_tst =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> u_accum_tst =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ ASSERT_TRUE(u_src.Init());
+ ASSERT_TRUE(u_pre.Init());
+ ASSERT_TRUE(u_count_ref.Init());
+ ASSERT_TRUE(u_accum_ref.Init());
+ ASSERT_TRUE(u_count_tst.Init());
+ ASSERT_TRUE(u_accum_tst.Init());
+ Buffer<uint8_t> v_src = Buffer<uint8_t>(uv_width, uv_height, 8);
+ Buffer<uint8_t> v_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> v_count_ref =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> v_accum_ref =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> v_count_tst =
+ Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> v_accum_tst =
+ Buffer<uint32_t>(uv_width, uv_height, 0);
+ ASSERT_TRUE(v_src.Init());
+ ASSERT_TRUE(v_pre.Init());
+ ASSERT_TRUE(v_count_ref.Init());
+ ASSERT_TRUE(v_accum_ref.Init());
+ ASSERT_TRUE(v_count_tst.Init());
+ ASSERT_TRUE(v_accum_tst.Init());
+
+ // The difference between the buffers must be small to pass the
+ // threshold to apply the filter.
+ y_src.Set(&rnd_, 0, 7);
+ y_pre.Set(&rnd_, 0, 7);
+ u_src.Set(&rnd_, 0, 7);
+ u_pre.Set(&rnd_, 0, 7);
+ v_src.Set(&rnd_, 0, 7);
+ v_pre.Set(&rnd_, 0, 7);
+
+ y_accum_ref.Set(rnd_.Rand8());
+ y_accum_tst.CopyFrom(y_accum_ref);
+ y_count_ref.Set(rnd_.Rand8());
+ y_count_tst.CopyFrom(y_count_ref);
+ u_accum_ref.Set(rnd_.Rand8());
+ u_accum_tst.CopyFrom(u_accum_ref);
+ u_count_ref.Set(rnd_.Rand8());
+ u_count_tst.CopyFrom(u_count_ref);
+ v_accum_ref.Set(rnd_.Rand8());
+ v_accum_tst.CopyFrom(v_accum_ref);
+ v_count_ref.Set(rnd_.Rand8());
+ v_count_tst.CopyFrom(v_count_ref);
+
+ reference_filter(y_src, y_pre, u_src, v_src, u_pre, v_pre, width,
+ height, ss_x, ss_y, filter_strength, filter_weight,
+ use_32x32, &y_accum_ref, &y_count_ref, &u_accum_ref,
+ &u_count_ref, &v_accum_ref, &v_count_ref);
+ ASM_REGISTER_STATE_CHECK(filter_func_(
+ y_src.TopLeftPixel(), y_src.stride(), y_pre.TopLeftPixel(),
+ y_pre.stride(), u_src.TopLeftPixel(), v_src.TopLeftPixel(),
+ u_src.stride(), u_pre.TopLeftPixel(), v_pre.TopLeftPixel(),
+ u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
+ filter_weight, use_32x32, y_accum_tst.TopLeftPixel(),
+ y_count_tst.TopLeftPixel(), u_accum_tst.TopLeftPixel(),
+ u_count_tst.TopLeftPixel(), v_accum_tst.TopLeftPixel(),
+ v_count_tst.TopLeftPixel()));
+
+ EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref));
+ EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref));
+ EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref));
+ EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref));
+ EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref));
+ EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref));
+
+ if (HasFailure()) {
+ printf("SS_X: %d, SS_Y: %d, Weight Idx: %d, Strength: %d\n", ss_x,
+ ss_y, filter_idx, filter_strength);
+ y_accum_tst.PrintDifference(y_accum_ref);
+ y_count_tst.PrintDifference(y_count_ref);
+ u_accum_tst.PrintDifference(u_accum_ref);
+ u_count_tst.PrintDifference(u_count_ref);
+ v_accum_tst.PrintDifference(v_accum_ref);
+ v_count_tst.PrintDifference(v_count_ref);
+ return;
+ }
+ }
+ }
+ }
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(C, YUVTemporalFilterTest,
+ ::testing::Values(&vp9_apply_temporal_filter));
+} // namespace
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 8bb68cfdf..3102b08a7 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -186,6 +186,8 @@ specialize qw/vp9_diamond_search_sad avx/;
if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count";
specialize qw/vp9_temporal_filter_apply sse4_1/;
+
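+# Only the C implementation is provided here; no SIMD specializations are
+# added in this change.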
+add_proto qw/void vp9_apply_temporal_filter/, "const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count";
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index cd340c394..23943bb18 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -198,8 +198,8 @@ static INLINE int mod_index(int sum_dist, int index, int rounding, int strength,
static INLINE int get_filter_weight(unsigned int i, unsigned int j,
unsigned int block_height,
- unsigned int block_width, int *blk_fw,
- int use_32x32) {
+ unsigned int block_width,
+ const int *const blk_fw, int use_32x32) {
int filter_weight = 0;
if (use_32x32)
@@ -220,12 +220,12 @@ static INLINE int get_filter_weight(unsigned int i, unsigned int j,
return filter_weight;
}
-static void apply_temporal_filter(
+void vp9_apply_temporal_filter_c(
const uint8_t *y_frame1, int y_stride, const uint8_t *y_pred,
int y_buf_stride, const uint8_t *u_frame1, const uint8_t *v_frame1,
int uv_stride, const uint8_t *u_pred, const uint8_t *v_pred,
int uv_buf_stride, unsigned int block_width, unsigned int block_height,
- int ss_x, int ss_y, int strength, int *blk_fw, int use_32x32,
+ int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32,
uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count) {
unsigned int i, j, k, m;
@@ -767,7 +767,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
count + (BLK_PELS << 1));
} else {
// Apply the filter (YUV)
- apply_temporal_filter(
+ vp9_apply_temporal_filter_c(
f->y_buffer + mb_y_offset, f->y_stride, predictor, BW,
f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1),
@@ -778,7 +778,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
}
#else
// Apply the filter (YUV)
- apply_temporal_filter(
+ vp9_apply_temporal_filter_c(
f->y_buffer + mb_y_offset, f->y_stride, predictor, BW,
f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1),