35 files changed, 355 insertions, 359 deletions
diff --git a/args.c b/args.c
index bd1ede038..a87b138b9 100644
--- a/args.c
+++ b/args.c
@@ -13,6 +13,7 @@
 #include <limits.h>
 #include "args.h"
 
+#include "vpx/vpx_integer.h"
 #include "vpx_ports/msvc.h"
 
 #if defined(__GNUC__) && __GNUC__
@@ -118,13 +119,13 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
 }
 
 unsigned int arg_parse_uint(const struct arg *arg) {
-  long int rawval;
+  unsigned long rawval;  // full strtoul() width; narrowed only after the check
   char *endptr;
 
-  rawval = strtol(arg->val, &endptr, 10);
+  rawval = strtoul(arg->val, &endptr, 10);
 
   if (arg->val[0] != '\0' && endptr[0] == '\0') {
-    if (rawval >= 0 && rawval <= UINT_MAX) return (unsigned int)rawval;
+    if (rawval <= UINT_MAX) return (unsigned int)rawval;
 
-    die("Option %s: Value %ld out of range for unsigned int\n", arg->name,
+    die("Option %s: Value %lu out of range for unsigned int\n", arg->name,
         rawval);
@@ -135,10 +136,10 @@ unsigned int arg_parse_uint(const struct arg *arg) {
 }
 
 int arg_parse_int(const struct arg *arg) {
-  long int rawval;
+  long rawval;  // full strtol() width; narrowed only after the range check
   char *endptr;
 
-  rawval = strtol(arg->val, &endptr, 10);
+  rawval = strtol(arg->val, &endptr, 10);  // no cast: keep out-of-range values
 
   if (arg->val[0] != '\0' && endptr[0] == '\0') {
     if (rawval >= INT_MIN && rawval <= INT_MAX) return (int)rawval;
diff --git a/configure b/configure
index 95af2e395..27ec8d9cc 100755
--- a/configure
+++ b/configure
@@ -575,6 +575,11 @@ process_toolchain() {
         check_add_cflags -Wimplicit-function-declaration
         check_add_cflags -Wuninitialized
         check_add_cflags -Wunused
+        # -Wextra has some tricky cases. Rather than fix them all now, get the
+        # flag for as many files as possible and fix the remaining issues
+        # piecemeal.
+        # https://bugs.chromium.org/p/webm/issues/detail?id=1069
+        check_add_cflags -Wextra
         # check_add_cflags also adds to cxxflags. gtest does not do well with
         # -Wundef so add it explicitly to CFLAGS only.
         check_cflags -Wundef && add_cflags_only -Wundef
diff --git a/examples.mk b/examples.mk
index cc7fb1ddc..38c4d75c5 100644
--- a/examples.mk
+++ b/examples.mk
@@ -76,6 +76,7 @@ vpxdec.SRCS                 += tools_common.c tools_common.h
 vpxdec.SRCS                 += y4menc.c y4menc.h
 ifeq ($(CONFIG_LIBYUV),yes)
   vpxdec.SRCS                 += $(LIBYUV_SRCS)
+  $(BUILD_PFX)third_party/libyuv/%.cc.o: CXXFLAGS += -Wno-unused-parameter
 endif
 ifeq ($(CONFIG_WEBM_IO),yes)
   vpxdec.SRCS                 += $(LIBWEBM_COMMON_SRCS)
diff --git a/examples/vp9cx_set_ref.c b/examples/vp9cx_set_ref.c
index 798d7e3f2..3472689db 100644
--- a/examples/vp9cx_set_ref.c
+++ b/examples/vp9cx_set_ref.c
@@ -304,6 +304,7 @@ int main(int argc, char **argv) {
   const char *height_arg = NULL;
   const char *infile_arg = NULL;
   const char *outfile_arg = NULL;
+  const char *update_frame_num_arg = NULL;
   unsigned int limit = 0;
 
   vp9_zero(ecodec);
@@ -318,18 +319,21 @@ int main(int argc, char **argv) {
   height_arg = argv[2];
   infile_arg = argv[3];
   outfile_arg = argv[4];
+  update_frame_num_arg = argv[5];
 
   encoder = get_vpx_encoder_by_name("vp9");
   if (!encoder) die("Unsupported codec.");
 
-  update_frame_num = atoi(argv[5]);
+  update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0);
   // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are
   // allocated while calling vpx_codec_encode(), thus, setting reference for
   // 1st frame isn't supported.
-  if (update_frame_num <= 1) die("Couldn't parse frame number '%s'\n", argv[5]);
+  if (update_frame_num <= 1) {
+    die("Couldn't parse frame number '%s'\n", update_frame_num_arg);
+  }
 
   if (argc > 6) {
-    limit = atoi(argv[6]);
+    limit = (unsigned int)strtoul(argv[6], NULL, 0);
     if (update_frame_num > limit)
       die("Update frame number couldn't larger than limit\n");
   }
diff --git a/libs.mk b/libs.mk
index 9a6092a51..6e12b5404 100644
--- a/libs.mk
+++ b/libs.mk
@@ -106,9 +106,6 @@ ifeq ($(CONFIG_VP9_DECODER),yes)
   CODEC_DOC_SECTIONS += vp9 vp9_decoder
 endif
 
-VP9_PREFIX=vp9/
-$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
-
 ifeq ($(CONFIG_ENCODERS),yes)
   CODEC_DOC_SECTIONS += encoder
 endif
@@ -116,6 +113,12 @@ ifeq ($(CONFIG_DECODERS),yes)
   CODEC_DOC_SECTIONS += decoder
 endif
 
+# Suppress -Wextra warnings in third party code.
+$(BUILD_PFX)third_party/googletest/%.cc.o: CXXFLAGS += -Wno-missing-field-initializers
+# Suppress -Wextra warnings in first party code pending investigation.
+# https://bugs.chromium.org/p/webm/issues/detail?id=1069
+$(BUILD_PFX)vp8/encoder/onyx_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered
+$(BUILD_PFX)vp8/decoder/onyxd_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered
 
 ifeq ($(CONFIG_MSVS),yes)
 CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
diff --git a/test/avg_test.cc b/test/avg_test.cc
index 867a77aa0..272b99695 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -53,7 +53,7 @@ class AverageTestBase : public ::testing::Test {
   }
 
   // Sum Pixels
-  unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
     unsigned int average = 0;
     for (int h = 0; h < 8; ++h) {
       for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
@@ -61,7 +61,7 @@ class AverageTestBase : public ::testing::Test {
     return ((average + 32) >> 6);
   }
 
-  unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
     unsigned int average = 0;
     for (int h = 0; h < 4; ++h) {
       for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
@@ -98,11 +98,12 @@ class AverageTest : public AverageTestBase,
 
  protected:
   void CheckAverages() {
+    const int block_size = GET_PARAM(3);
     unsigned int expected = 0;
-    if (GET_PARAM(3) == 8) {
+    if (block_size == 8) {
       expected =
           ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_);
-    } else if (GET_PARAM(3) == 4) {
+    } else if (block_size == 4) {
       expected =
           ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_);
     }
diff --git a/test/codec_factory.h b/test/codec_factory.h
index e867dacaf..3415284ab 100644
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -115,6 +115,8 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_DECODER
     return new VP8Decoder(cfg, flags);
 #else
+    (void)cfg;
+    (void)flags;
     return NULL;
 #endif
   }
@@ -126,6 +128,10 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_ENCODER
     return new VP8Encoder(cfg, deadline, init_flags, stats);
 #else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
     return NULL;
 #endif
   }
@@ -135,6 +141,8 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_ENCODER
     return vpx_codec_enc_config_default(&vpx_codec_vp8_cx_algo, cfg, usage);
 #else
+    (void)cfg;
+    (void)usage;
     return VPX_CODEC_INCAPABLE;
 #endif
   }
@@ -203,6 +211,8 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_DECODER
     return new VP9Decoder(cfg, flags);
 #else
+    (void)cfg;
+    (void)flags;
     return NULL;
 #endif
   }
@@ -214,6 +224,10 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_ENCODER
     return new VP9Encoder(cfg, deadline, init_flags, stats);
 #else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
     return NULL;
 #endif
   }
@@ -223,6 +237,8 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_ENCODER
     return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
 #else
+    (void)cfg;
+    (void)usage;
     return VPX_CODEC_INCAPABLE;
 #endif
   }
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 432d1b009..e27ece250 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -12,8 +12,8 @@
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
-#include "./vpx_config.h"
 #include "./vp9_rtcd.h"
+#include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
@@ -36,6 +36,12 @@ typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y, int filter_y_stride,
                              int w, int h);
 
+typedef void (*WrapperFilterBlock2d8Func)(
+    const uint8_t *src_ptr, const unsigned int src_stride,
+    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
+    unsigned int dst_stride, unsigned int output_width,
+    unsigned int output_height, int use_highbd);  // signature of wrapper_*_c
+
 struct ConvolveFunctions {
   ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
                     ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
@@ -43,25 +49,30 @@ struct ConvolveFunctions {
                     ConvolveFunc sh8_avg, ConvolveFunc sv8,
                     ConvolveFunc sv8_avg, ConvolveFunc shv8,
                     ConvolveFunc shv8_avg, int bd)
-      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
-        v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
-        sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
-        use_highbd_(bd) {}
-
-  ConvolveFunc copy_;
-  ConvolveFunc avg_;
-  ConvolveFunc h8_;
-  ConvolveFunc v8_;
-  ConvolveFunc hv8_;
-  ConvolveFunc h8_avg_;
-  ConvolveFunc v8_avg_;
-  ConvolveFunc hv8_avg_;
-  ConvolveFunc sh8_;       // scaled horiz
-  ConvolveFunc sv8_;       // scaled vert
-  ConvolveFunc shv8_;      // scaled horiz/vert
-  ConvolveFunc sh8_avg_;   // scaled avg horiz
-  ConvolveFunc sv8_avg_;   // scaled avg vert
-  ConvolveFunc shv8_avg_;  // scaled avg horiz/vert
+      : use_highbd_(bd) {
+    copy_[0] = copy;
+    copy_[1] = avg;
+    h8_[0] = h8;
+    h8_[1] = h8_avg;
+    v8_[0] = v8;
+    v8_[1] = v8_avg;
+    hv8_[0] = hv8;
+    hv8_[1] = hv8_avg;
+    sh8_[0] = sh8;
+    sh8_[1] = sh8_avg;
+    sv8_[0] = sv8;
+    sv8_[1] = sv8_avg;
+    shv8_[0] = shv8;
+    shv8_[1] = shv8_avg;
+  }
+
+  ConvolveFunc copy_[2];
+  ConvolveFunc h8_[2];
+  ConvolveFunc v8_[2];
+  ConvolveFunc hv8_[2];
+  ConvolveFunc sh8_[2];   // scaled horiz
+  ConvolveFunc sv8_[2];   // scaled vert
+  ConvolveFunc shv8_[2];  // scaled horiz/vert
   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
 };
 
@@ -82,7 +93,7 @@ typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
 
 void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
-                        const int16_t *HFilter, const int16_t *VFilter,
+                        const int16_t *hfilter, const int16_t *vfilter,
                         uint8_t *dst_ptr, unsigned int dst_stride,
                         unsigned int output_width, unsigned int output_height) {
   // Between passes, we use an intermediate buffer whose height is extended to
@@ -112,10 +123,10 @@ void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
   for (i = 0; i < intermediate_height; ++i) {
     for (j = 0; j < output_width; ++j) {
       // Apply filter...
-      const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
-                       (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
-                       (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
-                       (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+      const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
+                       (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
+                       (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
+                       (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                        (VP9_FILTER_WEIGHT >> 1);  // Rounding
 
       // Normalize back to 0-255...
@@ -133,10 +144,10 @@ void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
   for (i = 0; i < output_height; ++i) {
     for (j = 0; j < output_width; ++j) {
       // Apply filter...
-      const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
-                       (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
-                       (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
-                       (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
+      const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
+                       (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
+                       (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
+                       (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                        (VP9_FILTER_WEIGHT >> 1);  // Rounding
 
       // Normalize back to 0-255...
@@ -162,7 +173,7 @@ void block2d_average_c(uint8_t *src, unsigned int src_stride,
 
 void filter_average_block2d_8_c(const uint8_t *src_ptr,
                                 const unsigned int src_stride,
-                                const int16_t *HFilter, const int16_t *VFilter,
+                                const int16_t *hfilter, const int16_t *vfilter,
                                 uint8_t *dst_ptr, unsigned int dst_stride,
                                 unsigned int output_width,
                                 unsigned int output_height) {
@@ -170,7 +181,7 @@ void filter_average_block2d_8_c(const uint8_t *src_ptr,
 
   assert(output_width <= kMaxDimension);
   assert(output_height <= kMaxDimension);
-  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
+  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
                      output_width, output_height);
   block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height);
 }
@@ -178,7 +189,7 @@ void filter_average_block2d_8_c(const uint8_t *src_ptr,
 #if CONFIG_VP9_HIGHBITDEPTH
 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                                const unsigned int src_stride,
-                               const int16_t *HFilter, const int16_t *VFilter,
+                               const int16_t *hfilter, const int16_t *vfilter,
                                uint16_t *dst_ptr, unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height, int bd) {
@@ -210,10 +221,10 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
     for (i = 0; i < intermediate_height; ++i) {
       for (j = 0; j < output_width; ++j) {
         // Apply filter...
-        const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
-                         (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
-                         (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
-                         (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+        const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
+                         (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
+                         (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
+                         (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
 
         // Normalize back to 0-255...
@@ -234,10 +245,10 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; ++j) {
         // Apply filter...
-        const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
-                         (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
-                         (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
-                         (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
+        const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
+                         (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
+                         (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
+                         (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
 
         // Normalize back to 0-255...
@@ -265,20 +276,64 @@ void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
 
 void highbd_filter_average_block2d_8_c(
     const uint16_t *src_ptr, const unsigned int src_stride,
-    const int16_t *HFilter, const int16_t *VFilter, uint16_t *dst_ptr,
+    const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr,
     unsigned int dst_stride, unsigned int output_width,
     unsigned int output_height, int bd) {
   uint16_t tmp[kMaxDimension * kMaxDimension];
 
   assert(output_width <= kMaxDimension);
   assert(output_height <= kMaxDimension);
-  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
+  highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
                             output_width, output_height, bd);
   highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width,
                            output_height);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
+void wrapper_filter_average_block2d_8_c(
+    const uint8_t *src_ptr, const unsigned int src_stride,
+    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
+    unsigned int dst_stride, unsigned int output_width,
+    unsigned int output_height, int use_highbd) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (use_highbd == 0) {  // 0 selects the 8-bit averaging reference filter
+    filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
+                               dst_stride, output_width, output_height);
+  } else {  // non-zero use_highbd is forwarded as the bd argument
+    highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
+                                      hfilter, vfilter,
+                                      CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+                                      output_width, output_height, use_highbd);
+  }
+#else  // !CONFIG_VP9_HIGHBITDEPTH
+  ASSERT_EQ(0, use_highbd);  // only the 8-bit filter is built here
+  filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
+                             dst_stride, output_width, output_height);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
+
+void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
+                                const unsigned int src_stride,
+                                const int16_t *hfilter, const int16_t *vfilter,
+                                uint8_t *dst_ptr, unsigned int dst_stride,
+                                unsigned int output_width,
+                                unsigned int output_height, int use_highbd) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (use_highbd == 0) {  // 0 selects the 8-bit reference filter
+    filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
+                       dst_stride, output_width, output_height);
+  } else {  // non-zero use_highbd is forwarded as the bd argument
+    highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, hfilter,
+                              vfilter, CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+                              output_width, output_height, use_highbd);
+  }
+#else  // !CONFIG_VP9_HIGHBITDEPTH
+  ASSERT_EQ(0, use_highbd);  // only the 8-bit filter is built here
+  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride,
+                     output_width, output_height);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
+
 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
  public:
   static void SetUpTestCase() {
@@ -461,50 +516,6 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 #endif
   }
 
-  void wrapper_filter_average_block2d_8_c(
-      const uint8_t *src_ptr, const unsigned int src_stride,
-      const int16_t *HFilter, const int16_t *VFilter, uint8_t *dst_ptr,
-      unsigned int dst_stride, unsigned int output_width,
-      unsigned int output_height) {
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (UUT_->use_highbd_ == 0) {
-      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                                 dst_stride, output_width, output_height);
-    } else {
-      highbd_filter_average_block2d_8_c(
-          CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
-          CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
-          UUT_->use_highbd_);
-    }
-#else
-    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                               dst_stride, output_width, output_height);
-#endif
-  }
-
-  void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
-                                  const unsigned int src_stride,
-                                  const int16_t *HFilter,
-                                  const int16_t *VFilter, uint8_t *dst_ptr,
-                                  unsigned int dst_stride,
-                                  unsigned int output_width,
-                                  unsigned int output_height) {
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (UUT_->use_highbd_ == 0) {
-      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                         dst_stride, output_width, output_height);
-    } else {
-      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
-                                HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
-                                dst_stride, output_width, output_height,
-                                UUT_->use_highbd_);
-    }
-#else
-    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                       dst_stride, output_width, output_height);
-#endif
-  }
-
   const ConvolveFunctions *UUT_;
   static uint8_t *input_;
   static uint8_t *output_;
@@ -532,8 +543,8 @@ TEST_P(ConvolveTest, Copy) {
   uint8_t *const in = input();
   uint8_t *const out = output();
 
-  ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
-                                       NULL, 0, NULL, 0, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
+                                          NULL, 0, NULL, 0, Width(), Height()));
 
   CheckGuardBlocks();
 
@@ -551,8 +562,8 @@ TEST_P(ConvolveTest, Avg) {
   uint8_t *const out_ref = output_ref();
   CopyOutputToRef();
 
-  ASM_REGISTER_STATE_CHECK(UUT_->avg_(in, kInputStride, out, kOutputStride,
-                                      NULL, 0, NULL, 0, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
+                                          NULL, 0, NULL, 0, Width(), Height()));
 
   CheckGuardBlocks();
 
@@ -572,9 +583,9 @@ TEST_P(ConvolveTest, CopyHoriz) {
   DECLARE_ALIGNED(256, const int16_t,
                   filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
 
-  ASM_REGISTER_STATE_CHECK(UUT_->sh8_(in, kInputStride, out, kOutputStride,
-                                      filter8, 16, filter8, 16, Width(),
-                                      Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
+                                         filter8, 16, filter8, 16, Width(),
+                                         Height()));
 
   CheckGuardBlocks();
 
@@ -592,9 +603,9 @@ TEST_P(ConvolveTest, CopyVert) {
   DECLARE_ALIGNED(256, const int16_t,
                   filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
 
-  ASM_REGISTER_STATE_CHECK(UUT_->sv8_(in, kInputStride, out, kOutputStride,
-                                      filter8, 16, filter8, 16, Width(),
-                                      Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
+                                         filter8, 16, filter8, 16, Width(),
+                                         Height()));
 
   CheckGuardBlocks();
 
@@ -612,9 +623,9 @@ TEST_P(ConvolveTest, Copy2D) {
   DECLARE_ALIGNED(256, const int16_t,
                   filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
 
-  ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
-                                       filter8, 16, filter8, 16, Width(),
-                                       Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
+                                          filter8, 16, filter8, 16, Width(),
+                                          Height()));
 
   CheckGuardBlocks();
 
@@ -651,137 +662,84 @@ TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
 }
 
 const int16_t kInvalidFilter[8] = { 0 };
+static const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
+  wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
+};  // [0]: plain reference filter, [1]: averaging reference filter
 
 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
+  for (int i = 0; i < 2; ++i) {
+    uint8_t *const in = input();
+    uint8_t *const out = output();
 #if CONFIG_VP9_HIGHBITDEPTH
-  uint8_t ref8[kOutputStride * kMaxDimension];
-  uint16_t ref16[kOutputStride * kMaxDimension];
-  uint8_t *ref;
-  if (UUT_->use_highbd_ == 0) {
-    ref = ref8;
-  } else {
-    ref = CONVERT_TO_BYTEPTR(ref16);
-  }
-#else
-  uint8_t ref[kOutputStride * kMaxDimension];
-#endif
-
-  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
-    const InterpKernel *filters =
-        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
-
-    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
-      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
-        wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
-                                   filters[filter_y], ref, kOutputStride,
-                                   Width(), Height());
-
-        if (filter_x && filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_y)
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
-                        16, filters[filter_y], 16, Width(), Height()));
-        else if (filter_x)
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
-                        16, kInvalidFilter, 16, Width(), Height()));
-        else
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
-                          0, kInvalidFilter, 0, Width(), Height()));
-
-        CheckGuardBlocks();
-
-        for (int y = 0; y < Height(); ++y) {
-          for (int x = 0; x < Width(); ++x)
-            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
-                      lookup(out, y * kOutputStride + x))
-                << "mismatch at (" << x << "," << y << "), "
-                << "filters (" << filter_bank << "," << filter_x << ","
-                << filter_y << ")";
-        }
-      }
+    uint8_t ref8[kOutputStride * kMaxDimension];
+    uint16_t ref16[kOutputStride * kMaxDimension];
+    uint8_t *ref;
+    if (UUT_->use_highbd_ == 0) {
+      ref = ref8;
+    } else {
+      ref = CONVERT_TO_BYTEPTR(ref16);
     }
-  }
-}
-
-TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-#if CONFIG_VP9_HIGHBITDEPTH
-  uint8_t ref8[kOutputStride * kMaxDimension];
-  uint16_t ref16[kOutputStride * kMaxDimension];
-  uint8_t *ref;
-  if (UUT_->use_highbd_ == 0) {
-    ref = ref8;
-  } else {
-    ref = CONVERT_TO_BYTEPTR(ref16);
-  }
 #else
-  uint8_t ref[kOutputStride * kMaxDimension];
+    uint8_t ref[kOutputStride * kMaxDimension];
 #endif
 
-  // Populate ref and out with some random data
-  ::libvpx_test::ACMRandom prng;
-  for (int y = 0; y < Height(); ++y) {
-    for (int x = 0; x < Width(); ++x) {
-      uint16_t r;
+    // Populate ref and out with some random data
+    ::libvpx_test::ACMRandom prng;
+    for (int y = 0; y < Height(); ++y) {
+      for (int x = 0; x < Width(); ++x) {
+        uint16_t r;
 #if CONFIG_VP9_HIGHBITDEPTH
-      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
-        r = prng.Rand8Extremes();
-      } else {
-        r = prng.Rand16() & mask_;
-      }
+        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
+          r = prng.Rand8Extremes();
+        } else {
+          r = prng.Rand16() & mask_;
+        }
 #else
-      r = prng.Rand8Extremes();
+        r = prng.Rand8Extremes();
 #endif
 
-      assign_val(out, y * kOutputStride + x, r);
-      assign_val(ref, y * kOutputStride + x, r);
+        assign_val(out, y * kOutputStride + x, r);
+        assign_val(ref, y * kOutputStride + x, r);
+      }
     }
-  }
-
-  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
-    const InterpKernel *filters =
-        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
 
-    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
-      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
-        wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x],
-                                           filters[filter_y], ref,
-                                           kOutputStride, Width(), Height());
-
-        if (filter_x && filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_(
-              in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_x)
-          ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              kInvalidFilter, 16, Width(), Height()));
-        else
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter,
-                         0, kInvalidFilter, 0, Width(), Height()));
-
-        CheckGuardBlocks();
-
-        for (int y = 0; y < Height(); ++y) {
-          for (int x = 0; x < Width(); ++x)
-            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
-                      lookup(out, y * kOutputStride + x))
-                << "mismatch at (" << x << "," << y << "), "
-                << "filters (" << filter_bank << "," << filter_x << ","
-                << filter_y << ")";
+    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+      const InterpKernel *filters =
+          vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
+
+      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+          wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x],
+                                      filters[filter_y], ref, kOutputStride,
+                                      Width(), Height(), UUT_->use_highbd_);
+
+          if (filter_x && filter_y)
+            ASM_REGISTER_STATE_CHECK(UUT_->hv8_[i](
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                filters[filter_y], 16, Width(), Height()));
+          else if (filter_y)
+            ASM_REGISTER_STATE_CHECK(UUT_->v8_[i](
+                in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
+                filters[filter_y], 16, Width(), Height()));
+          else if (filter_x)
+            ASM_REGISTER_STATE_CHECK(UUT_->h8_[i](
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                kInvalidFilter, 16, Width(), Height()));
+          else
+            ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](
+                in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
+                kInvalidFilter, 0, Width(), Height()));
+
+          CheckGuardBlocks();
+
+          for (int y = 0; y < Height(); ++y) {
+            for (int x = 0; x < Width(); ++x)
+              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+                        lookup(out, y * kOutputStride + x))
+                  << "mismatch at (" << x << "," << y << "), "
+                  << "filters (" << filter_bank << "," << filter_x << ","
+                  << filter_y << ")";
+          }
         }
       }
     }
@@ -852,21 +810,21 @@ TEST_P(ConvolveTest, FilterExtremes) {
           for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
             wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                        filters[filter_y], ref, kOutputStride,
-                                       Width(), Height());
+                                       Width(), Height(), UUT_->use_highbd_);
             if (filter_x && filter_y)
-              ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
+              ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0](
                   in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                   filters[filter_y], 16, Width(), Height()));
             else if (filter_y)
-              ASM_REGISTER_STATE_CHECK(UUT_->v8_(
+              ASM_REGISTER_STATE_CHECK(UUT_->v8_[0](
                   in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
                   filters[filter_y], 16, Width(), Height()));
             else if (filter_x)
-              ASM_REGISTER_STATE_CHECK(UUT_->h8_(
+              ASM_REGISTER_STATE_CHECK(UUT_->h8_[0](
                   in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                   kInvalidFilter, 16, Width(), Height()));
             else
-              ASM_REGISTER_STATE_CHECK(UUT_->copy_(
+              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](
                   in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
                   kInvalidFilter, 0, Width(), Height()));
 
@@ -897,9 +855,9 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
   for (int frac = 0; frac < 16; ++frac) {
     for (int step = 1; step <= 32; ++step) {
       /* Test the horizontal and vertical filters in combination. */
-      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
-                                           eighttap[frac], step, eighttap[frac],
-                                           step, Width(), Height()));
+      ASM_REGISTER_STATE_CHECK(
+          UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap[frac],
+                         step, eighttap[frac], step, Width(), Height()));
 
       CheckGuardBlocks();
 
@@ -1085,21 +1043,12 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
 #endif  // HAVE_AVX2 && HAVE_SSSE3
 
 #if HAVE_NEON
-#if HAVE_NEON_ASM
-const ConvolveFunctions convolve8_neon(
-    vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon,
-    vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
-    vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
-    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
-    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-#else   // HAVE_NEON
 const ConvolveFunctions convolve8_neon(
     vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon,
     vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
     vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
     vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-#endif  // HAVE_NEON_ASM
 
 const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc
index 4c7ede8ca..030b67c57 100644
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -90,8 +90,7 @@ class ErrorResilienceTestLarge
     return frame_flags;
   }
 
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder * /*encoder*/) {
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
     frame_flags_ &=
         ~(VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF);
     // For temporal layer case.
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index bebbb141d..3955b6e3f 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -141,6 +141,8 @@ const DecodeParam kVP9InvalidFileTests[] = {
   { 1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf" },
   { 1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf" },
   { 1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf" },
+  { 1,
+    "invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf" },
 };
 
 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
diff --git a/test/test-data.mk b/test/test-data.mk
index da2fd77d4..80b802e0a 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -764,6 +764,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s195
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 7dd4fcf15..b97ae967e 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -838,3 +838,5 @@ a000d568431d07379dd5a8ec066061c07e560b47 *invalid-vp90-2-00-quantizer-63.ivf.kf_
 1e75aad3433c5c21c194a7b53fc393970f0a8d7f *invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf.res
 235182f9a1c5c8841552510dd4288487447bfc40 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
 787f04f0483320d536894282f3358a4f8cac1cf9 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
+91d3cefd0deb98f3b0caf3a2d900ec7a7605e53a *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf
+1e472baaf5f6113459f0399a38a5a5e68d17799d *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 89abd41c0..28c981a60 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -83,8 +83,6 @@ static int get_cpu_count() {
 }
 #endif
 
-void vp8_clear_system_state_c(){};
-
 void vp8_machine_specific_config(VP8_COMMON *ctx) {
 #if CONFIG_MULTITHREAD
   ctx->processor_core_count = get_cpu_count();
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 8b8c1701a..d67ee8a57 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -12,6 +12,7 @@
 #include "vpx_dsp_rtcd.h"
 #include "vp8_rtcd.h"
 #include "vpx_dsp/postproc.h"
+#include "vpx_ports/system_state.h"
 #include "vpx_scale_rtcd.h"
 #include "vpx_scale/yv12config.h"
 #include "postproc.h"
@@ -321,7 +322,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest,
     }
   }
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid &&
       oci->current_video_frame >= 2 &&
@@ -363,7 +364,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest,
         oci->postproc_state.last_noise != noise_level) {
       double sigma;
       struct postproc_state *ppstate = &oci->postproc_state;
-      vp8_clear_system_state();
+      vpx_clear_system_state();
       sigma = noise_level + .5 + .6 * q / 63.0;
       ppstate->clamp =
           vpx_setup_noise(sigma, ppstate->generated_noise, oci->Width + 256);
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 5d8e4a78d..ca10a1a1e 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -19,13 +19,6 @@ EOF
 forward_decls qw/vp8_common_forward_decls/;
 
 #
-# system state
-#
-add_proto qw/void vp8_clear_system_state/, "";
-specialize qw/vp8_clear_system_state mmx/;
-$vp8_clear_system_state_mmx=vpx_reset_mmx_state;
-
-#
 # Dequant
 #
 add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
@@ -33,34 +26,27 @@ specialize qw/vp8_dequantize_b mmx neon msa/;
 
 add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
 specialize qw/vp8_dequant_idct_add mmx neon dspr2 msa/;
-$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
 
 add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
 specialize qw/vp8_dequant_idct_add_y_block mmx sse2 neon dspr2 msa/;
-$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
 
 add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
 specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 neon dspr2 msa/;
-$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
 
 #
 # Loopfilter
 #
 add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
 specialize qw/vp8_loop_filter_mbv mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
 
 add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
 specialize qw/vp8_loop_filter_bv mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
 
 add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
 specialize qw/vp8_loop_filter_mbh mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
 
 add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
 specialize qw/vp8_loop_filter_bh mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
 
 
 add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
@@ -101,38 +87,30 @@ $vp8_loop_filter_simple_bh_msa=vp8_loop_filter_bhs_msa;
 #idct16
 add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
 specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa/;
-$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
 
 #iwalsh1
 add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
 specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
-$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
-# no asm yet
 
 #iwalsh16
 add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
 specialize qw/vp8_short_inv_walsh4x4 mmx sse2 neon dspr2 msa/;
-$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
 
 #idct1_scalar_add
 add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
 specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa/;
-$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
 
 #
 # RECON
 #
 add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_copy_mem16x16 mmx sse2 neon dspr2 msa/;
-$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
 
 add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa/;
-$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
 
 add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa/;
-$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
 
 #
 # Postproc
@@ -140,13 +118,10 @@ $vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
 if (vpx_config("CONFIG_POSTPROC") eq "yes") {
 
     add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
-    # no asm yet
 
     add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
-    # no asm yet
 
     add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
-    # no asm yet
 
     add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
     specialize qw/vp8_filter_by_weight16x16 sse2 msa/;
@@ -155,7 +130,6 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
     specialize qw/vp8_filter_by_weight8x8 sse2 msa/;
 
     add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
-    # no asm yet
 }
 
 #
@@ -163,19 +137,15 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
 #
 add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
 
 add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
 
 add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
 
 add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_sixtap_predict4x4 mmx ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
 
 add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
 specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 neon msa/;
@@ -251,7 +221,9 @@ specialize qw/vp8_refining_search_sad sse3/;
 $vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;
 
 add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
-$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4;
+specialize qw/vp8_diamond_search_sad sse2 msa/;
+$vp8_diamond_search_sad_sse2=vp8_diamond_search_sadx4;
+$vp8_diamond_search_sad_msa=vp8_diamond_search_sadx4;
 
 #
 # Alt-ref Noise Reduction (ARNR)
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index c6b566036..5b3ae1ce7 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -29,6 +29,7 @@
 #include "./vpx_scale_rtcd.h"
 #include "vpx_scale/vpx_scale.h"
 #include "vp8/common/systemdependent.h"
+#include "vpx_ports/system_state.h"
 #include "vpx_ports/vpx_once.h"
 #include "vpx_ports/vpx_timer.h"
 #include "detokenize.h"
@@ -352,7 +353,7 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
     goto decode_exit;
   }
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if (cm->show_frame) {
     cm->current_video_frame++;
@@ -383,7 +384,7 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
 
 decode_exit:
   pbi->common.error.setjmp = 0;
-  vp8_clear_system_state();
+  vpx_clear_system_state();
   return retcode;
 }
 int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd,
@@ -416,7 +417,7 @@ int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd,
   }
 
 #endif /*!CONFIG_POSTPROC*/
-  vp8_clear_system_state();
+  vpx_clear_system_state();
   return ret;
 }
 
@@ -447,7 +448,7 @@ int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf) {
     if (setjmp(fb->pbi[0]->common.error.jmp)) {
       vp8_remove_decoder_instances(fb);
       memset(fb->pbi, 0, sizeof(fb->pbi) / sizeof(fb->pbi[0]));
-      vp8_clear_system_state();
+      vpx_clear_system_state();
       return VPX_CODEC_ERROR;
     }
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 1b100cfe8..7086faae9 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -19,6 +19,7 @@
 #include <limits.h>
 #include "vpx/vpx_encoder.h"
 #include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/system_state.h"
 #include "bitstream.h"
 
 #include "defaultcoefcounts.h"
@@ -843,7 +844,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) {
   int new_intra, new_last, new_garf, oldtotal, newtotal;
   int ref_frame_cost[MAX_REF_FRAMES];
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if (cpi->common.frame_type != KEY_FRAME) {
     if (!(new_intra = rf_intra * 255 / (rf_intra + rf_inter))) new_intra = 1;
@@ -908,7 +909,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) {
 #endif
   int savings = 0;
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   do {
     int j = 0;
@@ -1295,7 +1296,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
 
 #endif
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
   pack_coef_probs(cpi);
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index cfc7af663..36e9a9078 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -12,6 +12,7 @@
 #include "encodemv.h"
 #include "vp8/common/entropymode.h"
 #include "vp8/common/systemdependent.h"
+#include "vpx_ports/system_state.h"
 
 #include <math.h>
 
@@ -126,7 +127,7 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc,
   unsigned int cost0 = 0;
   unsigned int cost1 = 0;
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   i = 1;
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index cd34e33fb..884d6e18b 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -26,6 +26,7 @@
 #include "vpx_scale/vpx_scale.h"
 #include "encodemb.h"
 #include "vp8/common/extend.h"
+#include "vpx_ports/system_state.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vp8/common/swapyv12buffer.h"
 #include "rdopt.h"
@@ -499,7 +500,7 @@ void vp8_first_pass(VP8_COMP *cpi) {
 
   zero_ref_mv.as_int = 0;
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   x->src = *cpi->Source;
   xd->pre = *lst_yv12;
@@ -741,10 +742,10 @@ void vp8_first_pass(VP8_COMP *cpi) {
     /* extend the recon for intra prediction */
     vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8,
                       xd->dst.v_buffer + 8);
-    vp8_clear_system_state();
+    vpx_clear_system_state();
   }
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
   {
     double weight = 0.0;
 
@@ -1655,7 +1656,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   cpi->twopass.gf_group_bits = 0;
   cpi->twopass.gf_decay_rate = 0;
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   start_pos = cpi->twopass.stats_in;
 
@@ -2268,7 +2269,7 @@ void vp8_second_pass(VP8_COMP *cpi) {
     return;
   }
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if (EOF == input_stats(cpi, &this_frame)) return;
 
@@ -2543,7 +2544,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
   memset(&next_frame, 0, sizeof(next_frame));
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
   start_position = cpi->twopass.stats_in;
 
   cpi->common.frame_type = KEY_FRAME;
diff --git a/vp8/encoder/mips/msa/quantize_msa.c b/vp8/encoder/mips/msa/quantize_msa.c
index 11f70ae82..9f5fbd39c 100644
--- a/vp8/encoder/mips/msa/quantize_msa.c
+++ b/vp8/encoder/mips/msa/quantize_msa.c
@@ -12,10 +12,9 @@
 #include "vp8/common/mips/msa/vp8_macros_msa.h"
 #include "vp8/encoder/block.h"
 
-static int8_t fast_quantize_b_msa(int16_t *coeff_ptr, int16_t *zbin,
-                                  int16_t *round, int16_t *quant,
-                                  int16_t *de_quant, int16_t *q_coeff,
-                                  int16_t *dq_coeff) {
+static int8_t fast_quantize_b_msa(int16_t *coeff_ptr, int16_t *round,
+                                  int16_t *quant, int16_t *de_quant,
+                                  int16_t *q_coeff, int16_t *dq_coeff) {
   int32_t cnt, eob;
   v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15 };
   v8i16 round0, round1;
@@ -184,15 +183,14 @@ static int8_t exact_regular_quantize_b_msa(
 
 void vp8_fast_quantize_b_msa(BLOCK *b, BLOCKD *d) {
   int16_t *coeff_ptr = b->coeff;
-  int16_t *zbin_ptr = b->zbin;
   int16_t *round_ptr = b->round;
   int16_t *quant_ptr = b->quant_fast;
   int16_t *qcoeff_ptr = d->qcoeff;
   int16_t *dqcoeff_ptr = d->dqcoeff;
   int16_t *dequant_ptr = d->dequant;
 
-  *d->eob = fast_quantize_b_msa(coeff_ptr, zbin_ptr, round_ptr, quant_ptr,
-                                dequant_ptr, qcoeff_ptr, dqcoeff_ptr);
+  *d->eob = fast_quantize_b_msa(coeff_ptr, round_ptr, quant_ptr, dequant_ptr,
+                                qcoeff_ptr, dqcoeff_ptr);
 }
 
 void vp8_regular_quantize_b_msa(BLOCK *b, BLOCKD *d) {
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 6ebf233ed..49ee37a5b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -33,6 +33,7 @@
 #include "vp8/common/reconintra.h"
 #include "vp8/common/swapyv12buffer.h"
 #include "vp8/common/threading.h"
+#include "vpx_ports/system_state.h"
 #include "vpx_ports/vpx_timer.h"
 #if ARCH_ARM
 #include "vpx_ports/arm.h"
@@ -2296,7 +2297,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
     recon += recon_stride;
   }
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
   return total_sse;
 }
 
@@ -2691,7 +2692,7 @@ static int decide_key_frame(VP8_COMP *cpi) {
   if (cpi->Speed > 11) return 0;
 
   /* Clear down mmx registers */
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) {
     double change = 1.0 *
@@ -3129,7 +3130,7 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) {
   } else {
     struct vpx_usec_timer timer;
 
-    vp8_clear_system_state();
+    vpx_clear_system_state();
 
     vpx_usec_timer_start(&timer);
     if (cpi->sf.auto_filter == 0) {
@@ -3217,7 +3218,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
   int drop_mark25 = drop_mark / 8;
 
   /* Clear down mmx registers to allow floating point in what follows */
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if (cpi->force_next_frame_intra) {
     cm->frame_type = KEY_FRAME; /* delayed intra frame */
@@ -3576,7 +3577,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
    * There is some odd behavior for one pass here that needs attention.
    */
   if ((cpi->pass == 2) || (cpi->ni_frames > 150)) {
-    vp8_clear_system_state();
+    vpx_clear_system_state();
 
     Q = cpi->active_worst_quality;
 
@@ -3802,7 +3803,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
 #endif
 
   do {
-    vp8_clear_system_state();
+    vpx_clear_system_state();
 
     vp8_set_quantizer(cpi, Q);
 
@@ -3935,7 +3936,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
     cpi->projected_frame_size =
         (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
 #endif
-    vp8_clear_system_state();
+    vpx_clear_system_state();
 
     /* Test to see if the stats generated for this frame indicate that
      * we should have coded a key frame (assuming that we didn't)!
@@ -3979,7 +3980,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
 #endif
     }
 
-    vp8_clear_system_state();
+    vpx_clear_system_state();
 
     if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
 
@@ -4549,7 +4550,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
     {
         FILE *f = fopen("tmp.stt", "a");
 
-        vp8_clear_system_state();
+        vpx_clear_system_state();
 
         if (cpi->twopass.total_left_stats.coded_error != 0.0)
             fprintf(f, "%10d %10d %10d %10d %10d %10"PRId64" %10"PRId64
@@ -4779,7 +4780,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
 
   if (setjmp(cpi->common.error.jmp)) {
     cpi->common.error.setjmp = 0;
-    vp8_clear_system_state();
+    vpx_clear_system_state();
     return VPX_CODEC_CORRUPT_FRAME;
   }
 
@@ -4986,7 +4987,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
   *size = 0;
 
   /* Clear down mmx registers */
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   cm->frame_type = INTER_FRAME;
   cm->frame_flags = *frame_flags;
@@ -5139,7 +5140,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
 
           vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer,
                       cm->filter_level * 10 / 6, 1, 0);
-          vp8_clear_system_state();
+          vpx_clear_system_state();
 
           ye = calc_plane_error(orig->y_buffer, orig->y_stride, pp->y_buffer,
                                 pp->y_stride, y_width, y_height);
@@ -5249,7 +5250,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest,
     }
 
 #endif
-    vp8_clear_system_state();
+    vpx_clear_system_state();
     return ret;
   }
 }
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 649f696dc..4d6afc19b 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -22,6 +22,7 @@
 #include "vp8/common/systemdependent.h"
 #include "encodemv.h"
 #include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_ports/system_state.h"
 
 #define MIN_BPB_FACTOR 0.01
 #define MAX_BPB_FACTOR 50
@@ -296,7 +297,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi) {
   uint64_t target;
 
   /* Clear down mmx registers to allow floating point in what follows */
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if (cpi->oxcf.fixed_q >= 0) {
     int Q = cpi->oxcf.key_q;
@@ -1019,7 +1020,7 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) {
   int projected_size_based_on_q = 0;
 
   /* Clear down mmx registers to allow floating point in what follows */
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   if (cpi->common.frame_type == KEY_FRAME) {
     rate_correction_factor = cpi->key_frame_rate_correction_factor;
@@ -1302,7 +1303,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) {
 
 void vp8_adjust_key_frame_context(VP8_COMP *cpi) {
   /* Clear down mmx registers to allow floating point in what follows */
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   /* Do we have any key frame overspend to recover? */
   /* Two-pass overspend handled elsewhere. */
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 9ba301e08..7bbeb2857 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -30,6 +30,7 @@
 #include "encodemb.h"
 #include "vp8/encoder/quantize.h"
 #include "vpx_dsp/variance.h"
+#include "vpx_ports/system_state.h"
 #include "mcomp.h"
 #include "rdopt.h"
 #include "vpx_mem/vpx_mem.h"
@@ -163,7 +164,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
   double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
   double rdconst = 2.80;
 
-  vp8_clear_system_state();
+  vpx_clear_system_state();
 
   /* Further tests required to see if optimum is different
    * for key frames, golden frames and arf frames.
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index b1f8340d6..8145a6118 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -24,6 +24,7 @@
 #include "decoder/onyxd_int.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/system_state.h"
 #if CONFIG_ERROR_CONCEALMENT
 #include "decoder/error_concealment.h"
 #endif
@@ -365,7 +366,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
            * reallocation is attempted on resync. */
           ctx->si.w = 0;
           ctx->si.h = 0;
-          vp8_clear_system_state();
+          vpx_clear_system_state();
           /* same return value as used in vp8dx_receive_compressed_data */
           return -1;
         }
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index a8cc7e93a..e2fe37a32 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -27,12 +27,9 @@ void vp9_init_mv_probs(struct VP9Common *cm);
 
 void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
 
-// Integer pel reference mv threshold for use of high-precision 1/8 mv
-#define COMPANDED_MVREF_THRESH 8
-
 static INLINE int use_mv_hp(const MV *ref) {
-  return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
-         (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
+  const int kMvRefThresh = 64;  // threshold for use of high-precision 1/8 mv
+  return abs(ref->row) < kMvRefThresh && abs(ref->col) < kMvRefThresh;
 }
 
 #define MV_UPDATE_PROB 252
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 7048fb1ca..a441f3add 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -170,7 +170,12 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
                   read_coeff(r, vp9_cat1_prob, 1, &value, &count, &range);
           }
         }
+#if CONFIG_VP9_HIGHBITDEPTH
+        // val may use 18-bits
+        v = (int)(((int64_t)val * dqv) >> dq_shift);
+#else
         v = (val * dqv) >> dq_shift;
+#endif
       } else {
         if (read_bool(r, p[1], &value, &count, &range)) {
           token_cache[scan[c]] = 3;
@@ -188,9 +193,8 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
     }
 #if CONFIG_COEFFICIENT_RANGE_CHECKING
 #if CONFIG_VP9_HIGHBITDEPTH
-    dqcoeff[scan[c]] =
-        highbd_check_range(read_bool(r, 128, &value, &count, &range) ? -v : v),
-                           xd->bd);
+    dqcoeff[scan[c]] = highbd_check_range(
+        read_bool(r, 128, &value, &count, &range) ? -v : v, xd->bd);
 #else
     dqcoeff[scan[c]] =
         check_range(read_bool(r, 128, &value, &count, &range) ? -v : v);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 272dde593..2f1fe360d 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2093,6 +2093,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   double mv_in_out_accumulator = 0.0;
   double abs_mv_in_out_accumulator = 0.0;
   double mv_ratio_accumulator_thresh;
+  double mv_in_out_thresh;
+  double abs_mv_in_out_thresh;
   unsigned int allow_alt_ref = is_altref_enabled(cpi);
 
   int f_boost = 0;
@@ -2136,6 +2138,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   // Motion breakout threshold for loop below depends on image size.
   mv_ratio_accumulator_thresh =
       (cpi->initial_height + cpi->initial_width) / 4.0;
+  mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 300.0;
+  abs_mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 200.0;
 
   // Set a maximum and minimum interval for the GF group.
   // If the image appears almost completely static we can extend beyond this.
@@ -2232,8 +2236,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
             ((rc->frames_to_key - i) >= rc->min_gf_interval) &&
             (!flash_detected) &&
             ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
-             (abs_mv_in_out_accumulator > 3.0) ||
-             (mv_in_out_accumulator < -2.0) ||
+             (abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
+             (mv_in_out_accumulator < -mv_in_out_thresh) ||
              ((boost_score - old_boost_score) < BOOST_BREAKOUT)))) {
       boost_score = old_boost_score;
       break;
@@ -2265,6 +2269,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     rc->source_alt_ref_pending = 0;
   }
 
+  // Limit maximum boost based on interval length.
+  rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200);
+
   // Set the interval until the next gf.
   rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
 
diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/vpx_dsp/arm/vpx_convolve8_avg_neon.c
index 69cb28400..8e5373be0 100644
--- a/vpx_dsp/arm/vpx_convolve8_avg_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon.c
@@ -64,6 +64,10 @@ void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
 
   assert(x_step_q4 == 16);
 
+  (void)x_step_q4;
+  (void)y_step_q4;
+  (void)filter_y;
+
   q0s16 = vld1q_s16(filter_x);
 
   src -= 3;                // adjust for taps
@@ -240,6 +244,10 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
 
   assert(y_step_q4 == 16);
 
+  (void)x_step_q4;
+  (void)y_step_q4;
+  (void)filter_x;
+
   src -= src_stride * 3;
   q0s16 = vld1q_s16(filter_y);
   for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c
index 514525696..951c425e2 100644
--- a/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -64,6 +64,10 @@ void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
 
   assert(x_step_q4 == 16);
 
+  (void)x_step_q4;
+  (void)y_step_q4;
+  (void)filter_y;
+
   q0s16 = vld1q_s16(filter_x);
 
   src -= 3;  // adjust for taps
@@ -224,6 +228,10 @@ void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
 
   assert(y_step_q4 == 16);
 
+  (void)x_step_q4;
+  (void)y_step_q4;
+  (void)filter_x;
+
   src -= src_stride * 3;
   q0s16 = vld1q_s16(filter_y);
   for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
diff --git a/vpx_dsp/fastssim.c b/vpx_dsp/fastssim.c
index 4d5eb5a6f..0469071a1 100644
--- a/vpx_dsp/fastssim.c
+++ b/vpx_dsp/fastssim.c
@@ -202,6 +202,7 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) {
   if (bit_depth == 12) ssim_c1 = SSIM_C1_12;
 #else
   assert(bit_depth == 8);
+  (void)bit_depth;
 #endif
   w = _ctx->level[_l].w;
   h = _ctx->level[_l].h;
@@ -326,6 +327,7 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) {
   if (bit_depth == 12) ssim_c2 = SSIM_C2_12;
 #else
   assert(bit_depth == 8);
+  (void)bit_depth;
 #endif
 
   w = _ctx->level[_l].w;
diff --git a/vpx_dsp/mips/add_noise_msa.c b/vpx_dsp/mips/add_noise_msa.c
index e372b9d8c..48278d2ec 100644
--- a/vpx_dsp/mips/add_noise_msa.c
+++ b/vpx_dsp/mips/add_noise_msa.c
@@ -14,7 +14,7 @@
 void vpx_plane_add_noise_msa(uint8_t *start_ptr, const int8_t *noise,
                              int blackclamp, int whiteclamp, int width,
                              int height, int32_t pitch) {
-  uint32_t i, j;
+  int i, j;
 
   for (i = 0; i < height / 2; ++i) {
     uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
diff --git a/vpx_dsp/prob.h b/vpx_dsp/prob.h
index 3127a00bb..5656ddbab 100644
--- a/vpx_dsp/prob.h
+++ b/vpx_dsp/prob.h
@@ -11,6 +11,8 @@
 #ifndef VPX_DSP_PROB_H_
 #define VPX_DSP_PROB_H_
 
+#include <assert.h>
+
 #include "./vpx_config.h"
 #include "./vpx_dsp_common.h"
 
@@ -43,17 +45,20 @@ typedef int8_t vpx_tree_index;
 
 typedef const vpx_tree_index vpx_tree[];
 
-static INLINE vpx_prob clip_prob(int p) {
-  return (p > 255) ? 255 : (p < 1) ? 1 : p;
-}
-
 static INLINE vpx_prob get_prob(unsigned int num, unsigned int den) {
-  if (den == 0) return 128u;
-  return clip_prob((int)(((int64_t)num * 256 + (den >> 1)) / den));
+  assert(den != 0);
+  {
+    const int p = (int)(((int64_t)num * 256 + (den >> 1)) / den);
+    // Branchless form of: (p > 255) ? 255 : (p < 1) ? 1 : p;
+    const int clipped_prob = p | ((255 - p) >> 23) | (p == 0);
+    return (vpx_prob)clipped_prob;
+  }
 }
 
 static INLINE vpx_prob get_binary_prob(unsigned int n0, unsigned int n1) {
-  return get_prob(n0, n0 + n1);
+  const unsigned int den = n0 + n1;
+  if (den == 0) return 128u;
+  return get_prob(n0, den);
 }
 
 /* This function assumes prob1 and prob2 are already within [1,255] range. */
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 9fea2d1cf..46dd243f3 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -11,12 +11,6 @@ EOF
 }
 forward_decls qw/vpx_dsp_forward_decls/;
 
-# optimizations which depend on multiple features
-$avx2_ssse3 = '';
-if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
-  $avx2_ssse3 = 'avx2';
-}
-
 # functions that are 64 bit only.
 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
 if ($opts{arch} eq "x86_64") {
@@ -437,13 +431,13 @@ add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride,
 specialize qw/vpx_convolve_avg neon dspr2 msa sse2/;
 
 add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa/;
 
 add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa/;
 
 add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa/;
 
 add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/;
diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h
index 2a0516cdc..d7468ad7c 100644
--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -25,6 +25,10 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
       const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
       ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,          \
       const int16_t *filter_y, int y_step_q4, int w, int h) {                \
+    (void)filter_x;                                                          \
+    (void)x_step_q4;                                                         \
+    (void)filter_y;                                                          \
+    (void)y_step_q4;                                                         \
     assert(filter[3] != 128);                                                \
     assert(step_q4 == 16);                                                   \
     if (filter[0] | filter[1] | filter[2]) {                                 \
diff --git a/vpxdec.c b/vpxdec.c
index ab638ec6b..d1ed3e6ca 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -495,6 +495,7 @@ static int main_loop(int argc, const char **argv_) {
   vpx_codec_ctx_t decoder;
   char *fn = NULL;
   int i;
+  int ret = EXIT_FAILURE;
   uint8_t *buf = NULL;
   size_t bytes_in_buffer = 0, buffer_size = 0;
   FILE *infile;
@@ -723,7 +724,7 @@ static int main_loop(int argc, const char **argv_) {
                          dec_flags)) {
     fprintf(stderr, "Failed to initialize decoder: %s\n",
             vpx_codec_error(&decoder));
-    return EXIT_FAILURE;
+    goto fail2;
   }
 
   if (!quiet) fprintf(stderr, "%s\n", decoder.name);
@@ -733,7 +734,7 @@ static int main_loop(int argc, const char **argv_) {
       vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) {
     fprintf(stderr, "Failed to configure postproc: %s\n",
             vpx_codec_error(&decoder));
-    return EXIT_FAILURE;
+    goto fail;
   }
 #endif
 
@@ -752,7 +753,7 @@ static int main_loop(int argc, const char **argv_) {
                                              &ext_fb_list)) {
       fprintf(stderr, "Failed to configure external frame buffers: %s\n",
               vpx_codec_error(&decoder));
-      return EXIT_FAILURE;
+      goto fail;
     }
   }
 
@@ -861,7 +862,7 @@ static int main_loop(int argc, const char **argv_) {
                   "Scaling is disabled in this configuration. "
                   "To enable scaling, configure with --enable-libyuv\n",
                   vpx_codec_error(&decoder));
-          return EXIT_FAILURE;
+          goto fail;
 #endif
         }
       }
@@ -972,17 +973,21 @@ static int main_loop(int argc, const char **argv_) {
     fprintf(stderr, "\n");
   }
 
-  if (frames_corrupted)
+  if (frames_corrupted) {
     fprintf(stderr, "WARNING: %d frames corrupted.\n", frames_corrupted);
+  } else {
+    ret = EXIT_SUCCESS;
+  }
 
 fail:
 
   if (vpx_codec_destroy(&decoder)) {
     fprintf(stderr, "Failed to destroy decoder: %s\n",
             vpx_codec_error(&decoder));
-    return EXIT_FAILURE;
   }
 
+fail2:
+
   if (!noblit && single_file) {
     if (do_md5) {
       MD5Final(md5_digest, &md5_ctx);
@@ -1012,7 +1017,7 @@ fail:
   fclose(infile);
   free(argv);
 
-  return frames_corrupted ? EXIT_FAILURE : EXIT_SUCCESS;
+  return ret;
 }
 
 int main(int argc, const char **argv_) {