50 files changed, 627 insertions, 306 deletions
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 9c3044168..c592b6385 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -201,6 +201,10 @@ disabled(){
   eval test "x\$$1" = "xno"
 }
 
+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) disabled, and enables the setting controlled by
+# the parameter when the setting is not disabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
 soft_enable() {
   for var in $*; do
     if ! disabled $var; then
@@ -210,6 +214,10 @@ soft_enable() {
   done
 }
 
+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) enabled, and disables the setting controlled by
+# the parameter when the setting is not enabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
 soft_disable() {
   for var in $*; do
     if ! enabled $var; then
@@ -625,6 +633,11 @@ show_darwin_sdk_path() {
     xcodebuild -sdk $1 -version Path 2>/dev/null
 }
 
+# Print the major version (text before the first '.') of Darwin SDK $1.
+show_darwin_sdk_major_version() {
+  xcrun --sdk "$1" --show-sdk-version 2>/dev/null | cut -d. -f1
+}
+
 process_common_toolchain() {
   if [ -z "$toolchain" ]; then
     gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
@@ -736,7 +749,15 @@ process_common_toolchain() {
   # Handle darwin variants. Newer SDKs allow targeting older
   # platforms, so use the newest one available.
   case ${toolchain} in
-    *-darwin*)
+    arm*-darwin*)
+      add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
+      iphoneos_sdk_dir="$(show_darwin_sdk_path iphoneos)"
+      if [ -d "${iphoneos_sdk_dir}" ]; then
+        add_cflags  "-isysroot ${iphoneos_sdk_dir}"
+        add_ldflags "-isysroot ${iphoneos_sdk_dir}"
+      fi
+      ;;
+    x86*-darwin*)
       osx_sdk_dir="$(show_darwin_sdk_path macosx)"
       if [ -d "${osx_sdk_dir}" ]; then
         add_cflags  "-isysroot ${osx_sdk_dir}"
@@ -811,10 +832,36 @@ process_common_toolchain() {
           if disabled neon && enabled neon_asm; then
             die "Disabling neon while keeping neon-asm is not supported"
           fi
-          soft_enable media
+          case ${toolchain} in
+            # Apple iOS SDKs no longer support armv6 as of the version 9
+            # release (coincides with release of Xcode 7). Only enable media
+            # when using earlier SDK releases.
+            *-darwin*)
+              if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then
+                soft_enable media
+              else
+                soft_disable media
+                RTCD_OPTIONS="${RTCD_OPTIONS}--disable-media "
+              fi
+              ;;
+            *)
+              soft_enable media
+              ;;
+          esac
           ;;
         armv6)
-          soft_enable media
+          case ${toolchain} in
+            *-darwin*)
+              if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then
+                soft_enable media
+              else
+                die "Your iOS SDK does not support armv6."
+              fi
+              ;;
+            *)
+              soft_enable media
+              ;;
+          esac
           ;;
       esac
 
@@ -997,6 +1044,12 @@ EOF
           done
 
           asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl"
+
+          if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then
+            check_add_cflags -fembed-bitcode
+            check_add_asflags -fembed-bitcode
+            check_add_ldflags -fembed-bitcode
+          fi
           ;;
 
         linux*)
@@ -1167,7 +1220,8 @@ EOF
               && AS=""
           fi
           [ "${AS}" = auto ] || [ -z "${AS}" ] \
-            && die "Neither yasm nor nasm have been found"
+            && die "Neither yasm nor nasm have been found." \
+                   "See the prerequisites section in the README for more info."
           ;;
       esac
       log_echo "  using $AS"
@@ -1206,6 +1260,13 @@ EOF
           enabled x86 && sim_arch="-arch i386" || sim_arch="-arch x86_64"
           add_cflags  ${sim_arch}
           add_ldflags ${sim_arch}
+
+          if [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then
+            # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it
+            # on is pointless (unless building a C-only lib). Warn the user, but
+            # do nothing here.
+            log "Warning: Bitcode embed disabled for simulator targets."
+          fi
           ;;
         os2)
           add_asflags -f aout
diff --git a/build/make/iosbuild.sh b/build/make/iosbuild.sh
index 927f3e532..6f7180d08 100755
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -25,7 +25,6 @@ CONFIGURE_ARGS="--disable-docs
 DIST_DIR="_dist"
 FRAMEWORK_DIR="VPX.framework"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
-MAKE_JOBS=1
 SCRIPT_DIR=$(dirname "$0")
 LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
 LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
@@ -58,7 +57,7 @@ build_target() {
     ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \
     ${devnull}
   export DIST_DIR
-  eval make -j ${MAKE_JOBS} dist ${devnull}
+  eval make dist ${devnull}
   cd "${old_pwd}"
 
   vlog "***Done building target: ${target}***"
@@ -203,7 +202,6 @@ cat << EOF
   Usage: ${0##*/} [arguments]
     --help: Display this message and exit.
     --extra-configure-args <args>: Extra args to pass when configuring libvpx.
-    --jobs: Number of make jobs.
     --preserve-build-output: Do not delete the build directory.
     --show-build-output: Show output from each library build.
     --targets <targets>: Override default target list. Defaults:
@@ -238,10 +236,6 @@ while [ -n "$1" ]; do
       iosbuild_usage
       exit
       ;;
-    --jobs)
-      MAKE_JOBS="$2"
-      shift
-      ;;
     --preserve-build-output)
       PRESERVE_BUILD_OUTPUT=yes
       ;;
@@ -274,11 +268,11 @@ cat << EOF
   EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
   FRAMEWORK_DIR=${FRAMEWORK_DIR}
   HEADER_DIR=${HEADER_DIR}
-  MAKE_JOBS=${MAKE_JOBS}
-  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
   LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
   LIPO=${LIPO}
+  MAKEFLAGS=${MAKEFLAGS}
   ORIG_PWD=${ORIG_PWD}
+  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
   TARGETS="${TARGETS}"
 EOF
 fi
diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h
index 9ecc4989e..6d0a72f98 100644
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -124,6 +124,11 @@ class Encoder {
     ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
   }
 
+  void Control(int ctrl_id, int *arg) {
+    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
+
   void Control(int ctrl_id, struct vpx_scaling_mode *arg) {
     const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
     ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
diff --git a/test/resize_test.cc b/test/resize_test.cc
index a86c9d115..98b6f87e1 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -196,13 +196,27 @@ class ResizeInternalTest : public ResizeTest {
 
   virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                   libvpx_test::Encoder *encoder) {
-    if (video->frame() == kStepDownFrame) {
-      struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
-      encoder->Control(VP8E_SET_SCALEMODE, &mode);
-    }
-    if (video->frame() == kStepUpFrame) {
-      struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
-      encoder->Control(VP8E_SET_SCALEMODE, &mode);
+    if (change_config_) {
+      int new_q = 60;
+      if (video->frame() == 0) {
+        struct vpx_scaling_mode mode = {VP8E_ONETWO, VP8E_ONETWO};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+      }
+      if (video->frame() == 1) {
+        struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+        cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q;
+        encoder->Config(&cfg_);
+      }
+    } else {
+      if (video->frame() == kStepDownFrame) {
+        struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+      }
+      if (video->frame() == kStepUpFrame) {
+        struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+      }
     }
   }
 
@@ -227,6 +241,7 @@ class ResizeInternalTest : public ResizeTest {
 #endif
 
   double frame0_psnr_;
+  bool change_config_;
 #if WRITE_COMPRESSED_STREAM
   FILE *outfile_;
   unsigned int out_frames_;
@@ -237,6 +252,7 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 10);
   init_flags_ = VPX_CODEC_USE_PSNR;
+  change_config_ = false;
 
   // q picked such that initial keyframe on this clip is ~30dB PSNR
   cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
@@ -261,6 +277,15 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
   }
 }
 
+TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 10);
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+  change_config_ = true;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
 class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
   public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
  protected:
diff --git a/test/test.mk b/test/test.mk
index fde970311..bb5186b00 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -168,7 +168,7 @@ TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc
 TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c
 
 ## VP10
-LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
 
 endif # CONFIG_SHARED
diff --git a/test/vp10_dct_test.cc b/test/vp10_dct_test.cc
index 8fb5f4ff3..b2c301ae3 100644
--- a/test/vp10_dct_test.cc
+++ b/test/vp10_dct_test.cc
@@ -17,7 +17,10 @@
 #include "test/util.h"
 #include "./vpx_config.h"
 #include "vpx_ports/msvc.h"
-#include "vp10/encoder/dct.h"
+
+#undef CONFIG_COEFFICIENT_RANGE_CHECKING
+#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
+#include "vp10/encoder/dct.c"
 
 using libvpx_test::ACMRandom;
 
@@ -102,8 +105,7 @@ TEST_P(Vp10FwdTxfm, RunFwdAccuracyCheck) {
 INSTANTIATE_TEST_CASE_P(
     C, Vp10FwdTxfm,
     ::testing::Values(
-        FdctParam(&vp10_fdct4, &reference_dct_1d, 4, 1),
-        FdctParam(&vp10_fdct8, &reference_dct_1d, 8, 1),
-        FdctParam(&vp10_fdct16, &reference_dct_1d, 16, 2),
-        FdctParam(&vp10_fdct32_local, &reference_dct_1d, 32, 4)));
+        FdctParam(&fdct4, &reference_dct_1d, 4, 1),
+        FdctParam(&fdct8, &reference_dct_1d, 8, 1),
+        FdctParam(&fdct16, &reference_dct_1d, 16, 2)));
 }  // namespace
diff --git a/test/vp9_encoder_parms_get_to_decoder.cc b/test/vp9_encoder_parms_get_to_decoder.cc
index 901605d06..0984e6a42 100644
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@@ -42,6 +42,7 @@ struct EncodeParameters {
   int32_t frame_parallel;
   int32_t color_range;
   vpx_color_space_t cs;
+  int render_size[2];
   // TODO(JBB): quantizers / bitrate
 };
 
@@ -49,7 +50,7 @@ const EncodeParameters kVP9EncodeParameterSet[] = {
   {0, 0, 0, 1, 0, 0, VPX_CS_BT_601},
   {0, 0, 0, 0, 0, 1, VPX_CS_BT_709},
   {0, 0, 1, 0, 0, 1, VPX_CS_BT_2020},
-  {0, 2, 0, 0, 1, 0, VPX_CS_UNKNOWN},
+  {0, 2, 0, 0, 1, 0, VPX_CS_UNKNOWN, { 640, 480 }},
   // TODO(JBB): Test profiles (requires more work).
 };
 
@@ -88,6 +89,8 @@ class VpxEncoderParmsGetToDecoder
       encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
       encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
       encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+      if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0)
+        encoder->Control(VP9E_SET_RENDER_SIZE, encode_parms.render_size);
     }
   }
 
@@ -118,6 +121,10 @@ class VpxEncoderParmsGetToDecoder
     }
     EXPECT_EQ(encode_parms.color_range, common->color_range);
     EXPECT_EQ(encode_parms.cs, common->color_space);
+    if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
+      EXPECT_EQ(encode_parms.render_size[0], common->render_width);
+      EXPECT_EQ(encode_parms.render_size[1], common->render_height);
+    }
     EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols);
     EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows);
 
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 144afc34b..5ee15c862 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -179,21 +179,24 @@ void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
 }
 
 void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
-                           int stride, int eob, TX_TYPE tx_type,
-                           void (*itxm_add_4x4)(const tran_low_t *input,
-                               uint8_t *dest, int stride, int eob)) {
-  switch (tx_type) {
-    case DCT_DCT:
-      itxm_add_4x4(input, dest, stride, eob);
-      break;
-    case ADST_DCT:
-    case DCT_ADST:
-    case ADST_ADST:
-      vp10_iht4x4_16_add(input, dest, stride, tx_type);
-      break;
-    default:
-      assert(0);
-      break;
+                           int stride, int eob, TX_TYPE tx_type, int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_iwht4x4_add(input, dest, stride, eob);
+  } else {
+    switch (tx_type) {
+      case DCT_DCT:
+        vp10_idct4x4_add(input, dest, stride, eob);
+        break;
+      case ADST_DCT:
+      case DCT_ADST:
+      case ADST_ADST:
+        vp10_iht4x4_16_add(input, dest, stride, tx_type);
+        break;
+      default:
+        assert(0);
+        break;
+    }
   }
 }
 
@@ -418,21 +421,24 @@ void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
 
 void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd, TX_TYPE tx_type,
-                                  void (*highbd_itxm_add_4x4)
-                                  (const tran_low_t *input, uint8_t *dest,
-                                      int stride, int eob, int bd)) {
-  switch (tx_type) {
-    case DCT_DCT:
-      highbd_itxm_add_4x4(input, dest, stride, eob, bd);
-      break;
-    case ADST_DCT:
-    case DCT_ADST:
-    case ADST_ADST:
-      vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
-      break;
-    default:
-      assert(0);
-      break;
+                                  int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_highbd_iwht4x4_add(input, dest, stride, eob, bd);
+  } else {
+    switch (tx_type) {
+      case DCT_DCT:
+        vp10_highbd_idct4x4_add(input, dest, stride, eob, bd);
+        break;
+      case ADST_DCT:
+      case DCT_ADST:
+      case ADST_ADST:
+        vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
+        break;
+      default:
+        assert(0);
+        break;
+    }
   }
 }
 
diff --git a/vp10/common/idct.h b/vp10/common/idct.h
index 2e000529b..088339804 100644
--- a/vp10/common/idct.h
+++ b/vp10/common/idct.h
@@ -44,9 +44,7 @@ void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                      int eob);
 
 void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
-                           int stride, int eob, TX_TYPE tx_type,
-                           void (*itxm_add_4x4)(const tran_low_t *input,
-                               uint8_t *dest, int stride, int eob));
+                           int stride, int eob, TX_TYPE tx_type, int lossless);
 void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
                            int stride, int eob, TX_TYPE tx_type);
 void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
@@ -67,9 +65,7 @@ void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, int bd);
 void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd, TX_TYPE tx_type,
-                                  void (*highbd_itxm_add_4x4)
-                                  (const tran_low_t *input, uint8_t *dest,
-                                      int stride, int eob, int bd));
+                                  int lossless);
 void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd, TX_TYPE tx_type);
 void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index 1b89ed5a2..a1925de55 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -719,7 +719,11 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
   uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
   uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
+#if CONFIG_MISC_FIXES
+  uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
+#else
   uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
+#endif
   int i;
 
   // If filter level is 0 we don't loop filter.
@@ -1015,7 +1019,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
       lfm->above_uv[i] &= mask_uv;
     }
     lfm->int_4x4_y &= mask_y;
+#if CONFIG_MISC_FIXES
+    lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;
+#else
     lfm->int_4x4_uv &= mask_uv;
+#endif
 
     // We don't apply a wide loop filter on the last uv block row. If set
     // apply the shorter one instead.
@@ -1049,7 +1057,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
       lfm->above_uv[i] &= mask_uv;
     }
     lfm->int_4x4_y &= mask_y;
+#if CONFIG_MISC_FIXES
+    lfm->left_int_4x4_uv &= mask_uv_int;
+#else
     lfm->int_4x4_uv &= mask_uv_int;
+#endif
 
     // We don't apply a wide loop filter on the last uv column. If set
     // apply the shorter one instead.
@@ -1079,7 +1091,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
   assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
   assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
   assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
+#if CONFIG_MISC_FIXES
+  assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
+#else
   assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
+#endif
   assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
   assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
   assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
@@ -1087,7 +1103,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
   assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
   assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
   assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
+#if CONFIG_MISC_FIXES
+  assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
+#else
   assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
+#endif
 }
 
 static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -1442,7 +1462,11 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
   uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
   uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
   uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
+#if CONFIG_MISC_FIXES
+  uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
+#else
   uint16_t mask_4x4_int = lfm->int_4x4_uv;
+#endif
 
   assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
 
@@ -1494,7 +1518,11 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
   mask_16x16 = lfm->above_uv[TX_16X16];
   mask_8x8 = lfm->above_uv[TX_8X8];
   mask_4x4 = lfm->above_uv[TX_4X4];
+#if CONFIG_MISC_FIXES
+  mask_4x4_int = lfm->above_int_4x4_uv;
+#else
   mask_4x4_int = lfm->int_4x4_uv;
+#endif
 
   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
     const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h
index 329ab75af..8db705aa0 100644
--- a/vp10/common/loopfilter.h
+++ b/vp10/common/loopfilter.h
@@ -80,7 +80,12 @@ typedef struct {
   uint64_t int_4x4_y;
   uint16_t left_uv[TX_SIZES];
   uint16_t above_uv[TX_SIZES];
+#if CONFIG_MISC_FIXES
+  uint16_t left_int_4x4_uv;
+  uint16_t above_int_4x4_uv;
+#else
   uint16_t int_4x4_uv;
+#endif
   uint8_t lfl_y[64];
   uint8_t lfl_uv[16];
 } LOOP_FILTER_MASK;
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index eeaadc61d..c345068b0 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -132,8 +132,8 @@ typedef struct VP10Common {
   int color_range;
   int width;
   int height;
-  int display_width;
-  int display_height;
+  int render_width;
+  int render_height;
   int last_width;
   int last_height;
 
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 50d50f88e..91096a2f6 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -124,6 +124,18 @@ static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
       vp10_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
 }
 
+#if CONFIG_MISC_FIXES
+static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm,
+    struct vpx_read_bit_buffer *rb) {
+  if (is_compound_reference_allowed(cm)) {
+    return vpx_rb_read_bit(rb) ? REFERENCE_MODE_SELECT
+                               : (vpx_rb_read_bit(rb) ? COMPOUND_REFERENCE
+                                                      : SINGLE_REFERENCE);
+  } else {
+    return SINGLE_REFERENCE;
+  }
+}
+#else
 static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm,
                                                 vpx_reader *r) {
   if (is_compound_reference_allowed(cm)) {
@@ -134,6 +146,7 @@ static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm,
     return SINGLE_REFERENCE;
   }
 }
+#endif
 
 static void read_frame_reference_mode_probs(VP10_COMMON *cm, vpx_reader *r) {
   FRAME_CONTEXT *const fc = cm->fc;
@@ -203,9 +216,7 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
       switch (tx_size) {
         case TX_4X4:
           vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd,
-                                       tx_type, xd->lossless ?
-                                           vp10_highbd_iwht4x4_add :
-                                           vp10_highbd_idct4x4_add);
+                                       tx_type, xd->lossless);
           break;
         case TX_8X8:
           vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd,
@@ -228,8 +239,7 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
       switch (tx_size) {
         case TX_4X4:
           vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type,
-                                xd->lossless ? vp10_iwht4x4_add :
-                                    vp10_idct4x4_add);
+                                xd->lossless);
           break;
         case TX_8X8:
           vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type);
@@ -274,9 +284,7 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
       switch (tx_size) {
         case TX_4X4:
           vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd,
-                                       tx_type, xd->lossless ?
-                                           vp10_highbd_iwht4x4_add :
-                                           vp10_highbd_idct4x4_add);
+                                       tx_type, xd->lossless);
           break;
         case TX_8X8:
           vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd,
@@ -299,8 +307,7 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
       switch (tx_size) {
         case TX_4X4:
           vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type,
-                                xd->lossless ? vp10_iwht4x4_add :
-                                    vp10_idct4x4_add);
+                                xd->lossless);
           break;
         case TX_8X8:
           vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type);
@@ -1169,12 +1176,12 @@ static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
   return vpx_rb_read_bit(rb) ? SWITCHABLE : vpx_rb_read_literal(rb, 2);
 }
 
-static void setup_display_size(VP10_COMMON *cm,
-                               struct vpx_read_bit_buffer *rb) {
-  cm->display_width = cm->width;
-  cm->display_height = cm->height;
+static void setup_render_size(VP10_COMMON *cm,
+                              struct vpx_read_bit_buffer *rb) {
+  cm->render_width = cm->width;
+  cm->render_height = cm->height;
   if (vpx_rb_read_bit(rb))
-    vp10_read_frame_size(rb, &cm->display_width, &cm->display_height);
+    vp10_read_frame_size(rb, &cm->render_width, &cm->render_height);
 }
 
 static void resize_mv_buffer(VP10_COMMON *cm) {
@@ -1222,7 +1229,7 @@ static void setup_frame_size(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
   BufferPool *const pool = cm->buffer_pool;
   vp10_read_frame_size(rb, &width, &height);
   resize_context_buffers(cm, width, height);
-  setup_display_size(cm, rb);
+  setup_render_size(cm, rb);
 
   lock_buffer_pool(pool);
   if (vpx_realloc_frame_buffer(
@@ -1246,6 +1253,8 @@ static void setup_frame_size(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
   pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
   pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
   pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_width  = cm->render_width;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
 }
 
 static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth,
@@ -1304,7 +1313,7 @@ static void setup_frame_size_with_refs(VP10_COMMON *cm,
   }
 
   resize_context_buffers(cm, width, height);
-  setup_display_size(cm, rb);
+  setup_render_size(cm, rb);
 
   lock_buffer_pool(pool);
   if (vpx_realloc_frame_buffer(
@@ -1328,6 +1337,8 @@ static void setup_frame_size_with_refs(VP10_COMMON *cm,
   pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
   pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
   pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_width  = cm->render_width;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
 }
 
 static void setup_tile_info(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
@@ -1968,6 +1979,8 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
 #endif
   get_frame_new_buffer(cm)->color_space = cm->color_space;
   get_frame_new_buffer(cm)->color_range = cm->color_range;
+  get_frame_new_buffer(cm)->render_width  = cm->render_width;
+  get_frame_new_buffer(cm)->render_height = cm->render_height;
 
   if (pbi->need_resync) {
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
@@ -2029,6 +2042,7 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
   setup_segmentation_dequant(cm);
 #if CONFIG_MISC_FIXES
   cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(rb);
+  cm->reference_mode = read_frame_reference_mode(cm, rb);
 #endif
 
   setup_tile_info(cm, rb);
@@ -2078,7 +2092,9 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
     for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
       vp10_diff_update_prob(&r, &fc->intra_inter_prob[i]);
 
+#if !CONFIG_MISC_FIXES
     cm->reference_mode = read_frame_reference_mode(cm, &r);
+#endif
     if (cm->reference_mode != SINGLE_REFERENCE)
       setup_compound_reference_mode(cm);
     read_frame_reference_mode_probs(cm, &r);
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index 81bd35787..23851afa7 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -126,6 +126,9 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
 void vp10_decoder_remove(VP10Decoder *pbi) {
   int i;
 
+  if (!pbi)
+    return;
+
   vpx_get_worker_interface()->end(&pbi->lf_worker);
   vpx_free(pbi->lf_worker.data1);
   vpx_free(pbi->tile_data);
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index da515990f..348c7cb83 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -982,14 +982,14 @@ static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr) {
   return total_size;
 }
 
-static void write_display_size(const VP10_COMMON *cm,
-                               struct vpx_write_bit_buffer *wb) {
-  const int scaling_active = cm->width != cm->display_width ||
-                             cm->height != cm->display_height;
+static void write_render_size(const VP10_COMMON *cm,
+                              struct vpx_write_bit_buffer *wb) {
+  const int scaling_active = cm->width != cm->render_width ||
+                             cm->height != cm->render_height;
   vpx_wb_write_bit(wb, scaling_active);
   if (scaling_active) {
-    vpx_wb_write_literal(wb, cm->display_width - 1, 16);
-    vpx_wb_write_literal(wb, cm->display_height - 1, 16);
+    vpx_wb_write_literal(wb, cm->render_width - 1, 16);
+    vpx_wb_write_literal(wb, cm->render_height - 1, 16);
   }
 }
 
@@ -998,7 +998,7 @@ static void write_frame_size(const VP10_COMMON *cm,
   vpx_wb_write_literal(wb, cm->width - 1, 16);
   vpx_wb_write_literal(wb, cm->height - 1, 16);
 
-  write_display_size(cm, wb);
+  write_render_size(cm, wb);
 }
 
 static void write_frame_size_with_refs(VP10_COMP *cpi,
@@ -1025,7 +1025,7 @@ static void write_frame_size_with_refs(VP10_COMP *cpi,
     vpx_wb_write_literal(wb, cm->height - 1, 16);
   }
 
-  write_display_size(cm, wb);
+  write_render_size(cm, wb);
 }
 
 static void write_sync_code(struct vpx_write_bit_buffer *wb) {
@@ -1169,6 +1169,14 @@ static void write_uncompressed_header(VP10_COMP *cpi,
     cm->tx_mode = TX_4X4;
   else
     write_txfm_mode(cm->tx_mode, wb);
+  if (cpi->allow_comp_inter_inter) {
+    const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
+    const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
+
+    vpx_wb_write_bit(wb, use_hybrid_pred);
+    if (!use_hybrid_pred)
+      vpx_wb_write_bit(wb, use_compound_pred);
+  }
 #endif
 
   write_tile_info(cm, wb);
@@ -1208,8 +1216,9 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
                                 counts->intra_inter[i]);
 
     if (cpi->allow_comp_inter_inter) {
-      const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
       const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
+#if !CONFIG_MISC_FIXES
+      const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
 
       vpx_write_bit(&header_bc, use_compound_pred);
       if (use_compound_pred) {
@@ -1219,6 +1228,12 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
             vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
                                       counts->comp_inter[i]);
       }
+#else
+      if (use_hybrid_pred)
+        for (i = 0; i < COMP_INTER_CONTEXTS; i++)
+          vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
+                                     counts->comp_inter[i]);
+#endif
     }
 
     if (cm->reference_mode != COMPOUND_REFERENCE) {
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index 9eff31a2e..cb2a234c9 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -132,13 +132,6 @@ struct macroblock {
   // Strong color activity detection. Used in RTC coding mode to enhance
   // the visual quality at the boundary of moving color objects.
   uint8_t color_sensitivity[2];
-
-  void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
-  void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
-#if CONFIG_VP9_HIGHBITDEPTH
-  void (*highbd_itxm_add)(const tran_low_t *input, uint8_t *dest, int stride,
-                          int eob, int bd);
-#endif
 };
 
 #ifdef __cplusplus
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 78f151ebb..c61babefd 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -34,7 +34,7 @@ static INLINE void range_check(const tran_low_t *input, const int size,
 #endif
 }
 
-void vp10_fdct4(const tran_low_t *input, tran_low_t *output) {
+static void fdct4(const tran_low_t *input, tran_low_t *output) {
   tran_high_t temp;
   tran_low_t step[4];
 
@@ -70,7 +70,7 @@ void vp10_fdct4(const tran_low_t *input, tran_low_t *output) {
   range_check(output, 4, 13);
 }
 
-void vp10_fdct8(const tran_low_t *input, tran_low_t *output) {
+static void fdct8(const tran_low_t *input, tran_low_t *output) {
   tran_high_t temp;
   tran_low_t step[8];
 
@@ -148,7 +148,7 @@ void vp10_fdct8(const tran_low_t *input, tran_low_t *output) {
   range_check(output, 8, 14);
 }
 
-void vp10_fdct16(const tran_low_t *input, tran_low_t *output) {
+static void fdct16(const tran_low_t *input, tran_low_t *output) {
   tran_high_t temp;
   tran_low_t step[16];
 
@@ -322,8 +322,8 @@ void vp10_fdct16(const tran_low_t *input, tran_low_t *output) {
   range_check(output, 16, 16);
 }
 
-// TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
-void vp10_fdct32_local(const tran_low_t *input, tran_low_t *output) {
+/* TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
+static void fdct32(const tran_low_t *input, tran_low_t *output) {
   tran_high_t temp;
   tran_low_t step[32];
 
@@ -720,6 +720,7 @@ void vp10_fdct32_local(const tran_low_t *input, tran_low_t *output) {
 
   range_check(output, 32, 18);
 }
+*/
 
 static void fadst4(const tran_low_t *input, tran_low_t *output) {
   tran_high_t x0, x1, x2, x3;
@@ -996,24 +997,24 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
 }
 
 static const transform_2d FHT_4[] = {
-  { vp10_fdct4, vp10_fdct4 },  // DCT_DCT  = 0
-  { fadst4,     vp10_fdct4 },  // ADST_DCT = 1
-  { vp10_fdct4, fadst4     },  // DCT_ADST = 2
-  { fadst4,     fadst4     }   // ADST_ADST = 3
+  { fdct4,  fdct4  },  // DCT_DCT  = 0
+  { fadst4, fdct4  },  // ADST_DCT = 1
+  { fdct4,  fadst4 },  // DCT_ADST = 2
+  { fadst4, fadst4 }   // ADST_ADST = 3
 };
 
 static const transform_2d FHT_8[] = {
-  { vp10_fdct8, vp10_fdct8 },  // DCT_DCT  = 0
-  { fadst8,     vp10_fdct8 },  // ADST_DCT = 1
-  { vp10_fdct8, fadst8     },  // DCT_ADST = 2
-  { fadst8,     fadst8     }   // ADST_ADST = 3
+  { fdct8,  fdct8  },  // DCT_DCT  = 0
+  { fadst8, fdct8  },  // ADST_DCT = 1
+  { fdct8,  fadst8 },  // DCT_ADST = 2
+  { fadst8, fadst8 }   // ADST_ADST = 3
 };
 
 static const transform_2d FHT_16[] = {
-  { vp10_fdct16, vp10_fdct16 },  // DCT_DCT  = 0
-  { fadst16,     vp10_fdct16 },  // ADST_DCT = 1
-  { vp10_fdct16, fadst16     },  // DCT_ADST = 2
-  { fadst16,     fadst16     }   // ADST_ADST = 3
+  { fdct16,  fdct16  },  // DCT_DCT  = 0
+  { fadst16, fdct16  },  // ADST_DCT = 1
+  { fdct16,  fadst16 },  // DCT_ADST = 2
+  { fadst16, fadst16 }   // ADST_ADST = 3
 };
 
 void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
@@ -1124,7 +1125,7 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
 
   // Rows
   for (i = 0; i < 8; ++i) {
-    vp10_fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
+    fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
     for (j = 0; j < 8; ++j)
       coeff_ptr[j + i * 8] /= 2;
   }
diff --git a/vp10/encoder/dct.h b/vp10/encoder/dct.h
deleted file mode 100644
index ab0db93eb..000000000
--- a/vp10/encoder/dct.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#ifndef VP10_ENCODER_DCT_H_
-#define VP10_ENCODER_DCT_H_
-
-#include "vpx_dsp/vpx_dsp_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void vp10_fdct4(const tran_low_t *input, tran_low_t *output);
-void vp10_fdct8(const tran_low_t *input, tran_low_t *output);
-void vp10_fdct16(const tran_low_t *input, tran_low_t *output);
-void vp10_fdct32_local(const tran_low_t *input, tran_low_t *output);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP10_ENCODER_DCT_H_
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index c3b6265e3..019e5b1e9 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2696,18 +2696,6 @@ static void encode_frame_internal(VP10_COMP *cpi) {
                  cm->uv_dc_delta_q == 0 &&
                  cm->uv_ac_delta_q == 0;
 
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (cm->use_highbitdepth)
-    x->fwd_txm4x4 = xd->lossless ? vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4;
-  else
-    x->fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
-  x->highbd_itxm_add = xd->lossless ? vp10_highbd_iwht4x4_add :
-                                      vp10_highbd_idct4x4_add;
-#else
-  x->fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  x->itxm_add = xd->lossless ? vp10_iwht4x4_add : vp10_idct4x4_add;
-
   if (xd->lossless)
     x->optimize = 0;
 
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 65692ddd8..33077200d 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -367,7 +367,11 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                                scan_order->scan, scan_order->iscan);
         break;
       case TX_4X4:
-        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+        if (xd->lossless) {
+          vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+        } else {
+          vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
+        }
         vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                                p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob,
@@ -403,7 +407,11 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                         scan_order->scan, scan_order->iscan);
       break;
     case TX_4X4:
-      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+      if (xd->lossless) {
+        vp10_fwht4x4(src_diff, coeff, diff_stride);
+      } else {
+        vpx_fdct4x4(src_diff, coeff, diff_stride);
+      }
       vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                       p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, eob,
@@ -453,7 +461,11 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                                pd->dequant[0], eob);
         break;
       case TX_4X4:
-        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+        if (xd->lossless) {
+          vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+        } else {
+          vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
+        }
         vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
                                p->quant_fp[0], qcoeff, dqcoeff,
                                pd->dequant[0], eob);
@@ -485,7 +497,11 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                       pd->dequant[0], eob);
       break;
     case TX_4X4:
-      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+      if (xd->lossless) {
+        vp10_fwht4x4(src_diff, coeff, diff_stride);
+      } else {
+        vpx_fdct4x4(src_diff, coeff, diff_stride);
+      }
       vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
                       p->quant_fp[0], qcoeff, dqcoeff,
                       pd->dequant[0], eob);
@@ -496,22 +512,24 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block,
   }
 }
 
-void vp10_fwd_txfm_4x4(const int16_t *src_diff,
-                       tran_low_t *coeff, int diff_stride, TX_TYPE tx_type,
-                       void (*fwd_txm4x4)(const int16_t *input,
-                           tran_low_t *output, int stride)) {
-  switch (tx_type) {
-    case DCT_DCT:
-      fwd_txm4x4(src_diff, coeff, diff_stride);
-      break;
-    case ADST_DCT:
-    case DCT_ADST:
-    case ADST_ADST:
-      vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
-      break;
-    default:
-      assert(0);
-      break;
+void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+                       int diff_stride, TX_TYPE tx_type, int lossless) {
+  if (lossless) {
+    vp10_fwht4x4(src_diff, coeff, diff_stride);
+  } else {
+    switch (tx_type) {
+      case DCT_DCT:
+        vpx_fdct4x4(src_diff, coeff, diff_stride);
+        break;
+      case ADST_DCT:
+      case DCT_ADST:
+      case ADST_ADST:
+        vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
+        break;
+      default:
+        assert(0);
+        break;
+    }
   }
 }
 
@@ -565,21 +583,24 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
 
 #if CONFIG_VP9_HIGHBITDEPTH
 void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
-                              int diff_stride, TX_TYPE tx_type,
-                              void (*highbd_fwd_txm4x4)(const int16_t *input,
-                                  tran_low_t *output, int stride)) {
-  switch (tx_type) {
-    case DCT_DCT:
-      highbd_fwd_txm4x4(src_diff, coeff, diff_stride);
-      break;
-    case ADST_DCT:
-    case DCT_ADST:
-    case ADST_ADST:
-      vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
-      break;
-    default:
-      assert(0);
-      break;
+                              int diff_stride, TX_TYPE tx_type, int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+  } else {
+    switch (tx_type) {
+      case DCT_DCT:
+        vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
+        break;
+      case ADST_DCT:
+      case DCT_ADST:
+      case ADST_ADST:
+        vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+        break;
+      default:
+        assert(0);
+        break;
+    }
   }
 }
 
@@ -681,7 +702,7 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
         break;
       case TX_4X4:
         vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
-                                 x->fwd_txm4x4);
+                                 xd->lossless);
         vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                               p->quant, p->quant_shift, qcoeff, dqcoeff,
                               pd->dequant, eob,
@@ -717,7 +738,7 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
                      scan_order->scan, scan_order->iscan);
       break;
     case TX_4X4:
-      vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, x->fwd_txm4x4);
+      vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, xd->lossless);
       vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                      p->quant, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, eob,
@@ -820,7 +841,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
         // case.
         vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride,
                                      p->eobs[block], xd->bd, tx_type,
-                                     x->highbd_itxm_add);
+                                     xd->lossless);
         break;
       default:
         assert(0 && "Invalid transform size");
@@ -849,7 +870,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
       // which is significant (not just an optimization) for the lossless
       // case.
       vp10_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block],
-                            tx_type, x->itxm_add);
+                            tx_type, xd->lossless);
       break;
     default:
       assert(0 && "Invalid transform size");
@@ -874,11 +895,21 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
   if (p->eobs[block] > 0) {
 #if CONFIG_VP9_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-       x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
-       return;
+      if (xd->lossless) {
+        vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride,
+                                p->eobs[block], xd->bd);
+      } else {
+        vp10_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride,
+                                p->eobs[block], xd->bd);
+      }
+      return;
     }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-    x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+    if (xd->lossless) {
+      vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+    } else {
+      vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+    }
   }
 }
 
@@ -999,7 +1030,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           vpx_highbd_subtract_block(4, 4, src_diff, diff_stride,
                                     src, src_stride, dst, dst_stride, xd->bd);
           vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
-                                   x->fwd_txm4x4);
+                                   xd->lossless);
           vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
                                 pd->dequant, eob,
@@ -1011,7 +1042,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           // eob<=1 which is significant (not just an optimization) for the
           // lossless case.
           vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, xd->bd,
-                                       tx_type, x->highbd_itxm_add);
+                                       tx_type, xd->lossless);
         break;
       default:
         assert(0);
@@ -1068,7 +1099,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
       if (!x->skip_recode) {
         vpx_subtract_block(4, 4, src_diff, diff_stride,
                            src, src_stride, dst, dst_stride);
-        vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, x->fwd_txm4x4);
+        vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, xd->lossless);
         vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                        p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob, scan_order->scan,
@@ -1080,7 +1111,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
         // which is significant (not just an optimization) for the lossless
         // case.
         vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type,
-                              x->itxm_add);
+                              xd->lossless);
       }
       break;
     default:
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h
index 928be6c5e..62a7db4a2 100644
--- a/vp10/encoder/encodemb.h
+++ b/vp10/encoder/encodemb.h
@@ -39,16 +39,12 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
 
 void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 
-void vp10_fwd_txfm_4x4(const int16_t *src_diff,
-                       tran_low_t *coeff, int diff_stride, TX_TYPE tx_type,
-                       void (*fwd_txm4x4)(const int16_t *input,
-                           tran_low_t *output, int stride));
+void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+                       int diff_stride, TX_TYPE tx_type, int lossless);
 
 #if CONFIG_VP9_HIGHBITDEPTH
 void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
-                              int diff_stride, TX_TYPE tx_type,
-                              void (*highbd_fwd_txm4x4)(const int16_t *input,
-                                  tran_low_t *output, int stride));
+                              int diff_stride, TX_TYPE tx_type, int lossless);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #ifdef __cplusplus
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 6ae4c5ebe..a38e1829e 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -1455,8 +1455,13 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
 
   cm->interp_filter = cpi->sf.default_interp_filter;
 
-  cm->display_width = cpi->oxcf.width;
-  cm->display_height = cpi->oxcf.height;
+  if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
+    cm->render_width = cpi->oxcf.render_width;
+    cm->render_height = cpi->oxcf.render_height;
+  } else {
+    cm->render_width = cpi->oxcf.width;
+    cm->render_height = cpi->oxcf.height;
+  }
   cm->width = cpi->oxcf.width;
   cm->height = cpi->oxcf.height;
 
@@ -1820,14 +1825,15 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
   snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
 
 void vp10_remove_compressor(VP10_COMP *cpi) {
-  VP10_COMMON *const cm = &cpi->common;
+  VP10_COMMON *cm;
   unsigned int i;
   int t;
 
   if (!cpi)
     return;
 
-  if (cpi && (cm->current_video_frame > 0)) {
+  cm = &cpi->common;
+  if (cm->current_video_frame > 0) {
 #if CONFIG_INTERNAL_STATS
     vpx_clear_system_state();
 
@@ -3626,6 +3632,8 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi,
   cm->frame_to_show = get_frame_new_buffer(cm);
   cm->frame_to_show->color_space = cm->color_space;
   cm->frame_to_show->color_range = cm->color_range;
+  cm->frame_to_show->render_width  = cm->render_width;
+  cm->frame_to_show->render_height = cm->render_height;
 
   // Pick the loop filter level for the frame.
   loopfilter_frame(cpi, cm);
@@ -4086,19 +4094,7 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
   }
 
   if (oxcf->pass == 1) {
-    const int lossless = is_lossless_requested(oxcf);
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (cpi->oxcf.use_highbitdepth)
-      cpi->td.mb.fwd_txm4x4 = lossless ?
-          vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4;
-    else
-      cpi->td.mb.fwd_txm4x4 = lossless ? vp10_fwht4x4 : vpx_fdct4x4;
-    cpi->td.mb.highbd_itxm_add = lossless ? vp10_highbd_iwht4x4_add :
-                                         vp10_highbd_idct4x4_add;
-#else
-    cpi->td.mb.fwd_txm4x4 = lossless ? vp10_fwht4x4 : vpx_fdct4x4;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    cpi->td.mb.itxm_add = lossless ? vp10_iwht4x4_add : vp10_idct4x4_add;
+    cpi->td.mb.e_mbd.lossless = is_lossless_requested(oxcf);
     vp10_first_pass(cpi, source);
   } else if (oxcf->pass == 2) {
     Pass2Encode(cpi, size, dest, frame_flags);
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 8ab1efab9..6f3f7113c 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -229,6 +229,8 @@ typedef struct VP10EncoderConfig {
 #endif
   vpx_color_space_t color_space;
   int color_range;
+  int render_width;
+  int render_height;
 } VP10EncoderConfig;
 
 static INLINE int is_lossless_requested(const VP10EncoderConfig *cfg) {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 20b7d50ec..e3bbdd346 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -798,8 +798,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
           if (xd->lossless) {
             TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
             const scan_order *so = get_scan(TX_4X4, tx_type);
-            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT,
-                                     vp10_highbd_fwht4x4);
+            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
             vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
             ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                  so->scan, so->neighbors,
@@ -808,14 +807,12 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
               goto next_highbd;
             vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
                                          dst, dst_stride, p->eobs[block],
-                                         xd->bd, DCT_DCT,
-                                         vp10_highbd_iwht4x4_add);
+                                         xd->bd, DCT_DCT, 1);
           } else {
             int64_t unused;
             TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
             const scan_order *so = get_scan(TX_4X4, tx_type);
-            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type,
-                                     vpx_highbd_fdct4x4);
+            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
             vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
             ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                  so->scan, so->neighbors,
@@ -827,8 +824,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
               goto next_highbd;
             vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
                                          dst, dst_stride, p->eobs[block],
-                                         xd->bd, tx_type,
-                                         vp10_highbd_idct4x4_add);
+                                         xd->bd, tx_type, 0);
           }
         }
       }
@@ -901,7 +897,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
         if (xd->lossless) {
           TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
           const scan_order *so = get_scan(TX_4X4, tx_type);
-          vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, vp10_fwht4x4);
+          vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
           vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
           ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                so->scan, so->neighbors,
@@ -909,13 +905,12 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
           if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
             goto next;
           vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
-                                dst, dst_stride, p->eobs[block], DCT_DCT,
-                                vp10_iwht4x4_add);
+                                dst, dst_stride, p->eobs[block], DCT_DCT, 1);
         } else {
           int64_t unused;
           TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
           const scan_order *so = get_scan(TX_4X4, tx_type);
-          vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, vpx_fdct4x4);
+          vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
           vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
           ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                              so->scan, so->neighbors,
@@ -925,8 +920,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
           if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
             goto next;
           vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
-                                dst, dst_stride, p->eobs[block], tx_type,
-                                vp10_idct4x4_add);
+                                dst, dst_stride, p->eobs[block], tx_type, 0);
         }
       }
     }
@@ -1290,6 +1284,8 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
   const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
   int idx, idy;
+  void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
+
   const uint8_t *const src =
       &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
   uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i,
@@ -1303,6 +1299,16 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    fwd_txm4x4 = xd->lossless ? vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4;
+  } else {
+    fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
+  }
+#else
+  fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     vpx_highbd_subtract_block(
         height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
         8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
@@ -1325,8 +1331,8 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
 
       k += (idy * 2 + idx);
       coeff = BLOCK_OFFSET(p->coeff, k);
-      x->fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
-                    coeff, 8);
+      fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
+                 coeff, 8);
       vp10_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index 2cb309d8e..304f74eee 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -46,6 +46,8 @@ struct vp10_extracfg {
   vp9e_tune_content           content;
   vpx_color_space_t           color_space;
   int                         color_range;
+  int                         render_width;
+  int                         render_height;
 };
 
 static struct vp10_extracfg default_extra_cfg = {
@@ -73,6 +75,8 @@ static struct vp10_extracfg default_extra_cfg = {
   VP9E_CONTENT_DEFAULT,       // content
   VPX_CS_UNKNOWN,             // color space
   0,                          // color range
+  0,                          // render width
+  0,                          // render height
 };
 
 struct vpx_codec_alg_priv {
@@ -402,6 +406,8 @@ static vpx_codec_err_t set_encoder_config(
 
   oxcf->color_space = extra_cfg->color_space;
   oxcf->color_range = extra_cfg->color_range;
+  oxcf->render_width  = extra_cfg->render_width;
+  oxcf->render_height = extra_cfg->render_height;
   oxcf->arnr_max_frames = extra_cfg->arnr_max_frames;
   oxcf->arnr_strength   = extra_cfg->arnr_strength;
   oxcf->min_gf_interval = extra_cfg->min_gf_interval;
@@ -1232,6 +1238,15 @@ static vpx_codec_err_t ctrl_set_color_range(vpx_codec_alg_priv_t *ctx,
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  struct vp10_extracfg extra_cfg = ctx->extra_cfg;
+  int *const render_size = va_arg(args, int *);  // [0]=width, [1]=height
+  extra_cfg.render_width  = render_size[0];
+  extra_cfg.render_height = render_size[1];  // height is the second element
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP8_COPY_REFERENCE,                ctrl_copy_reference},
   {VP8E_UPD_ENTROPY,                  ctrl_update_entropy},
@@ -1269,6 +1284,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP9E_SET_NOISE_SENSITIVITY,        ctrl_set_noise_sensitivity},
   {VP9E_SET_MIN_GF_INTERVAL,          ctrl_set_min_gf_interval},
   {VP9E_SET_MAX_GF_INTERVAL,          ctrl_set_max_gf_interval},
+  {VP9E_SET_RENDER_SIZE,              ctrl_set_render_size},
 
   // Getters
   {VP8E_GET_LAST_QUANTIZER,           ctrl_get_quantizer},
diff --git a/vp10/vp10_dx_iface.c b/vp10/vp10_dx_iface.c
index a8f9aa351..33337a4bd 100644
--- a/vp10/vp10_dx_iface.c
+++ b/vp10/vp10_dx_iface.c
@@ -978,9 +978,9 @@ static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx,
   return VPX_CODEC_INVALID_PARAM;
 }
 
-static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx,
-                                             va_list args) {
-  int *const display_size = va_arg(args, int *);
+static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  int *const render_size = va_arg(args, int *);
 
   // Only support this function in serial decode.
   if (ctx->frame_parallel_decode) {
@@ -988,14 +988,14 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx,
     return VPX_CODEC_INCAPABLE;
   }
 
-  if (display_size) {
+  if (render_size) {
     if (ctx->frame_workers) {
       VPxWorker *const worker = ctx->frame_workers;
       FrameWorkerData *const frame_worker_data =
           (FrameWorkerData *)worker->data1;
       const VP10_COMMON *const cm = &frame_worker_data->pbi->common;
-      display_size[0] = cm->display_width;
-      display_size[1] = cm->display_height;
+      render_size[0] = cm->render_width;
+      render_size[1] = cm->render_height;
       return VPX_CODEC_OK;
     } else {
       return VPX_CODEC_ERROR;
@@ -1094,7 +1094,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
   {VP8D_GET_LAST_REF_UPDATES,     ctrl_get_last_ref_updates},
   {VP8D_GET_FRAME_CORRUPTED,      ctrl_get_frame_corrupted},
   {VP9_GET_REFERENCE,             ctrl_get_reference},
-  {VP9D_GET_DISPLAY_SIZE,         ctrl_get_display_size},
+  {VP9D_GET_DISPLAY_SIZE,         ctrl_get_render_size},
   {VP9D_GET_BIT_DEPTH,            ctrl_get_bit_depth},
   {VP9D_GET_FRAME_SIZE,           ctrl_get_frame_size},
 
diff --git a/vp10/vp10_iface_common.h b/vp10/vp10_iface_common.h
index 7987d18aa..b2b4b7d8f 100644
--- a/vp10/vp10_iface_common.h
+++ b/vp10/vp10_iface_common.h
@@ -43,6 +43,8 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG  *yv12,
   img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
   img->d_w = yv12->y_crop_width;
   img->d_h = yv12->y_crop_height;
+  img->r_w = yv12->render_width;
+  img->r_h = yv12->render_height;
   img->x_chroma_shift = yv12->subsampling_x;
   img->y_chroma_shift = yv12->subsampling_y;
   img->planes[VPX_PLANE_Y] = yv12->y_buffer;
@@ -84,6 +86,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
 
   yv12->y_crop_width  = img->d_w;
   yv12->y_crop_height = img->d_h;
+  yv12->render_width  = img->r_w;
+  yv12->render_height = img->r_h;
   yv12->y_width  = img->d_w;
   yv12->y_height = img->d_h;
 
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk
index 7393a4e6c..ead993ac2 100644
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@ -23,7 +23,6 @@ VP10_CX_SRCS-yes += encoder/context_tree.c
 VP10_CX_SRCS-yes += encoder/context_tree.h
 VP10_CX_SRCS-yes += encoder/cost.h
 VP10_CX_SRCS-yes += encoder/cost.c
-VP10_CX_SRCS-yes += encoder/dct.h
 VP10_CX_SRCS-yes += encoder/dct.c
 VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.c
 VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.h
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index f2642227b..7da3d71ad 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1593,11 +1593,38 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) {
     if (Q < thresh_qp &&
         cpi->projected_frame_size > thresh_rate &&
         pred_err_mb > thresh_pred_err_mb) {
+      double new_correction_factor = cpi->rate_correction_factor;
+      const int target_size = cpi->av_per_frame_bandwidth;
+      int target_bits_per_mb;
       // Drop this frame: advance frame counters, and set force_maxqp flag.
       cpi->common.current_video_frame++;
       cpi->frames_since_key++;
       // Flag to indicate we will force next frame to be encoded at max QP.
       cpi->force_maxqp = 1;
+      // Reset the buffer levels.
+      cpi->buffer_level = cpi->oxcf.optimal_buffer_level;
+      cpi->bits_off_target = cpi->oxcf.optimal_buffer_level;
+      // Compute a new rate correction factor, corresponding to the current
+      // target frame size and max_QP, and adjust the rate correction factor
+      // upwards, if needed.
+      // This is to prevent a bad state where the re-encoded frame at max_QP
+      // undershoots significantly, and then we end up dropping every other
+      // frame because the QP/rate_correction_factor may have been too low
+      // before the drop and then takes too long to come up.
+      if (target_size >= (INT_MAX >> BPER_MB_NORMBITS))
+        target_bits_per_mb =
+            (target_size / cpi->common.MBs) << BPER_MB_NORMBITS;
+      else
+        target_bits_per_mb =
+            (target_size << BPER_MB_NORMBITS) / cpi->common.MBs;
+      // Rate correction factor based on target_size_per_mb and max_QP.
+      new_correction_factor = (double)target_bits_per_mb /
+          (double)vp8_bits_per_mb[INTER_FRAME][cpi->worst_quality];
+      if (new_correction_factor > cpi->rate_correction_factor)
+        cpi->rate_correction_factor =
+            VPXMIN(2.0 * cpi->rate_correction_factor, new_correction_factor);
+      if (cpi->rate_correction_factor > MAX_BPB_FACTOR)
+        cpi->rate_correction_factor = MAX_BPB_FACTOR;
       return 1;
     } else {
       cpi->force_maxqp = 0;
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 99e51c43d..a12a2ad0e 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -246,8 +246,8 @@ static void yuvconfig2image(vpx_image_t               *img,
     img->fmt = VPX_IMG_FMT_I420;
     img->w = yv12->y_stride;
     img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
-    img->d_w = yv12->y_width;
-    img->d_h = yv12->y_height;
+    img->d_w = img->r_w = yv12->y_width;
+    img->d_h = img->r_h = yv12->y_height;
     img->x_chroma_shift = 1;
     img->y_chroma_shift = 1;
     img->planes[VPX_PLANE_Y] = yv12->y_buffer;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 61e731a1d..6fb8dca22 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -115,8 +115,8 @@ typedef struct VP9Common {
   int color_range;
   int width;
   int height;
-  int display_width;
-  int display_height;
+  int render_width;
+  int render_height;
   int last_width;
   int last_height;
 
diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h
index db6587fcb..b3b60c253 100644
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
-#define VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
+#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_
+#define VP9_COMMON_VP9_THREAD_COMMON_H_
 #include "./vpx_config.h"
 #include "vp9/common/vp9_loopfilter.h"
 #include "vpx_util/vpx_thread.h"
@@ -62,4 +62,4 @@ void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
 }  // extern "C"
 #endif
 
-#endif  // VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
+#endif  // VP9_COMMON_VP9_THREAD_COMMON_H_
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 4e216121e..22995bd17 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1185,11 +1185,11 @@ static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
                              : literal_to_filter[vpx_rb_read_literal(rb, 2)];
 }
 
-static void setup_display_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
-  cm->display_width = cm->width;
-  cm->display_height = cm->height;
+static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+  cm->render_width = cm->width;
+  cm->render_height = cm->height;
   if (vpx_rb_read_bit(rb))
-    vp9_read_frame_size(rb, &cm->display_width, &cm->display_height);
+    vp9_read_frame_size(rb, &cm->render_width, &cm->render_height);
 }
 
 static void resize_mv_buffer(VP9_COMMON *cm) {
@@ -1237,7 +1237,7 @@ static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
   BufferPool *const pool = cm->buffer_pool;
   vp9_read_frame_size(rb, &width, &height);
   resize_context_buffers(cm, width, height);
-  setup_display_size(cm, rb);
+  setup_render_size(cm, rb);
 
   lock_buffer_pool(pool);
   if (vpx_realloc_frame_buffer(
@@ -1261,6 +1261,8 @@ static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
   pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
   pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
   pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_width  = cm->render_width;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
 }
 
 static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth,
@@ -1319,7 +1321,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
   }
 
   resize_context_buffers(cm, width, height);
-  setup_display_size(cm, rb);
+  setup_render_size(cm, rb);
 
   lock_buffer_pool(pool);
   if (vpx_realloc_frame_buffer(
@@ -1343,6 +1345,8 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
   pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
   pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
   pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_width  = cm->render_width;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
 }
 
 static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
@@ -1956,6 +1960,8 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
 #endif
   get_frame_new_buffer(cm)->color_space = cm->color_space;
   get_frame_new_buffer(cm)->color_range = cm->color_range;
+  get_frame_new_buffer(cm)->render_width  = cm->render_width;
+  get_frame_new_buffer(cm)->render_height = cm->render_height;
 
   if (pbi->need_resync) {
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 61077cdbe..4e88819b1 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -126,6 +126,9 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) {
 void vp9_decoder_remove(VP9Decoder *pbi) {
   int i;
 
+  if (!pbi)
+    return;
+
   vpx_get_worker_interface()->end(&pbi->lf_worker);
   vpx_free(pbi->lf_worker.data1);
   vpx_free(pbi->tile_data);
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index bb1e17956..968dad26b 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -23,6 +23,7 @@
 
 CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
   size_t last_coded_q_map_size;
+  size_t consec_zero_mv_size;
   CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
   if (cr == NULL)
     return NULL;
@@ -41,12 +42,20 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
   assert(MAXQ <= 255);
   memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
 
+  consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
+  cr->consec_zero_mv = vpx_calloc(1, consec_zero_mv_size);
+  if (cr->consec_zero_mv == NULL) {
+    // Release cr->map and cr->last_coded_q_map too, not just the struct.
+    vp9_cyclic_refresh_free(cr);
+    return NULL;
+  }
   return cr;
 }
 
 void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
   vpx_free(cr->map);
   vpx_free(cr->last_coded_q_map);
+  vpx_free(cr->consec_zero_mv);
   vpx_free(cr);
 }
 
@@ -228,22 +237,48 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
       int map_offset = block_index + y * cm->mi_cols + x;
       cr->map[map_offset] = new_map_value;
       cpi->segmentation_map[map_offset] = mbmi->segment_id;
+    }
+}
+
+void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
+                                             const MB_MODE_INFO *const mbmi,
+                                             int mi_row, int mi_col,
+                                             BLOCK_SIZE bsize) {
+  const VP9_COMMON *const cm = &cpi->common;
+  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+  MV mv = mbmi->mv[0].as_mv;
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+  const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+  const int block_index = mi_row * cm->mi_cols + mi_col;
+  int x, y;
+  for (y = 0; y < ymis; y++)
+    for (x = 0; x < xmis; x++) {
+      int map_offset = block_index + y * cm->mi_cols + x;
       // Inter skip blocks were clearly not coded at the current qindex, so
       // don't update the map for them. For cases where motion is non-zero or
       // the reference frame isn't the previous frame, the previous value in
       // the map for this spatial location is not entirely correct.
-      if ((!is_inter_block(mbmi) || !skip) &&
+      if ((!is_inter_block(mbmi) || !mbmi->skip) &&
           mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
         cr->last_coded_q_map[map_offset] = clamp(
             cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ);
-      } else if (is_inter_block(mbmi) && skip &&
+      } else if (is_inter_block(mbmi) && mbmi->skip &&
                  mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
         cr->last_coded_q_map[map_offset] = VPXMIN(
             clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id],
                   0, MAXQ),
             cr->last_coded_q_map[map_offset]);
       }
+      // Update the consecutive zero/low_mv count for all blocks (caps at 255).
+      if (is_inter_block(mbmi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) {
+        if (cr->consec_zero_mv[map_offset] < 255)
+          cr->consec_zero_mv[map_offset]++;
+      } else {
+        cr->consec_zero_mv[map_offset] = 0;
+      }
     }
 }
 
 // Update the actual number of blocks that were applied the segment delta q.
@@ -380,9 +415,10 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
     int mi_row = sb_row_index * MI_BLOCK_SIZE;
     int mi_col = sb_col_index * MI_BLOCK_SIZE;
     int qindex_thresh =
-        cpi->oxcf.content == VP9E_CONTENT_SCREEN
-            ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
-            : 0;
+        vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex);
+    int consec_zero_mv_thresh =
+        cpi->oxcf.content == VP9E_CONTENT_SCREEN ? 0
+        : 10 * (100 / cr->percent_refresh);
     assert(mi_row >= 0 && mi_row < cm->mi_rows);
     assert(mi_col >= 0 && mi_col < cm->mi_cols);
     bl_index = mi_row * cm->mi_cols + mi_col;
@@ -398,7 +434,8 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
         // for possible boost/refresh (segment 1). The segment id may get
         // reset to 0 later if block gets coded anything other than ZEROMV.
         if (cr->map[bl_index2] == 0) {
-          if (cr->last_coded_q_map[bl_index2] > qindex_thresh)
+          if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
+              cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh)
             sum_map++;
         } else if (cr->map[bl_index2] < 0) {
           cr->map[bl_index2]++;
@@ -475,6 +512,8 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
     if (cm->frame_type == KEY_FRAME) {
       memset(cr->last_coded_q_map, MAXQ,
              cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
+      memset(cr->consec_zero_mv, 0,
+             cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv));
       cr->sb_index = 0;
     }
     return;
@@ -544,6 +583,8 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
   const VP9_COMMON *const cm = &cpi->common;
   CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
+  memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
+  memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
   cr->sb_index = 0;
   cpi->refresh_golden_frame = 1;
 }
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 7da1f94cf..839ce6df4 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -51,6 +51,8 @@ struct CYCLIC_REFRESH {
   signed char *map;
   // Map of the last q a block was coded at.
   uint8_t *last_coded_q_map;
+  // Count of how many consecutive times a block is coded with zero/low MV.
+  uint8_t *consec_zero_mv;
   // Thresholds applied to the projected rate/distortion of the coding block,
   // when deciding whether block should be refreshed.
   int64_t thresh_rate_sb;
@@ -92,6 +94,11 @@ void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
                                        int mi_row, int mi_col, BLOCK_SIZE bsize,
                                        int64_t rate, int64_t dist, int skip);
 
+void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
+                                             const MB_MODE_INFO *const mbmi,
+                                             int mi_row, int mi_col,
+                                             BLOCK_SIZE bsize);
+
 // Update the segmentation map, and related quantities: cyclic refresh map,
 // refresh sb_index, and target number of blocks to be refreshed.
 void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 01cced001..f3c8579b3 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -969,14 +969,14 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
   return total_size;
 }
 
-static void write_display_size(const VP9_COMMON *cm,
-                               struct vpx_write_bit_buffer *wb) {
-  const int scaling_active = cm->width != cm->display_width ||
-                             cm->height != cm->display_height;
+static void write_render_size(const VP9_COMMON *cm,
+                              struct vpx_write_bit_buffer *wb) {
+  const int scaling_active = cm->width != cm->render_width ||
+                             cm->height != cm->render_height;
   vpx_wb_write_bit(wb, scaling_active);
   if (scaling_active) {
-    vpx_wb_write_literal(wb, cm->display_width - 1, 16);
-    vpx_wb_write_literal(wb, cm->display_height - 1, 16);
+    vpx_wb_write_literal(wb, cm->render_width - 1, 16);
+    vpx_wb_write_literal(wb, cm->render_height - 1, 16);
   }
 }
 
@@ -985,7 +985,7 @@ static void write_frame_size(const VP9_COMMON *cm,
   vpx_wb_write_literal(wb, cm->width - 1, 16);
   vpx_wb_write_literal(wb, cm->height - 1, 16);
 
-  write_display_size(cm, wb);
+  write_render_size(cm, wb);
 }
 
 static void write_frame_size_with_refs(VP9_COMP *cpi,
@@ -1023,7 +1023,7 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
     vpx_wb_write_literal(wb, cm->height - 1, 16);
   }
 
-  write_display_size(cm, wb);
+  write_render_size(cm, wb);
 }
 
 static void write_sync_code(struct vpx_write_bit_buffer *wb) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 4a4301e85..1c4f35a53 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4213,5 +4213,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
     }
     ++td->counts->tx.tx_totals[mbmi->tx_size];
     ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+    if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+      vp9_cyclic_refresh_update_sb_postencode(cpi, mbmi, mi_row, mi_col, bsize);
   }
 }
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index aa110f625..e67ce8851 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1509,15 +1509,23 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
 
   cm->interp_filter = cpi->sf.default_interp_filter;
 
-  cm->display_width = cpi->oxcf.width;
-  cm->display_height = cpi->oxcf.height;
+  if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
+    cm->render_width = cpi->oxcf.render_width;
+    cm->render_height = cpi->oxcf.render_height;
+  } else {
+    cm->render_width = cpi->oxcf.width;
+    cm->render_height = cpi->oxcf.height;
+  }
   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
     cm->width = cpi->oxcf.width;
     cm->height = cpi->oxcf.height;
   }
 
   if (cpi->initial_width) {
-    if (cm->width > cpi->initial_width || cm->height > cpi->initial_height) {
+    int new_mi_size = 0;
+    vp9_set_mb_mi(cm, cm->width, cm->height);
+    new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
+    if (cm->mi_alloc_size < new_mi_size) {
       vp9_free_context_buffers(cm);
       alloc_compressor_data(cpi);
       realloc_segmentation_maps(cpi);
@@ -1927,14 +1935,15 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
   snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
 
 void vp9_remove_compressor(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
+  VP9_COMMON *cm;
   unsigned int i;
   int t;
 
   if (!cpi)
     return;
 
-  if (cpi && (cm->current_video_frame > 0)) {
+  cm = &cpi->common;
+  if (cm->current_video_frame > 0) {
 #if CONFIG_INTERNAL_STATS
     vpx_clear_system_state();
 
@@ -3823,6 +3832,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cm->frame_to_show = get_frame_new_buffer(cm);
   cm->frame_to_show->color_space = cm->color_space;
   cm->frame_to_show->color_range = cm->color_range;
+  cm->frame_to_show->render_width  = cm->render_width;
+  cm->frame_to_show->render_height = cm->render_height;
 
   // Pick the loop filter level for the frame.
   loopfilter_frame(cpi, cm);
@@ -4645,8 +4656,10 @@ int vp9_set_internal_size(VP9_COMP *cpi,
   // always go to the next whole number
   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
-  assert(cm->width <= cpi->initial_width);
-  assert(cm->height <= cpi->initial_height);
+  if (cm->current_video_frame) {
+    assert(cm->width <= cpi->initial_width);
+    assert(cm->height <= cpi->initial_height);
+  }
 
   update_frame_size(cpi);
 
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index b50f2fb31..174e2b461 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -239,6 +239,8 @@ typedef struct VP9EncoderConfig {
 #endif
   vpx_color_space_t color_space;
   int color_range;
+  int render_width;
+  int render_height;
   VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
 } VP9EncoderConfig;
 
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 64a4ebd7a..25209f4b1 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -55,7 +55,6 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
       int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
       LAYER_CONTEXT *const lc = &svc->layer_context[layer];
       RATE_CONTROL *const lrc = &lc->rc;
-      size_t last_coded_q_map_size;
       int i;
       lc->current_video_frame_in_layer = 0;
       lc->layer_size = 0;
@@ -105,14 +104,18 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
       // Cyclic refresh is only applied on base temporal layer.
       if (oxcf->ss_number_layers > 1 &&
           tl == 0) {
+        size_t last_coded_q_map_size;
+        size_t consec_zero_mv_size;
         lc->sb_index = 0;
         lc->map = vpx_malloc(mi_rows * mi_cols * sizeof(signed char));
         memset(lc->map, 0, mi_rows * mi_cols);
-        last_coded_q_map_size =
-            mi_rows * mi_cols * sizeof(uint8_t);
+        last_coded_q_map_size = mi_rows * mi_cols * sizeof(uint8_t);
         lc->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
         assert(MAXQ <= 255);
         memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
+        consec_zero_mv_size = mi_rows * mi_cols * sizeof(uint8_t);
+        lc->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
+        memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
        }
     }
   }
@@ -286,10 +289,13 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
     CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
     signed char *temp = cr->map;
     uint8_t *temp2 = cr->last_coded_q_map;
+    uint8_t *temp3 = cr->consec_zero_mv;
     cr->map = lc->map;
     lc->map = temp;
     cr->last_coded_q_map = lc->last_coded_q_map;
     lc->last_coded_q_map = temp2;
+    cr->consec_zero_mv = lc->consec_zero_mv;
+    lc->consec_zero_mv = temp3;
     cr->sb_index = lc->sb_index;
   }
 }
@@ -311,10 +317,13 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
     CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
     signed char *temp = lc->map;
     uint8_t *temp2 = lc->last_coded_q_map;
+    uint8_t *temp3 = lc->consec_zero_mv;
     lc->map = cr->map;
     cr->map = temp;
     lc->last_coded_q_map = cr->last_coded_q_map;
     cr->last_coded_q_map = temp2;
+    lc->consec_zero_mv = cr->consec_zero_mv;
+    cr->consec_zero_mv = temp3;
     lc->sb_index = cr->sb_index;
   }
 }
@@ -721,6 +730,8 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) {
           vpx_free(lc->map);
         if (lc->last_coded_q_map)
           vpx_free(lc->last_coded_q_map);
+        if (lc->consec_zero_mv)
+          vpx_free(lc->consec_zero_mv);
     }
   }
 }
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index ae55c2fd3..8feab2968 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -45,6 +45,7 @@ typedef struct {
   int sb_index;
   signed char *map;
   uint8_t *last_coded_q_map;
+  uint8_t *consec_zero_mv;
 } LAYER_CONTEXT;
 
 typedef struct {
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index aea1a5fc2..a253c0692 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -46,6 +46,8 @@ struct vp9_extracfg {
   vp9e_tune_content           content;
   vpx_color_space_t           color_space;
   int                         color_range;
+  int                         render_width;
+  int                         render_height;
 };
 
 static struct vp9_extracfg default_extra_cfg = {
@@ -73,6 +75,8 @@ static struct vp9_extracfg default_extra_cfg = {
   VP9E_CONTENT_DEFAULT,       // content
   VPX_CS_UNKNOWN,             // color space
   0,                          // color range
+  0,                          // render width
+  0,                          // render height
 };
 
 struct vpx_codec_alg_priv {
@@ -469,6 +473,8 @@ static vpx_codec_err_t set_encoder_config(
 
   oxcf->color_space = extra_cfg->color_space;
   oxcf->color_range = extra_cfg->color_range;
+  oxcf->render_width  = extra_cfg->render_width;
+  oxcf->render_height = extra_cfg->render_height;
   oxcf->arnr_max_frames = extra_cfg->arnr_max_frames;
   oxcf->arnr_strength   = extra_cfg->arnr_strength;
   oxcf->min_gf_interval = extra_cfg->min_gf_interval;
@@ -1461,6 +1467,15 @@ static vpx_codec_err_t ctrl_set_color_range(vpx_codec_alg_priv_t *ctx,
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+  int *const render_size = va_arg(args, int *);
+  extra_cfg.render_width  = render_size[0];
+  extra_cfg.render_height = render_size[1];
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP8_COPY_REFERENCE,                ctrl_copy_reference},
   {VP8E_UPD_ENTROPY,                  ctrl_update_entropy},
@@ -1502,6 +1517,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP9E_SET_MIN_GF_INTERVAL,          ctrl_set_min_gf_interval},
   {VP9E_SET_MAX_GF_INTERVAL,          ctrl_set_max_gf_interval},
   {VP9E_SET_SVC_REF_FRAME_CONFIG,     ctrl_set_svc_ref_frame_config},
+  {VP9E_SET_RENDER_SIZE,              ctrl_set_render_size},
 
   // Getters
   {VP8E_GET_LAST_QUANTIZER,           ctrl_get_quantizer},
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index eb2371e2c..c6b1ba95f 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -979,9 +979,9 @@ static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx,
   return VPX_CODEC_INVALID_PARAM;
 }
 
-static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx,
-                                             va_list args) {
-  int *const display_size = va_arg(args, int *);
+static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  int *const render_size = va_arg(args, int *);
 
   // Only support this function in serial decode.
   if (ctx->frame_parallel_decode) {
@@ -989,14 +989,14 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx,
     return VPX_CODEC_INCAPABLE;
   }
 
-  if (display_size) {
+  if (render_size) {
     if (ctx->frame_workers) {
       VPxWorker *const worker = ctx->frame_workers;
       FrameWorkerData *const frame_worker_data =
           (FrameWorkerData *)worker->data1;
       const VP9_COMMON *const cm = &frame_worker_data->pbi->common;
-      display_size[0] = cm->display_width;
-      display_size[1] = cm->display_height;
+      render_size[0] = cm->render_width;
+      render_size[1] = cm->render_height;
       return VPX_CODEC_OK;
     } else {
       return VPX_CODEC_ERROR;
@@ -1095,7 +1095,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
   {VP8D_GET_LAST_REF_UPDATES,     ctrl_get_last_ref_updates},
   {VP8D_GET_FRAME_CORRUPTED,      ctrl_get_frame_corrupted},
   {VP9_GET_REFERENCE,             ctrl_get_reference},
-  {VP9D_GET_DISPLAY_SIZE,         ctrl_get_display_size},
+  {VP9D_GET_DISPLAY_SIZE,         ctrl_get_render_size},
   {VP9D_GET_BIT_DEPTH,            ctrl_get_bit_depth},
   {VP9D_GET_FRAME_SIZE,           ctrl_get_frame_size},
 
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index 7d514ba55..938d4224b 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -43,6 +43,8 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG  *yv12,
   img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
   img->d_w = yv12->y_crop_width;
   img->d_h = yv12->y_crop_height;
+  img->r_w = yv12->render_width;
+  img->r_h = yv12->render_height;
   img->x_chroma_shift = yv12->subsampling_x;
   img->y_chroma_shift = yv12->subsampling_y;
   img->planes[VPX_PLANE_Y] = yv12->y_buffer;
@@ -84,6 +86,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
 
   yv12->y_crop_width  = img->d_w;
   yv12->y_crop_height = img->d_h;
+  yv12->render_width  = img->r_w;
+  yv12->render_height = img->r_h;
   yv12->y_width  = img->d_w;
   yv12->y_height = img->d_h;
 
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index a437ac969..69b8d3e34 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -564,6 +564,14 @@ enum vp8e_enc_control_id {
    * Supported in codecs: VP9
   */
   VP9E_SET_SVC_REF_FRAME_CONFIG,
+
+  /*!\brief Codec control function to set intended rendering image size.
+   *
+   * By default, this is identical to the image size in pixels.
+   *
+   * Supported in codecs: VP9
+   */
+  VP9E_SET_RENDER_SIZE,
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -798,6 +806,13 @@ VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *)
 VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_RANGE, int)
 
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *)
+
+/*!\brief Set intended render size; the argument is int[2] = {width, height}.
+ *
+ * TODO(rbultje) : add support of the control in ffmpeg
+ */
+#define VPX_CTRL_VP9E_SET_RENDER_SIZE
+VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
 /*! @} - end defgroup vp8_encoder */
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index be9314751..51100599c 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -93,6 +93,10 @@ extern "C" {
     unsigned int  d_w;   /**< Displayed image width */
     unsigned int  d_h;   /**< Displayed image height */
 
+    /* Image intended rendering dimensions */
+    unsigned int  r_w;   /**< Intended rendering image width */
+    unsigned int  r_h;   /**< Intended rendering image height */
+
     /* Chroma subsampling info */
     unsigned int  x_chroma_shift;   /**< subsampling order, X */
     unsigned int  y_chroma_shift;   /**< subsampling order, Y */
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 2e12acebc..3a044526e 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -57,6 +57,8 @@ typedef struct yv12_buffer_config {
   unsigned int bit_depth;
   vpx_color_space_t color_space;
   int color_range;
+  int render_width;
+  int render_height;
 
   int corrupted;
   int flags;
diff --git a/vpxdec.c b/vpxdec.c
index fe1e3f040..fde3b9a0b 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -953,22 +953,22 @@ static int main_loop(int argc, const char **argv_) {
           // these is set to 0, use the display size set in the first frame
           // header. If that is unavailable, use the raw decoded size of the
           // first decoded frame.
-          int display_width = vpx_input_ctx.width;
-          int display_height = vpx_input_ctx.height;
-          if (!display_width || !display_height) {
-            int display_size[2];
+          int render_width = vpx_input_ctx.width;
+          int render_height = vpx_input_ctx.height;
+          if (!render_width || !render_height) {
+            int render_size[2];
             if (vpx_codec_control(&decoder, VP9D_GET_DISPLAY_SIZE,
-                                  display_size)) {
+                                  render_size)) {
               // As last resort use size of first frame as display size.
-              display_width = img->d_w;
-              display_height = img->d_h;
+              render_width = img->d_w;
+              render_height = img->d_h;
             } else {
-              display_width = display_size[0];
-              display_height = display_size[1];
+              render_width = render_size[0];
+              render_height = render_size[1];
             }
           }
-          scaled_img = vpx_img_alloc(NULL, img->fmt, display_width,
-                                     display_height, 16);
+          scaled_img = vpx_img_alloc(NULL, img->fmt, render_width,
+                                     render_height, 16);
           scaled_img->bit_depth = img->bit_depth;
         }
 
diff --git a/vpxenc.c b/vpxenc.c
index 06604ea0e..cb78226b3 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1996,7 +1996,7 @@ int main(int argc, const char **argv_) {
     usage_exit();
 
   /* Decide if other chroma subsamplings than 4:2:0 are supported */
-  if (global.codec->fourcc == VP9_FOURCC)
+  if (global.codec->fourcc == VP9_FOURCC || global.codec->fourcc == VP10_FOURCC)
     input.only_i420 = 0;
 
   for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {