63 files changed, 699 insertions, 1031 deletions
diff --git a/configure b/configure
index 789e6c30a..92ca061b9 100755
--- a/configure
+++ b/configure
@@ -46,6 +46,9 @@ Advanced options:
   ${toggle_realtime_only}         enable this option while building for real-time encoding
   ${toggle_onthefly_bitpacking}   enable on-the-fly bitpacking in real-time encoding
   ${toggle_error_concealment}     enable this option to get a decoder which is able to conceal losses
+  ${toggle_coefficient_range_checking}
+                                  enable decoder to check if intermediate
+                                  transform coefficients are in valid range
   ${toggle_runtime_cpu_detect}    runtime cpu detection
   ${toggle_shared}                shared library support
   ${toggle_static}                static library support
@@ -327,6 +330,7 @@ CONFIG_LIST="
     encode_perf_tests
     multi_res_encoding
     temporal_denoising
+    coefficient_range_checking
     experimental
     size_limit
     ${EXPERIMENT_LIST}
@@ -384,6 +388,7 @@ CMDLINE_SELECT="
     encode_perf_tests
     multi_res_encoding
     temporal_denoising
+    coefficient_range_checking
     experimental
 "
 
diff --git a/examples/simple_encoder.c b/examples/simple_encoder.c
index dc9bc06b1..30bb73af0 100644
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -118,11 +118,12 @@ void usage_exit() {
   exit(EXIT_FAILURE);
 }
 
-static void encode_frame(vpx_codec_ctx_t *codec,
-                         vpx_image_t *img,
-                         int frame_index,
-                         int flags,
-                         VpxVideoWriter *writer) {
+static int encode_frame(vpx_codec_ctx_t *codec,
+                        vpx_image_t *img,
+                        int frame_index,
+                        int flags,
+                        VpxVideoWriter *writer) {
+  int got_pkts = 0;
   vpx_codec_iter_t iter = NULL;
   const vpx_codec_cx_pkt_t *pkt = NULL;
   const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1,
@@ -131,6 +132,8 @@ static void encode_frame(vpx_codec_ctx_t *codec,
     die_codec(codec, "Failed to encode frame");
 
   while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
+    got_pkts = 1;
+
     if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
       const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
       if (!vpx_video_writer_write_frame(writer,
@@ -139,11 +142,12 @@ static void encode_frame(vpx_codec_ctx_t *codec,
                                         pkt->data.frame.pts)) {
         die_codec(codec, "Failed to write compressed frame");
       }
-
       printf(keyframe ? "K" : ".");
       fflush(stdout);
     }
   }
+
+  return got_pkts;
 }
 
 int main(int argc, char **argv) {
@@ -230,13 +234,16 @@ int main(int argc, char **argv) {
   if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
     die_codec(&codec, "Failed to initialize encoder");
 
+  // Encode frames.
   while (vpx_img_read(&raw, infile)) {
     int flags = 0;
     if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
       flags |= VPX_EFLAG_FORCE_KF;
     encode_frame(&codec, &raw, frame_count++, flags, writer);
   }
-  encode_frame(&codec, NULL, -1, 0, writer);  // flush the encoder
+
+  // Flush encoder.
+  while (encode_frame(&codec, NULL, -1, 0, writer)) {};
 
   printf("\n");
   fclose(infile);
diff --git a/test/sad_test.cc b/test/sad_test.cc
index f07a98921..e63770bd4 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -640,19 +640,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
 
 #if HAVE_AVX2
 #if CONFIG_VP9_ENCODER
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
-                          const uint8_t *const ref_ptr[], int ref_stride,
-                          unsigned int *sad_array);
-void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
-                          const uint8_t *const ref_ptr[], int ref_stride,
-                          unsigned int *sad_array);
-}
 const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
 const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
+INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
                         make_tuple(32, 32, sad_32x32x4d_avx2),
                         make_tuple(64, 64, sad_64x64x4d_avx2)));
 #endif  // CONFIG_VP9_ENCODER
diff --git a/test/tools_common.sh b/test/tools_common.sh
index e98beadf8..0bfefba46 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -182,65 +182,6 @@ webm_io_available() {
   [ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes
 }
 
-# Echoes yes to stdout when vpxenc exists according to vpx_tool_available().
-vpxenc_available() {
-  [ -n $(vpx_tool_available vpxenc) ] && echo yes
-}
-
-# Wrapper function for running vpxenc. Positional parameters are interpreted as
-# follows:
-#   1 - codec name
-#   2 - input width
-#   3 - input height
-#   4 - number of frames to encode
-#   5 - path to input file
-#   6 - path to output file
-#       Note: The output file path must end in .ivf to output an IVF file.
-#   7 - extra flags
-#       Note: Extra flags currently supports a special case: when set to "-"
-#             input is piped to vpxenc via cat.
-vpxenc() {
-  local encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}"
-  local codec="${1}"
-  local width=${2}
-  local height=${3}
-  local frames=${4}
-  local input=${5}
-  local output="${VPX_TEST_OUTPUT_DIR}/${6}"
-  local extra_flags=${7}
-
-  # Because --ivf must be within the command line to get IVF from vpxenc.
-  if echo "${output}" | egrep -q 'ivf$'; then
-    use_ivf=--ivf
-  else
-    unset use_ivf
-  fi
-
-  if [ "${extra_flags}" = "-" ]; then
-    pipe_input=yes
-    extra_flags=${8}
-  else
-    unset pipe_input
-  fi
-
-  if [ -z "${pipe_input}" ]; then
-    eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} --width=${width} \
-        --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \
-        --output="${output}" "${input}" ${devnull}
-  else
-    cat "${input}" \
-        | eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} \
-            --width=${width} --height=${height} --limit=${frames} ${use_ivf} \
-            ${extra_flags} --output="${output}" - ${devnull}
-  fi
-
-  if [ ! -e "${output}" ]; then
-    # Return non-zero exit status: output file doesn't exist, so something
-    # definitely went wrong.
-    return 1
-  fi
-}
-
 # Filters strings from positional parameter one using the filter specified by
 # positional parameter two. Filter behavior depends on the presence of a third
 # positional parameter. When parameter three is present, strings that match the
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 40b7df630..7d8118235 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P(
 #endif
 
 #if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-unsigned int vp9_sub_pixel_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-}
+
 const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
 const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
 const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
@@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
 const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
     vp9_sub_pixel_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelVarianceTest,
+    AVX2, VP9SubpelVarianceTest,
     ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
                       make_tuple(6, 6, subpel_variance64x64_avx2)));
 
@@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
 const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
     vp9_sub_pixel_avg_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelAvgVarianceTest,
+    AVX2, VP9SubpelAvgVarianceTest,
     ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
                       make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
 #endif  // HAVE_AVX2
diff --git a/test/video_source.h b/test/video_source.h
index 78e7d466b..c924f964f 100644
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -53,55 +53,33 @@ static FILE *OpenTestDataFile(const std::string& file_name) {
   return fopen(path_to_source.c_str(), "rb");
 }
 
-static FILE *OpenTestOutFile(const std::string& file_name) {
-  const std::string path_to_source = GetDataPath() + "/" + file_name;
-  return fopen(path_to_source.c_str(), "wb");
-}
-
-static std::string GetTempOutFilename() {
-  std::string basename;
+static FILE *GetTempOutFile(std::string *file_name) {
+  file_name->clear();
 #if defined(_WIN32)
   char fname[MAX_PATH];
-  // Assume for now that the filename generated is unique per process
-  const UINT ret = GetTempFileNameA(
-      GetDataPath().c_str(), "lvx", 0, fname);
-  if (ret != 0) {
-    const char *slash = strrchr(fname, '\\');
-    if (slash == NULL) slash = strrchr(fname, '/');
-    if (slash == NULL)
-      basename.assign(fname);
-    else
-      basename.assign(slash + 1);
-  } else {
-    basename.clear();
+  char tmppath[MAX_PATH];
+  if (GetTempPathA(MAX_PATH, tmppath)) {
+    // Assume for now that the filename generated is unique per process
+    if (GetTempFileNameA(tmppath, "lvx", 0, fname)) {
+      file_name->assign(fname);
+      return fopen(fname, "wb+");
+    }
   }
+  return NULL;
 #else
-  char fname[256];
-  const std::string templ = GetDataPath() + "/libvpx_test_XXXXXX";
-  strncpy(fname, templ.c_str(), templ.size());
-  fname[templ.size()] = '\0';
-  const int fd = mkstemp(fname);
-  if (fd != -1) {
-    close(fd);
-    basename.assign(strrchr(fname, '/') + 1);
-  } else {
-    basename.clear();
-  }
+  return tmpfile();
 #endif
-  return basename;
 }
 
 class TempOutFile {
  public:
   TempOutFile() {
-    file_name_ = GetTempOutFilename();
-    file_ = OpenTestOutFile(file_name_);
+    file_ = GetTempOutFile(&file_name_);
   }
   ~TempOutFile() {
     CloseFile();
     if (!file_name_.empty()) {
-      const std::string path_to_source = GetDataPath() + "/" + file_name_;
-      EXPECT_EQ(0, remove(path_to_source.c_str()));
+      EXPECT_EQ(0, remove(file_name_.c_str()));
     }
   }
   FILE *file() {
@@ -110,14 +88,19 @@ class TempOutFile {
   const std::string& file_name() {
     return file_name_;
   }
+
+ protected:
   void CloseFile() {
     if (file_) {
-      fclose(file_);
+      // Close if file pointer is associated with an open file
+#if defined(_WIN32)
+      if (file_->_ptr != NULL) fclose(file_);
+#else
+      if (fileno(file_) != -1) fclose(file_);
+#endif
       file_ = NULL;
     }
   }
-
- protected:
   FILE *file_;
   std::string file_name_;
 };
diff --git a/test/vp9_spatial_svc_encoder.sh b/test/vp9_spatial_svc_encoder.sh
index 7a964a920..6dd5f171b 100755
--- a/test/vp9_spatial_svc_encoder.sh
+++ b/test/vp9_spatial_svc_encoder.sh
@@ -47,43 +47,9 @@ vp9_spatial_svc_encoder() {
   [ -e "${output_file}" ] || return 1
 }
 
-# Each mode is run with layer count 1-$vp9_ssvc_test_layers.
+# Each test is run with layer count 1-$vp9_ssvc_test_layers.
 vp9_ssvc_test_layers=5
 
-DISABLED_vp9_spatial_svc_mode_i() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_i"
-    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -m i -l ${layers}
-    done
-  fi
-}
-
-DISABLED_vp9_spatial_svc_mode_altip() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_altip"
-    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -m "alt-ip" -l ${layers}
-    done
-  fi
-}
-
-DISABLED_vp9_spatial_svc_mode_ip() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_ip"
-    vp9_spatial_svc_encoder "${test_name}" -m ip -l 1
-  fi
-}
-
-DISABLED_vp9_spatial_svc_mode_gf() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="DISABLED_vp9_spatial_svc_mode_gf"
-    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -m gf -l ${layers}
-    done
-  fi
-}
-
 vp9_spatial_svc() {
   if [ "$(vp9_encode_available)" = "yes" ]; then
     local readonly test_name="vp9_spatial_svc"
diff --git a/test/vpxenc.sh b/test/vpxenc.sh
index f08c04878..6e9ad3564 100755
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -15,7 +15,7 @@
 ##
 . $(dirname $0)/tools_common.sh
 
-TEST_FRAMES=10
+readonly TEST_FRAMES=10
 
 # Environment check: Make sure input is available.
 vpxenc_verify_environment() {
@@ -39,55 +39,170 @@ vpxenc_can_encode_vp9() {
   fi
 }
 
+# Echoes yes to stdout when vpxenc exists according to vpx_tool_available().
+vpxenc_available() {
+  [ -n "$(vpx_tool_available vpxenc)" ] && echo yes
+}
+
+# Wrapper function for running vpxenc with pipe input. Requires that
+# LIBVPX_BIN_PATH points to the directory containing vpxenc. $1 is used as the
+# input file path and shifted away. All remaining parameters are passed through
+# to vpxenc.
+vpxenc_pipe() {
+  local readonly encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}"
+  local readonly input="$1"
+  shift
+  cat "${input}" | eval "${VPX_TEST_PREFIX}" "${encoder}" - "$@" ${devnull}
+}
+
+# Wrapper function for running vpxenc. Requires that LIBVPX_BIN_PATH points to
+# the directory containing vpxenc. $1 is used as the input file path and
+# shifted away. All remaining parameters are passed through to vpxenc.
+vpxenc() {
+  local readonly encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}"
+  local readonly input="${1}"
+  shift
+  eval "${VPX_TEST_PREFIX}" "${encoder}" "$input" "$@" ${devnull}
+}
+
 vpxenc_vp8_ivf() {
   if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then
-    vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
-        "${YUV_RAW_INPUT}" vp8.ivf
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.ivf"
+    vpxenc --codec=vp8 \
+      --width="${YUV_RAW_INPUT_WIDTH}" \
+      --height="${YUV_RAW_INPUT_HEIGHT}" \
+      --limit="${TEST_FRAMES}" \
+      --ivf \
+      --output="${output}" \
+      "${YUV_RAW_INPUT}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
   fi
 }
 
-vpxenc_vp8_ivf_pipe_input() {
+vpxenc_vp8_ivf_piped_input() {
   if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then
-    vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
-        "${YUV_RAW_INPUT}" vp8.ivf -
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_piped_input.ivf"
+    cat "${YUV_RAW_INPUT}" \
+      | vpxenc --codec=vp8 \
+        --width="${YUV_RAW_INPUT_WIDTH}" \
+        --height="${YUV_RAW_INPUT_HEIGHT}" \
+        --limit="${TEST_FRAMES}" \
+        --ivf \
+        --output="${output}" \
+        -
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
   fi
 }
 
 vpxenc_vp8_webm() {
-  if [ "$(vpxenc_can_encode_vp8)" = "yes" ] &&
-     [ "$(webm_io_available)" = "yes" ] ; then
-    vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
-        "${YUV_RAW_INPUT}" vp8.webm
+  if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm"
+    vpxenc --codec=vp8 \
+      --width="${YUV_RAW_INPUT_WIDTH}" \
+      --height="${YUV_RAW_INPUT_HEIGHT}" \
+      --limit="${TEST_FRAMES}" \
+      --output="${output}" \
+      "${YUV_RAW_INPUT}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
   fi
 }
 
 vpxenc_vp9_ivf() {
   if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
-    vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
-        "${YUV_RAW_INPUT}" vp9.ivf
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.ivf"
+    vpxenc --codec=vp9 \
+      --width="${YUV_RAW_INPUT_WIDTH}" \
+      --height="${YUV_RAW_INPUT_HEIGHT}" \
+      --limit="${TEST_FRAMES}" \
+      --ivf \
+      --output="${output}" \
+      "${YUV_RAW_INPUT}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
   fi
 }
 
 vpxenc_vp9_webm() {
-  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] &&
-     [ "$(webm_io_available)" = "yes" ] ; then
-    vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
-        "${YUV_RAW_INPUT}" vp9.webm
+  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.webm"
+    vpxenc --codec=vp9 \
+      --width="${YUV_RAW_INPUT_WIDTH}" \
+      --height="${YUV_RAW_INPUT_HEIGHT}" \
+      --limit="${TEST_FRAMES}" \
+      --output="${output}" \
+      "${YUV_RAW_INPUT}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+vpxenc_vp9_ivf_lossless() {
+  if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless.ivf"
+    vpxenc --codec=vp9 \
+      --width="${YUV_RAW_INPUT_WIDTH}" \
+      --height="${YUV_RAW_INPUT_HEIGHT}" \
+      --limit="${TEST_FRAMES}" \
+      --ivf \
+      --output="${output}" \
+      --lossless=1 \
+      --test-decode=fatal \
+      "${YUV_RAW_INPUT}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
   fi
 }
 
-DISABLED_vpxenc_vp9_ivf_lossless() {
+vpxenc_vp9_ivf_minq0_maxq0() {
   if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
-    vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
-        "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless_minq0_maxq0.ivf"
+    vpxenc --codec=vp9 \
+      --width="${YUV_RAW_INPUT_WIDTH}" \
+      --height="${YUV_RAW_INPUT_HEIGHT}" \
+      --limit="${TEST_FRAMES}" \
+      --ivf \
+      --output="${output}" \
+      --min-q=0 \
+      --max-q=0 \
+      --test-decode=fatal \
+      "${YUV_RAW_INPUT}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
   fi
 }
 
 vpxenc_tests="vpxenc_vp8_ivf
               vpxenc_vp8_webm
-              vpxenc_vp8_ivf_pipe_input
+              vpxenc_vp8_ivf_piped_input
               vpxenc_vp9_ivf
               vpxenc_vp9_webm
-              DISABLED_vpxenc_vp9_ivf_lossless"
+              vpxenc_vp9_ivf_lossless
+              vpxenc_vp9_ivf_minq0_maxq0"
 
 run_tests vpxenc_verify_environment "${vpxenc_tests}"
diff --git a/test/y4m_test.cc b/test/y4m_test.cc
index 060f8c40f..d4a2ede20 100644
--- a/test/y4m_test.cc
+++ b/test/y4m_test.cc
@@ -145,6 +145,14 @@ class Y4mVideoWriteTest
     delete tmpfile_;
   }
 
+  virtual void ReplaceInputFile(FILE *input_file) {
+    CloseSource();
+    frame_ = 0;
+    input_file_ = input_file;
+    rewind(input_file_);
+    ReadSourceToStart();
+  }
+
   // Writes out a y4m file and then reads it back
   void WriteY4mAndReadBack() {
     ASSERT_TRUE(input_file_ != NULL);
@@ -163,8 +171,7 @@ class Y4mVideoWriteTest
       write_image_file(img(), tmpfile_->file());
       Next();
     }
-    tmpfile_->CloseFile();
-    Y4mVideoSourceTest::Init(tmpfile_->file_name(), limit_);
+    ReplaceInputFile(tmpfile_->file());
   }
 
   virtual void Init(const std::string &file_name, int limit) {
diff --git a/usage.dox b/usage.dox
index 92fd6b26e..237b8dc42 100644
--- a/usage.dox
+++ b/usage.dox
@@ -57,9 +57,6 @@
     the vpx_codec_get_caps() method. Attempts to invoke features not supported
     by an algorithm will generally result in #VPX_CODEC_INCAPABLE.
 
-    Currently defined features available in both encoders and decoders include:
-    - \subpage usage_xma
-
     \if decoder
     Currently defined decoder features include:
     - \ref usage_cb
@@ -70,9 +67,7 @@
     To initialize a codec instance, the address of the codec context
     and interface structures are passed to an initialization function. Depending
     on the \ref usage_features that the codec supports, the codec could be
-    initialized in different modes. Most notably, the application may choose to
-    use \ref usage_xma mode to gain fine grained control over how and where
-    memory is allocated for the codec.
+    initialized in different modes.
 
     To prevent cases of confusion where the ABI of the library changes,
     the ABI is versioned. The ABI version number must be passed at
@@ -136,73 +131,3 @@
     possible."
 
 */
-
-
-/*! \page usage_xma External Memory Allocation
-    Applications that wish to have fine grained control over how and where
-    decoders allocate memory \ref MAY make use of the eXternal Memory Allocation
-    (XMA) interface. Not all codecs support the XMA \ref usage_features.
-
-    To use a decoder in XMA mode, the decoder \ref MUST be initialized with the
-    vpx_codec_xma_init_ver() function. The amount of memory a decoder needs to
-    allocate is heavily dependent on the size of the encoded video frames. The
-    size of the video must be known before requesting the decoder's memory map.
-    This stream information can be obtained with the vpx_codec_peek_stream_info()
-    function, which does not require a constructed decoder context. If the exact
-    stream is not known, a stream info structure can be created that reflects
-    the maximum size that the decoder instance is required to support.
-
-    Once the decoder instance has been initialized and the stream information
-    determined, the application calls the vpx_codec_get_mem_map() iterator
-    repeatedly to get a list of the memory segments requested by the decoder.
-    The iterator value should be initialized to NULL to request the first
-    element, and the function will return #VPX_CODEC_LIST_END to signal the end of
-    the list.
-
-    After each segment is identified, it must be passed to the codec through the
-    vpx_codec_set_mem_map() function. Segments \ref MUST be passed in the same
-    order as they are returned from vpx_codec_get_mem_map(), but there is no
-    requirement that vpx_codec_get_mem_map() must finish iterating before
-    vpx_codec_set_mem_map() is called. For instance, some applications may choose
-    to get a list of all requests, construct an optimal heap, and then set all
-    maps at once with one call. Other applications may set one map at a time,
-    allocating it immediately after it is returned from vpx_codec_get_mem_map().
-
-    After all segments have been set using vpx_codec_set_mem_map(), the codec may
-    be used as it would be in normal internal allocation mode.
-
-    \section usage_xma_seg_id Segment Identifiers
-    Each requested segment is identified by an identifier unique to
-    that decoder type. Some of these identifiers are private, while others are
-    enumerated for application use. Identifiers not enumerated publicly are
-    subject to change. Identifiers are non-consecutive.
-
-    \section usage_xma_seg_szalign Segment Size and Alignment
-    The sz (size) and align (alignment) parameters describe the required size
-    and alignment of the requested segment. Alignment will always be a power of
-    two. Applications \ref MUST honor the alignment requested. Failure to do so
-    could result in program crashes or may incur a speed penalty.
-
-    \section usage_xma_seg_flags Segment Flags
-    The flags member of the segment structure indicates any requirements or
-    desires of the codec for the particular segment. The #VPX_CODEC_MEM_ZERO flag
-    indicates that the segment \ref MUST be zeroed by the application prior to
-    passing it to the application. The #VPX_CODEC_MEM_WRONLY flag indicates that
-    the segment will only be written into by the decoder, not read. If this flag
-    is not set, the application \ref MUST insure that the memory segment is
-    readable. On some platforms, framebuffer memory is writable but not
-    readable, for example. The #VPX_CODEC_MEM_FAST flag indicates that the segment
-    will be frequently accessed, and that it should be placed into fast memory,
-    if any is available. The application \ref MAY choose to place other segments
-    in fast memory as well, but the most critical segments will be identified by
-    this flag.
-
-    \section usage_xma_seg_basedtor Segment Base Address and Destructor
-    For each requested memory segment, the application must determine the
-    address of a memory segment that meets the requirements of the codec. This
-    address is set in the <code>base</code> member of the #vpx_codec_mmap
-    structure. If the application requires processing when the segment is no
-    longer used by the codec (for instance to deallocate it or close an
-    associated file descriptor) the <code>dtor</code> and <code>priv</code>
-    members can be set.
-*/
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index ef7f61b12..a46fbfbbd 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -108,7 +108,7 @@ extern "C"
          * For temporal denoiser: noise_sensitivity = 0 means off,
          * noise_sensitivity = 1 means temporal denoiser on for Y channel only,
          * noise_sensitivity = 2 means temporal denoiser on for all channels.
-         * noise_sensitivity = 3 means aggressive denoising mode.
+         * noise_sensitivity >= 3 means aggressive denoising mode.
          * Temporal denoiser is enabled via the configuration option:
          * CONFIG_TEMPORAL_DENOISING.
          * For spatial denoiser: noise_sensitivity controls the amount of
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 611421575..7c012a829 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -35,10 +35,6 @@ typedef struct
     unsigned char *buffer;
     unsigned char *buffer_end;
     struct vpx_internal_error_info *error;
-
-    /* Variables used to track bit costs without outputing to the bitstream */
-    unsigned int  measure_cost;
-    unsigned long bit_counter;
 } BOOL_CODER;
 
 extern void vp8_start_encode(BOOL_CODER *bc, unsigned char *buffer, unsigned char *buffer_end);
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index c4c0de81b..75401fc2b 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -335,8 +335,16 @@ int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv,
     return FILTER_BLOCK;
 }
 
-void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser) {
-  if (!denoiser->aggressive_mode) {
+void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
+  assert(mode > 0);  // Denoiser is allocated only if mode > 0.
+  if (mode == 1) {
+    denoiser->denoiser_mode = kDenoiserOnYOnly;
+  } else if (mode == 2) {
+    denoiser->denoiser_mode = kDenoiserOnYUV;
+  } else {
+    denoiser->denoiser_mode = kDenoiserOnYUVAggressive;
+  }
+  if (denoiser->denoiser_mode != kDenoiserOnYUVAggressive) {
     denoiser->denoise_pars.scale_sse_thresh = 1;
     denoiser->denoise_pars.scale_motion_thresh = 8;
     denoiser->denoise_pars.scale_increase_filter = 0;
@@ -361,7 +369,6 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
     int i;
     assert(denoiser);
     denoiser->num_mb_cols = num_mb_cols;
-    denoiser->aggressive_mode = mode;
 
     for (i = 0; i < MAX_REF_FRAMES; i++)
     {
@@ -392,7 +399,7 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
 
     denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
     vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
-    vp8_denoiser_set_parameters(denoiser);
+    vp8_denoiser_set_parameters(denoiser, mode);
     return 0;
 }
 
@@ -420,8 +427,8 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                              loop_filter_info_n *lfi_n,
                              int mb_row,
                              int mb_col,
-                             int block_index,
-                             int uv_denoise)
+                             int block_index)
+
 {
     int mv_row;
     int mv_col;
@@ -558,7 +565,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
         denoiser->denoise_state[block_index] = motion_magnitude2 > 0 ?
             kFilterNonZeroMV : kFilterZeroMV;
         // Only denoise UV for zero motion, and if y channel was denoised.
-        if (uv_denoise &&
+        if (denoiser->denoiser_mode != kDenoiserOnYOnly &&
             motion_magnitude2 == 0 &&
             decision == FILTER_BLOCK) {
           unsigned char *mc_running_avg_u =
@@ -595,7 +602,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                 denoiser->yv12_running_avg[INTRA_FRAME].y_stride);
         denoiser->denoise_state[block_index] = kNoFilter;
     }
-    if (uv_denoise) {
+    if (denoiser->denoiser_mode != kDenoiserOnYOnly) {
       if (decision_u == COPY_BLOCK) {
         vp8_copy_mem8x8(
             x->block[16].src + *x->block[16].base_src, x->block[16].src_stride,
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index 1a42f86d3..89832d3c2 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -39,6 +39,13 @@ enum vp8_denoiser_filter_state {
   kFilterNonZeroMV
 };
 
+enum vp8_denoiser_mode {
+  kDenoiserOff,
+  kDenoiserOnYOnly,
+  kDenoiserOnYUV,
+  kDenoiserOnYUVAggressive
+};
+
 typedef struct {
   // Scale factor on sse threshold above which no denoising is done.
   unsigned int scale_sse_thresh;
@@ -67,7 +74,7 @@ typedef struct vp8_denoiser
     YV12_BUFFER_CONFIG yv12_mc_running_avg;
     unsigned char* denoise_state;
     int num_mb_cols;
-    int aggressive_mode;
+    int denoiser_mode;
     denoise_params denoise_pars;
 } VP8_DENOISER;
 
@@ -85,8 +92,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                              loop_filter_info_n *lfi_n,
                              int mb_row,
                              int mb_col,
-                             int block_index,
-                             int uv_denoise);
+                             int block_index);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 91b9138bf..7140f2f1b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -615,7 +615,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
         cpi->cyclic_refresh_mode_index = i;
 
 #if CONFIG_TEMPORAL_DENOISING
-        if (cpi->denoiser.aggressive_mode != 0 &&
+        if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive &&
             Q < (int)cpi->denoiser.denoise_pars.qp_thresh) {
           // Under aggressive denoising mode, use segmentation to turn off loop
           // filter below some qp thresh. The loop filter is turned off for all
@@ -1277,6 +1277,15 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
 
     vpx_free(cpi->tplist);
     CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows));
+
+#if CONFIG_TEMPORAL_DENOISING
+    if (cpi->oxcf.noise_sensitivity > 0) {
+      vp8_denoiser_free(&cpi->denoiser);
+      vp8_denoiser_allocate(&cpi->denoiser, width, height,
+                            cm->mb_rows, cm->mb_cols,
+                            cpi->oxcf.noise_sensitivity);
+    }
+#endif
 }
 
 
@@ -1771,7 +1780,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
         int height = (cpi->oxcf.Height + 15) & ~15;
         vp8_denoiser_allocate(&cpi->denoiser, width, height,
                               cm->mb_rows, cm->mb_cols,
-                              ((cpi->oxcf.noise_sensitivity == 3) ? 1 : 0));
+                              cpi->oxcf.noise_sensitivity);
       }
     }
 #endif
@@ -2762,19 +2771,6 @@ static int resize_key_frame(VP8_COMP *cpi)
             cm->Height = new_height;
             vp8_alloc_compressor_data(cpi);
             scale_and_extend_source(cpi->un_scaled_source, cpi);
-#if CONFIG_TEMPORAL_DENOISING
-            // TODO(marpan): denoiser_allocate() is not called in
-            // vp8_alloc_compressor_data() (currently denoiser_allocate is
-            // only called in change_config()). Check if we can move this call
-            // of denoiser_free/allocate into vp8_alloc_compressor_data().
-            if (cpi->oxcf.noise_sensitivity > 0) {
-              vp8_denoiser_free(&cpi->denoiser);
-              vp8_denoiser_allocate(&cpi->denoiser, new_width, new_height,
-                                    cm->mb_rows, cm->mb_cols,
-                                    ((cpi->oxcf.noise_sensitivity == 3) ?
-                                    1 : 0));
-            }
-#endif
             return 1;
         }
     }
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index ec1ea146f..d0ad7212d 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1174,7 +1174,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
 #if CONFIG_TEMPORAL_DENOISING
     if (cpi->oxcf.noise_sensitivity)
     {
-        int uv_denoise = (cpi->oxcf.noise_sensitivity >= 2) ? 1 : 0;
         int block_index = mb_row * cpi->common.mb_cols + mb_col;
         if (x->best_sse_inter_mode == DC_PRED)
         {
@@ -1189,8 +1188,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
                                 recon_yoffset, recon_uvoffset,
                                 &cpi->common.lf_info, mb_row, mb_col,
-                                block_index, uv_denoise);
-
+                                block_index);
 
         /* Reevaluate ZEROMV after denoising. */
         if (best_mbmode.ref_frame == INTRA_FRAME &&
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 98d60160c..2f6f5d07c 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -2511,7 +2511,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
 #if CONFIG_TEMPORAL_DENOISING
     if (cpi->oxcf.noise_sensitivity)
     {
-        int uv_denoise = (cpi->oxcf.noise_sensitivity == 2) ? 1 : 0;
         int block_index = mb_row * cpi->common.mb_cols + mb_col;
         if (x->best_sse_inter_mode == DC_PRED)
         {
@@ -2525,8 +2524,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
                                 recon_yoffset, recon_uvoffset,
                                 &cpi->common.lf_info, mb_row, mb_col,
-                                block_index, uv_denoise);
-
+                                block_index);
 
         /* Reevaluate ZEROMV after denoising. */
         if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 83938dd3d..d515fc0bd 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1316,9 +1316,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
         "vp8.fpf"           /* first pass filename */
 #endif
         VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
-#if CONFIG_SPATIAL_SVC
         {0},
-#endif
         {0},                /* ss_target_bitrate */
         1,                  /* ts_number_layers */
         {0},                /* ts_target_bitrate */
@@ -1342,8 +1340,6 @@ CODEC_INTERFACE(vpx_codec_vp8_cx) =
     vp8e_init,          /* vpx_codec_init_fn_t       init; */
     vp8e_destroy,       /* vpx_codec_destroy_fn_t    destroy; */
     vp8e_ctf_maps,      /* vpx_codec_ctrl_fn_map_t  *ctrl_maps; */
-    NOT_IMPLEMENTED,    /* vpx_codec_get_mmap_fn_t   get_mmap; */
-    NOT_IMPLEMENTED,    /* vpx_codec_set_mmap_fn_t   set_mmap; */
     {
         NOT_IMPLEMENTED,    /* vpx_codec_peek_si_fn_t    peek_si; */
         NOT_IMPLEMENTED,    /* vpx_codec_get_si_fn_t     get_si; */
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index ccc0afbe3..0fe0c921f 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -803,8 +803,6 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
     vp8_init,         /* vpx_codec_init_fn_t       init; */
     vp8_destroy,      /* vpx_codec_destroy_fn_t    destroy; */
     vp8_ctf_maps,     /* vpx_codec_ctrl_fn_map_t  *ctrl_maps; */
-    NOT_IMPLEMENTED,  /* vpx_codec_get_mmap_fn_t   get_mmap; */
-    NOT_IMPLEMENTED,  /* vpx_codec_set_mmap_fn_t   set_mmap; */
     {
         vp8_peek_si,      /* vpx_codec_peek_si_fn_t    peek_si; */
         vp8_get_si,       /* vpx_codec_get_si_fn_t     get_si; */
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 0c2898d21..d77631341 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -27,10 +27,10 @@ extern "C" {
 
 // Bitstream profiles indicated by 2-3 bits in the uncompressed header.
 // 00: Profile 0.  8-bit 4:2:0 only.
-// 10: Profile 1.  Adds 4:4:4, 4:2:2, and 4:4:0 to Profile 0.
-// 01: Profile 2.  Supports 10-bit and 12-bit color only, with 4:2:0 sampling.
-// 110: Profile 3. Supports 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0
-//                 subsampling.
+// 10: Profile 1.  8-bit 4:4:4, 4:2:2, and 4:4:0.
+// 01: Profile 2.  10-bit and 12-bit color only, with 4:2:0 sampling.
+// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0
+//                 sampling.
 // 111: Undefined profile.
 typedef enum BITSTREAM_PROFILE {
   PROFILE_0,
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 3253bcbf4..7f595e1cc 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -81,6 +81,16 @@ static const int sinpi_4_9 = 15212;
 
 static INLINE int dct_const_round_shift(int input) {
   int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  // For valid VP9 input streams, intermediate stage coefficients should always
+  // stay within the range of a signed 16 bit integer. Coefficients can go out
+  // of this range for invalid/corrupt VP9 streams. However, strictly checking
+  // this range for every intermediate coefficient can be burdensome for a
+  // decoder, therefore the following assertion is only enabled when configured
+  // with --enable-coefficient-range-checking.
+  assert(INT16_MIN <= rv);
+  assert(rv <= INT16_MAX);
+#endif
   return (int16_t)rv;
 }
 
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 0fe58c5c8..ab64d3036 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -20,7 +20,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                              int block, int mi_row, int mi_col) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
-  const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
+  const MODE_INFO *prev_mi = !cm->error_resilient_mode && cm->prev_mi
         ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]
         : NULL;
   const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 97b267c0b..ae32aff7d 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -188,11 +188,6 @@ typedef struct VP9Common {
   int error_resilient_mode;
   int frame_parallel_decoding_mode;
 
-  // Flag indicates if prev_mi can be used in coding:
-  //   0: encoder assumes decoder does not have prev_mi
-  //   1: encoder assumes decoder has and uses prev_mi
-  unsigned int coding_use_prev_mi;
-
   int log2_tile_cols, log2_tile_rows;
 
   // Private data associated with the frame buffer callbacks.
@@ -207,6 +202,12 @@ typedef struct VP9Common {
   ENTROPY_CONTEXT *above_context;
 } VP9_COMMON;
 
+static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) {
+  if (index < 0 || index >= REF_FRAMES)
+    return NULL;
+  return &cm->frame_bufs[cm->ref_frame_map[index]].buf;
+}
+
 static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
   return &cm->frame_bufs[cm->new_fb_idx].buf;
 }
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index cdf5fb916..014638466 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -366,7 +366,7 @@ int vp9_get_tx_size_context(const MACROBLOCKD *xd) {
   return (above_ctx + left_ctx) > max_tx_size;
 }
 
-int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
+int vp9_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids,
                        BLOCK_SIZE bsize, int mi_row, int mi_col) {
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
   const int bw = num_8x8_blocks_wide_lookup[bsize];
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 1a7ba86e4..2c965068a 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -26,7 +26,7 @@ static INLINE const MODE_INFO *get_left_mi(const MACROBLOCKD *const xd) {
   return xd->left_available ? xd->mi[-1] : NULL;
 }
 
-int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
+int vp9_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids,
                        BLOCK_SIZE bsize, int mi_row, int mi_col);
 
 static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) {
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 8d917919b..708f41b87 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
 specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
 specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int  src_stride, const
 specialize qw/vp9_sad4x4x8 sse4/;
 
 add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2/;
+specialize qw/vp9_sad64x64x4d sse2 avx2/;
 
 add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad32x64x4d sse2/;
@@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int  src_stride, co
 specialize qw/vp9_sad16x32x4d sse2/;
 
 add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2/;
+specialize qw/vp9_sad32x32x4d sse2 avx2/;
 
 add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad16x16x4d sse2/;
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index a448bd2b4..07971687c 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1095,6 +1095,40 @@ BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) {
   return (BITSTREAM_PROFILE) profile;
 }
 
+static void read_bitdepth_colorspace_sampling(
+    VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
+  if (cm->profile >= PROFILE_2)
+    cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10;
+  cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
+  if (cm->color_space != SRGB) {
+    vp9_rb_read_bit(rb);  // [16,235] (including xvycc) vs [0,255] range
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      cm->subsampling_x = vp9_rb_read_bit(rb);
+      cm->subsampling_y = vp9_rb_read_bit(rb);
+      if (cm->subsampling_x == 1 && cm->subsampling_y == 1)
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "4:2:0 color not supported in profile 1 or 3");
+      if (vp9_rb_read_bit(rb))
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reserved bit set");
+    } else {
+      cm->subsampling_y = cm->subsampling_x = 1;
+    }
+  } else {
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed.
+      // 4:2:2 or 4:4:0 chroma sampling is not allowed.
+      cm->subsampling_y = cm->subsampling_x = 0;
+      if (vp9_rb_read_bit(rb))
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reserved bit set");
+    } else {
+      vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                         "4:4:4 color not supported in profile 0 or 2");
+    }
+  }
+}
+
 static size_t read_uncompressed_header(VP9Decoder *pbi,
                                        struct vp9_read_bit_buffer *rb) {
   VP9_COMMON *const cm = &pbi->common;
@@ -1137,32 +1171,8 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
     if (!vp9_read_sync_code(rb))
       vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                          "Invalid frame sync code");
-    if (cm->profile > PROFILE_1)
-      cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10;
-    cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
-    if (cm->color_space != SRGB) {
-      vp9_rb_read_bit(rb);  // [16,235] (including xvycc) vs [0,255] range
-      if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
-        cm->subsampling_x = vp9_rb_read_bit(rb);
-        cm->subsampling_y = vp9_rb_read_bit(rb);
-        if (vp9_rb_read_bit(rb))
-          vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
-                             "Reserved bit set");
-      } else {
-        cm->subsampling_y = cm->subsampling_x = 1;
-      }
-    } else {
-      if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
-        cm->subsampling_y = cm->subsampling_x = 0;
-        if (vp9_rb_read_bit(rb))
-          vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
-                             "Reserved bit set");
-      } else {
-        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
-                           "4:4:4 color not supported in profile 0");
-      }
-    }
 
+    read_bitdepth_colorspace_sampling(cm, rb);
     pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
 
     for (i = 0; i < REFS_PER_FRAME; ++i) {
@@ -1181,15 +1191,18 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
       if (!vp9_read_sync_code(rb))
         vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                            "Invalid frame sync code");
+      if (cm->profile > PROFILE_0) {
+        read_bitdepth_colorspace_sampling(cm, rb);
+      } else {
+        // NOTE: The intra-only frame header does not include the specification
+        // of either the color format or color sub-sampling in profile 0. VP9
+        // specifies that the default color space should be YUV 4:2:0 in this
+        // case (normative).
+        cm->color_space = BT_601;
+        cm->subsampling_y = cm->subsampling_x = 1;
+      }
 
       pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
-
-      // NOTE: The intra-only frame header does not include the specification of
-      // either the color format or color sub-sampling. VP9 specifies that the
-      // default color space should be YUV 4:2:0 in this case (normative).
-      cm->color_space = BT_601;
-      cm->subsampling_y = cm->subsampling_x = 1;
-
       setup_frame_size(cm, rb);
     } else {
       pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
@@ -1220,11 +1233,9 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
   }
 
   if (!cm->error_resilient_mode) {
-    cm->coding_use_prev_mi = 1;
     cm->refresh_frame_context = vp9_rb_read_bit(rb);
     cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb);
   } else {
-    cm->coding_use_prev_mi = 0;
     cm->refresh_frame_context = 0;
     cm->frame_parallel_decoding_mode = 1;
   }
@@ -1400,7 +1411,7 @@ void vp9_decode_frame(VP9Decoder *pbi,
 
   init_macroblockd(cm, &pbi->mb);
 
-  if (cm->coding_use_prev_mi)
+  if (!cm->error_resilient_mode)
     set_prev_mi(cm);
   else
     cm->prev_mi = NULL;
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 0343c214c..2a2f0f5fa 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -181,17 +181,6 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
   return cm->error.error_code;
 }
 
-
-int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) {
-  VP9_COMMON *cm = &pbi->common;
-
-  if (index < 0 || index >= REF_FRAMES)
-    return -1;
-
-  *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf;
-  return 0;
-}
-
 /* If any buffer updating is signaled it should be done here. */
 static void swap_frame_buffers(VP9Decoder *pbi) {
   int ref_index = 0, mask;
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 8e16e1cac..223b66fc7 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -74,9 +74,6 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
                                       VP9_REFFRAME ref_frame_flag,
                                       YV12_BUFFER_CONFIG *sd);
 
-int vp9_get_reference_dec(struct VP9Decoder *pbi,
-                          int index, YV12_BUFFER_CONFIG **fb);
-
 struct VP9Decoder *vp9_decoder_create();
 
 void vp9_decoder_remove(struct VP9Decoder *pbi);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 573af9122..b0ff0fa81 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -78,13 +78,13 @@ static void prob_diff_update(const vp9_tree_index *tree,
     vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
 }
 
-static void write_selected_tx_size(const VP9_COMP *cpi,
+static void write_selected_tx_size(const VP9_COMMON *cm,
+                                   const MACROBLOCKD *xd,
                                    TX_SIZE tx_size, BLOCK_SIZE bsize,
                                    vp9_writer *w) {
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   const vp9_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
-                                                 &cpi->common.fc.tx_probs);
+                                                 &cm->fc.tx_probs);
   vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
   if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
     vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
@@ -93,14 +93,13 @@ static void write_selected_tx_size(const VP9_COMP *cpi,
   }
 }
 
-static int write_skip(const VP9_COMP *cpi, int segment_id, const MODE_INFO *mi,
-                      vp9_writer *w) {
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-  if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
+static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                      int segment_id, const MODE_INFO *mi, vp9_writer *w) {
+  if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
     return 1;
   } else {
     const int skip = mi->mbmi.skip;
-    vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd));
+    vp9_write(w, skip, vp9_get_skip_prob(cm, xd));
     return skip;
   }
 }
@@ -121,7 +120,7 @@ static void update_switchable_interp_probs(VP9_COMMON *cm, vp9_writer *w) {
 }
 
 static void pack_mb_tokens(vp9_writer *w,
-                           TOKENEXTRA **tp, const TOKENEXTRA *stop) {
+                           TOKENEXTRA **tp, const TOKENEXTRA *const stop) {
   TOKENEXTRA *p = *tp;
 
   while (p < stop && p->token != EOSB_TOKEN) {
@@ -188,9 +187,8 @@ static void write_segment_id(vp9_writer *w, const struct segmentation *seg,
 }
 
 // This function encodes the reference frame
-static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) {
-  const VP9_COMMON *const cm = &cpi->common;
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                             vp9_writer *w) {
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int is_compound = has_second_ref(mbmi);
   const int segment_id = mbmi->segment_id;
@@ -252,7 +250,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
     }
   }
 
-  skip = write_skip(cpi, segment_id, mi, w);
+  skip = write_skip(cm, xd, segment_id, mi, w);
 
   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
     vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
@@ -260,7 +258,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
       !(is_inter &&
         (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
-    write_selected_tx_size(cpi, mbmi->tx_size, bsize, w);
+    write_selected_tx_size(cm, xd, mbmi->tx_size, bsize, w);
   }
 
   if (!is_inter) {
@@ -281,7 +279,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
   } else {
     const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
     const vp9_prob *const inter_probs = cm->fc.inter_mode_probs[mode_ctx];
-    write_ref_frames(cpi, w);
+    write_ref_frames(cm, xd, w);
 
     // If segment skip is not enabled code the mode.
     if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
@@ -329,10 +327,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
   }
 }
 
-static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
-                              vp9_writer *w) {
-  const VP9_COMMON *const cm = &cpi->common;
-  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                              MODE_INFO **mi_8x8, vp9_writer *w) {
   const struct segmentation *const seg = &cm->seg;
   const MODE_INFO *const mi = mi_8x8[0];
   const MODE_INFO *const above_mi = mi_8x8[-xd->mi_stride];
@@ -343,10 +339,10 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
   if (seg->update_map)
     write_segment_id(w, seg, mbmi->segment_id);
 
-  write_skip(cpi, mbmi->segment_id, mi, w);
+  write_skip(cm, xd, mbmi->segment_id, mi, w);
 
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT)
-    write_selected_tx_size(cpi, mbmi->tx_size, bsize, w);
+    write_selected_tx_size(cm, xd, mbmi->tx_size, bsize, w);
 
   if (bsize >= BLOCK_8X8) {
     write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0));
@@ -368,9 +364,10 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
 }
 
 static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
-                          vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+                          vp9_writer *w, TOKENEXTRA **tok,
+                          const TOKENEXTRA *const tok_end,
                           int mi_row, int mi_col) {
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   MODE_INFO *m;
 
@@ -382,7 +379,7 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
                  mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type],
                  cm->mi_rows, cm->mi_cols);
   if (frame_is_intra_only(cm)) {
-    write_mb_modes_kf(cpi, xd->mi, w);
+    write_mb_modes_kf(cm, xd, xd->mi, w);
   } else {
     pack_inter_mode_mvs(cpi, m, w);
   }
@@ -391,7 +388,8 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
   pack_mb_tokens(w, tok, tok_end);
 }
 
-static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
+static void write_partition(const VP9_COMMON *const cm,
+                            const MACROBLOCKD *const xd,
                             int hbs, int mi_row, int mi_col,
                             PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) {
   const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -413,17 +411,17 @@ static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
 }
 
 static void write_modes_sb(VP9_COMP *cpi,
-                           const TileInfo *const tile,
-                           vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+                           const TileInfo *const tile, vp9_writer *w,
+                           TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
                            int mi_row, int mi_col, BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
   const int bsl = b_width_log2(bsize);
   const int bs = (1 << bsl) / 4;
   PARTITION_TYPE partition;
   BLOCK_SIZE subsize;
-  MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
+  const MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
@@ -469,8 +467,8 @@ static void write_modes_sb(VP9_COMP *cpi,
 }
 
 static void write_modes(VP9_COMP *cpi,
-                        const TileInfo *const tile,
-                        vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
+                        const TileInfo *const tile, vp9_writer *w,
+                        TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
   int mi_row, mi_col;
 
   for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
@@ -731,7 +729,7 @@ static void write_delta_q(struct vp9_write_bit_buffer *wb, int delta_q) {
   }
 }
 
-static void encode_quantization(VP9_COMMON *cm,
+static void encode_quantization(const VP9_COMMON *const cm,
                                 struct vp9_write_bit_buffer *wb) {
   vp9_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
   write_delta_q(wb, cm->y_dc_delta_q);
@@ -739,11 +737,11 @@ static void encode_quantization(VP9_COMMON *cm,
   write_delta_q(wb, cm->uv_ac_delta_q);
 }
 
-static void encode_segmentation(VP9_COMP *cpi,
+static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
                                 struct vp9_write_bit_buffer *wb) {
   int i, j;
 
-  struct segmentation *seg = &cpi->common.seg;
+  const struct segmentation *seg = &cm->seg;
 
   vp9_wb_write_bit(wb, seg->enabled);
   if (!seg->enabled)
@@ -753,7 +751,7 @@ static void encode_segmentation(VP9_COMP *cpi,
   vp9_wb_write_bit(wb, seg->update_map);
   if (seg->update_map) {
     // Select the coding strategy (temporal or spatial)
-    vp9_choose_segmap_coding_method(cpi);
+    vp9_choose_segmap_coding_method(cm, xd);
     // Write out probabilities used to decode unpredicted  macro-block segments
     for (i = 0; i < SEG_TREE_PROBS; i++) {
       const int prob = seg->tree_probs[i];
@@ -869,7 +867,8 @@ static void fix_interp_filter(VP9_COMMON *cm) {
   }
 }
 
-static void write_tile_info(VP9_COMMON *cm, struct vp9_write_bit_buffer *wb) {
+static void write_tile_info(const VP9_COMMON *const cm,
+                            struct vp9_write_bit_buffer *wb) {
   int min_log2_tile_cols, max_log2_tile_cols, ones;
   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
 
@@ -903,7 +902,7 @@ static int get_refresh_mask(VP9_COMP *cpi) {
            (cpi->refresh_golden_frame << cpi->alt_fb_idx);
   } else {
     int arf_idx = cpi->alt_fb_idx;
-    if ((cpi->pass == 2) && cpi->multi_arf_allowed) {
+    if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
       arf_idx = gf_group->arf_update_idx[gf_group->index];
     }
@@ -1041,6 +1040,29 @@ static void write_profile(BITSTREAM_PROFILE profile,
   }
 }
 
+static void write_bitdepth_colorspace_sampling(
+    VP9_COMMON *const cm, struct vp9_write_bit_buffer *wb) {
+  if (cm->profile >= PROFILE_2) {
+    assert(cm->bit_depth > BITS_8);
+    vp9_wb_write_bit(wb, cm->bit_depth - BITS_10);
+  }
+  vp9_wb_write_literal(wb, cm->color_space, 3);
+  if (cm->color_space != SRGB) {
+    vp9_wb_write_bit(wb, 0);  // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
+      vp9_wb_write_bit(wb, cm->subsampling_x);
+      vp9_wb_write_bit(wb, cm->subsampling_y);
+      vp9_wb_write_bit(wb, 0);  // unused
+    } else {
+      assert(cm->subsampling_x == 1 && cm->subsampling_y == 1);
+    }
+  } else {
+    assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);
+    vp9_wb_write_bit(wb, 0);  // unused
+  }
+}
+
 static void write_uncompressed_header(VP9_COMP *cpi,
                                       struct vp9_write_bit_buffer *wb) {
   VP9_COMMON *const cm = &cpi->common;
@@ -1055,25 +1077,8 @@ static void write_uncompressed_header(VP9_COMP *cpi,
   vp9_wb_write_bit(wb, cm->error_resilient_mode);
 
   if (cm->frame_type == KEY_FRAME) {
-    const COLOR_SPACE cs = UNKNOWN;
     write_sync_code(wb);
-    if (cm->profile > PROFILE_1) {
-      assert(cm->bit_depth > BITS_8);
-      vp9_wb_write_bit(wb, cm->bit_depth - BITS_10);
-    }
-    vp9_wb_write_literal(wb, cs, 3);
-    if (cs != SRGB) {
-      vp9_wb_write_bit(wb, 0);  // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
-      if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
-        vp9_wb_write_bit(wb, cm->subsampling_x);
-        vp9_wb_write_bit(wb, cm->subsampling_y);
-        vp9_wb_write_bit(wb, 0);  // unused
-      }
-    } else {
-      assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);
-      vp9_wb_write_bit(wb, 0);  // unused
-    }
-
+    write_bitdepth_colorspace_sampling(cm, wb);
     write_frame_size(cm, wb);
   } else {
     if (!cm->show_frame)
@@ -1085,6 +1090,11 @@ static void write_uncompressed_header(VP9_COMP *cpi,
     if (cm->intra_only) {
       write_sync_code(wb);
 
+      // Note: for profile 0, 4:2:0 8bpp is assumed, so nothing is written.
+      if (cm->profile > PROFILE_0) {
+        write_bitdepth_colorspace_sampling(cm, wb);
+      }
+
       vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
       write_frame_size(cm, wb);
     } else {
@@ -1114,7 +1124,7 @@ static void write_uncompressed_header(VP9_COMP *cpi,
 
   encode_loopfilter(&cm->lf, wb);
   encode_quantization(cm, wb);
-  encode_segmentation(cpi, wb);
+  encode_segmentation(cm, &cpi->mb.e_mbd, wb);
 
   write_tile_info(cm, wb);
 }
@@ -1206,7 +1216,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
   saved_wb = wb;
   vp9_wb_write_literal(&wb, 0, 16);  // don't know in advance first part. size
 
-  uncompressed_hdr_size = vp9_rb_bytes_written(&wb);
+  uncompressed_hdr_size = vp9_wb_bytes_written(&wb);
   data += uncompressed_hdr_size;
 
   vp9_clear_system_state();
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index ed7029ede..bd3b0fdc8 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -41,6 +41,7 @@ struct macroblock_plane {
   int16_t *zbin;
   int16_t *round;
 
+  int64_t quant_thred[2];
   // Zbin Over Quant value
   int16_t zbin_extra;
 };
@@ -115,7 +116,9 @@ struct macroblock {
   int quant_fp;
 
   // skip forward transform and quantization
-  int skip_txfm;
+  int skip_txfm[MAX_MB_PLANE];
+
+  int64_t bsse[MAX_MB_PLANE];
 
   // Used to store sub partition's choices.
   MV pred_mv[MAX_REF_FRAMES];
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index b11a0ae3b..d60e6c3eb 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -33,7 +33,7 @@ typedef struct {
   int is_coded;
   int num_4x4_blk;
   int skip;
-  int skip_txfm;
+  int skip_txfm[MAX_MB_PLANE];
   int best_mode_index;
   int hybrid_pred_diff;
   int comp_pred_diff;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index d7efc5981..b20b662f3 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -188,11 +188,9 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
   }
 }
 
-static void duplicate_mode_info_in_sb(VP9_COMMON * const cm,
-                                     MACROBLOCKD *const xd,
-                                     int mi_row,
-                                     int mi_col,
-                                     BLOCK_SIZE bsize) {
+static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
+                                      int mi_row, int mi_col,
+                                      BLOCK_SIZE bsize) {
   const int block_width = num_8x8_blocks_wide_lookup[bsize];
   const int block_height = num_8x8_blocks_high_lookup[bsize];
   int i, j;
@@ -799,9 +797,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
   }
 }
 
-static void update_stats(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  const MACROBLOCK *const x = &cpi->mb;
+static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MODE_INFO *const mi = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
@@ -913,7 +909,7 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
   encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
 
   if (output_enabled) {
-    update_stats(cpi);
+    update_stats(&cpi->common, &cpi->mb);
 
     (*tp)->token = EOSB_TOKEN;
     (*tp)++;
@@ -1323,15 +1319,13 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
   }
 
   x->skip = ctx->skip;
-  x->skip_txfm = mbmi->segment_id ? 0 : ctx->skip_txfm;
+  x->skip_txfm[0] = mbmi->segment_id ? 0 : ctx->skip_txfm[0];
 }
 
 static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
                         TOKENEXTRA **tp, int mi_row, int mi_col,
                      int output_enabled, BLOCK_SIZE bsize,
                      PICK_MODE_CONTEXT *ctx) {
-
-
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
   update_state_rt(cpi, ctx, mi_row, mi_col, bsize);
 
@@ -1343,7 +1337,7 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
 #endif
 
   encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
-  update_stats(cpi);
+  update_stats(&cpi->common, &cpi->mb);
 
   (*tp)->token = EOSB_TOKEN;
   (*tp)++;
@@ -1366,7 +1360,6 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
     return;
 
   if (bsize >= BLOCK_8X8) {
-    MACROBLOCKD *const xd = &cpi->mb.e_mbd;
     const int idx_str = xd->mi_stride * mi_row + mi_col;
     MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
     ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -1729,10 +1722,9 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
 //
 // The min and max are assumed to have been initialized prior to calling this
 // function so repeat calls can accumulate a min and max of more than one sb64.
-static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8,
-                                        BLOCK_SIZE * min_block_size,
-                                        BLOCK_SIZE * max_block_size ) {
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
+                                        BLOCK_SIZE *min_block_size,
+                                        BLOCK_SIZE *max_block_size ) {
   int sb_width_in_blocks = MI_BLOCK_SIZE;
   int sb_height_in_blocks  = MI_BLOCK_SIZE;
   int i, j;
@@ -1787,17 +1779,17 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
     if (cm->frame_type != KEY_FRAME) {
       MODE_INFO **const prev_mi =
           &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
-      get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size);
+      get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size);
     }
     // Find the min and max partition sizes used in the left SB64
     if (left_in_image) {
       MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, left_sb64_mi, &min_size, &max_size);
+      get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size);
     }
     // Find the min and max partition sizes used in the above SB64.
     if (above_in_image) {
       MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, above_sb64_mi, &min_size, &max_size);
+      get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size);
     }
     // adjust observed min and max
     if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
@@ -2484,7 +2476,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
       } else {
         GF_GROUP * gf_grp = &cpi->twopass.gf_group;
         int last_was_mid_sequence_overlay = 0;
-        if ((cpi->pass == 2) && (gf_grp->index)) {
+        if ((cpi->oxcf.pass == 2) && (gf_grp->index)) {
           if (gf_grp->update_type[gf_grp->index - 1] == OVERLAY_UPDATE)
             last_was_mid_sequence_overlay = 1;
         }
@@ -2706,6 +2698,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                                  int mi_col, BLOCK_SIZE bsize, int *rate,
                                  int64_t *dist, int do_recon, int64_t best_rd,
                                  PC_TREE *pc_tree) {
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2737,18 +2731,18 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
 
   // Determine partition types in search according to the speed features.
   // The threshold set here has to be of square block size.
-  if (cpi->sf.auto_min_max_partition_size) {
-    partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
-                               bsize >= cpi->sf.min_partition_size);
-    partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
-                                bsize >  cpi->sf.min_partition_size) ||
+  if (sf->auto_min_max_partition_size) {
+    partition_none_allowed &= (bsize <= sf->max_partition_size &&
+                               bsize >= sf->min_partition_size);
+    partition_horz_allowed &= ((bsize <= sf->max_partition_size &&
+                                bsize > sf->min_partition_size) ||
                                 force_horz_split);
-    partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
-                                bsize >  cpi->sf.min_partition_size) ||
+    partition_vert_allowed &= ((bsize <= sf->max_partition_size &&
+                                bsize > sf->min_partition_size) ||
                                 force_vert_split);
-    do_split &= bsize > cpi->sf.min_partition_size;
+    do_split &= bsize > sf->min_partition_size;
   }
-  if (cpi->sf.use_square_partition_only) {
+  if (sf->use_square_partition_only) {
     partition_horz_allowed &= force_horz_split;
     partition_vert_allowed &= force_vert_split;
   }
@@ -2758,7 +2752,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
                         &this_rate, &this_dist, bsize, ctx);
     ctx->mic.mbmi = xd->mi[0]->mbmi;
-    ctx->skip_txfm = x->skip_txfm;
+    ctx->skip_txfm[0] = x->skip_txfm[0];
     ctx->skip = x->skip;
 
     if (this_rate != INT_MAX) {
@@ -2827,7 +2821,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
     } else {
       // skip rectangular partition test when larger block size
       // gives better rd cost
-      if (cpi->sf.less_rectangular_check)
+      if (sf->less_rectangular_check)
         do_rect &= !partition_none_allowed;
     }
   }
@@ -2835,7 +2829,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   // PARTITION_HORZ
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    if (cpi->sf.adaptive_motion_search)
+    if (sf->adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
@@ -2843,7 +2837,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                         &pc_tree->horizontal[0]);
 
     pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
-    pc_tree->horizontal[0].skip_txfm = x->skip_txfm;
+    pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
     pc_tree->horizontal[0].skip = x->skip;
 
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -2855,7 +2849,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                           &pc_tree->horizontal[1]);
 
       pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
-      pc_tree->horizontal[1].skip_txfm = x->skip_txfm;
+      pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->horizontal[1].skip = x->skip;
 
       if (this_rate == INT_MAX) {
@@ -2880,14 +2874,14 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   if (partition_vert_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_VERT);
 
-    if (cpi->sf.adaptive_motion_search)
+    if (sf->adaptive_motion_search)
       load_pred_mv(x, ctx);
 
     nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
                         &this_rate, &this_dist, subsize,
                         &pc_tree->vertical[0]);
     pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
-    pc_tree->vertical[0].skip_txfm = x->skip_txfm;
+    pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
     pc_tree->vertical[0].skip = x->skip;
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
     if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
@@ -2896,7 +2890,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                           &this_rate, &this_dist, subsize,
                           &pc_tree->vertical[1]);
       pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
-      pc_tree->vertical[1].skip_txfm = x->skip_txfm;
+      pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->vertical[1].skip = x->skip;
       if (this_rate == INT_MAX) {
         sum_rd = INT64_MAX;
@@ -2937,12 +2931,12 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
     // Check the projected output rate for this SB against it's target
     // and and if necessary apply a Q delta using segmentation to get
     // closer to the target.
-    if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
+    if ((oxcf->aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
       vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
                                     best_rate);
     }
 
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+    if (oxcf->aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
                                               best_rate, best_dist);
 
@@ -2987,20 +2981,20 @@ static void nonrd_use_partition(VP9_COMP *cpi,
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
                           subsize, &pc_tree->none);
       pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
-      pc_tree->none.skip_txfm = x->skip_txfm;
+      pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
       pc_tree->none.skip = x->skip;
       break;
     case PARTITION_VERT:
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
                           subsize, &pc_tree->vertical[0]);
       pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
-      pc_tree->vertical[0].skip_txfm = x->skip_txfm;
+      pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->vertical[0].skip = x->skip;
       if (mi_col + hbs < cm->mi_cols) {
         nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
                             &rate, &dist, subsize, &pc_tree->vertical[1]);
         pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
-        pc_tree->vertical[1].skip_txfm = x->skip_txfm;
+        pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->vertical[1].skip = x->skip;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
@@ -3013,13 +3007,13 @@ static void nonrd_use_partition(VP9_COMP *cpi,
       nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
                           subsize, &pc_tree->horizontal[0]);
       pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
-      pc_tree->horizontal[0].skip_txfm = x->skip_txfm;
+      pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->horizontal[0].skip = x->skip;
       if (mi_row + hbs < cm->mi_rows) {
         nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
                             &rate, &dist, subsize, &pc_tree->horizontal[0]);
         pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
-        pc_tree->horizontal[1].skip_txfm = x->skip_txfm;
+        pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->horizontal[1].skip = x->skip;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
@@ -3073,9 +3067,10 @@ static void nonrd_use_partition(VP9_COMP *cpi,
 
 static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                 int mi_row, TOKENEXTRA **tp) {
-  VP9_COMMON *cm = &cpi->common;
-  MACROBLOCK *x = &cpi->mb;
-  MACROBLOCKD *xd = &x->e_mbd;
+  SPEED_FEATURES *const sf = &cpi->sf;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
   int mi_col;
 
   // Initialize the left context for the new SB row
@@ -3085,7 +3080,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
   // Code each SB in the row
   for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
        mi_col += MI_BLOCK_SIZE) {
-    MACROBLOCK *x = &cpi->mb;
     int dummy_rate = 0;
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
@@ -3098,7 +3092,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     vp9_zero(x->pred_mv);
 
     // Set the partition type of the 64X64 block
-    switch (cpi->sf.partition_search_type) {
+    switch (sf->partition_search_type) {
       case VAR_BASED_PARTITION:
         choose_partitioning(cpi, tile, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
@@ -3111,20 +3105,20 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
         break;
       case VAR_BASED_FIXED_PARTITION:
       case FIXED_PARTITION:
-        bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
-                cpi->sf.always_this_block_size :
+        bsize = sf->partition_search_type == FIXED_PARTITION ?
+                sf->always_this_block_size :
                 get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
         nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist, cpi->pc_root);
         break;
       case REFERENCE_PARTITION:
-        if (cpi->sf.partition_check ||
+        if (sf->partition_check ||
             !is_background(cpi, tile, mi_row, mi_col)) {
           set_modeinfo_offsets(cm, xd, mi_row, mi_col);
           auto_partition_range(cpi, tile, mi_row, mi_col,
-                               &cpi->sf.min_partition_size,
-                               &cpi->sf.max_partition_size);
+                               &sf->min_partition_size,
+                               &sf->max_partition_size);
           nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                                &dummy_rate, &dummy_dist, 1, INT64_MAX,
                                cpi->pc_root);
@@ -3144,8 +3138,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
 // end RTC play code
 
 static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
-  SPEED_FEATURES *const sf = &cpi->sf;
-  VP9_COMMON *const cm = &cpi->common;
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const VP9_COMMON *const cm = &cpi->common;
 
   const uint8_t *src = cpi->Source->y_buffer;
   const uint8_t *last_src = cpi->Last_Source->y_buffer;
@@ -3304,19 +3298,19 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   vp9_zero(rd_opt->tx_select_diff);
   vp9_zero(rd_opt->tx_select_threshes);
 
-  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 &&
-                           cm->y_dc_delta_q == 0 &&
-                           cm->uv_dc_delta_q == 0 &&
-                           cm->uv_ac_delta_q == 0;
+  xd->lossless = cm->base_qindex == 0 &&
+                 cm->y_dc_delta_q == 0 &&
+                 cm->uv_dc_delta_q == 0 &&
+                 cm->uv_ac_delta_q == 0;
 
   cm->tx_mode = select_tx_mode(cpi);
 
-  cpi->mb.fwd_txm4x4 = cpi->mb.e_mbd.lossless ? vp9_fwht4x4 : vp9_fdct4x4;
-  cpi->mb.itxm_add = cpi->mb.e_mbd.lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
+  x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+  x->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
 
-  if (cpi->mb.e_mbd.lossless) {
-    cpi->mb.optimize = 0;
-    cpi->common.lf.filter_level = 0;
+  if (xd->lossless) {
+    x->optimize = 0;
+    cm->lf.filter_level = 0;
     cpi->zbin_mode_boost_enabled = 0;
   }
 
@@ -3328,7 +3322,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   set_prev_mi(cm);
 
   x->quant_fp = cpi->sf.use_quant_fp;
-  x->skip_txfm = 0;
+  vp9_zero(x->skip_txfm);
   if (sf->use_nonrd_pick_mode) {
     // Initialize internal buffer pointers for rtc coding, where non-RD
     // mode decision is used and hence no buffer pointer swap needed.
@@ -3568,7 +3562,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
   MODE_INFO **mi_8x8 = xd->mi;
   MODE_INFO *mi = mi_8x8[0];
   MB_MODE_INFO *mbmi = &mi->mbmi;
-  unsigned int segment_id = mbmi->segment_id;
+  const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
+                                             SEG_LVL_SKIP);
   const int mis = cm->mi_stride;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -3578,6 +3573,9 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
                    cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
                    cpi->sf.allow_skip_recode;
 
+  if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
+    vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
+
   x->skip_optimize = ctx->is_coded;
   ctx->is_coded = 1;
   x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
@@ -3612,7 +3610,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
       vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                            &xd->block_refs[ref]->sf);
     }
-    if (!cpi->sf.reuse_inter_pred_sby)
+    if (!cpi->sf.reuse_inter_pred_sby || seg_skip)
       vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
 
     vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
@@ -3623,8 +3621,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
       vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
     } else {
       mbmi->skip = 1;
-      if (output_enabled &&
-          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+      if (output_enabled && !seg_skip)
         cm->counts.skip[vp9_get_skip_context(xd)][1]++;
       reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
     }
@@ -3633,9 +3630,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
   if (output_enabled) {
     if (cm->tx_mode == TX_MODE_SELECT &&
         mbmi->sb_type >= BLOCK_8X8  &&
-        !(is_inter_block(mbmi) &&
-            (mbmi->skip ||
-             vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
+        !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
       ++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd),
                       &cm->counts.tx)[mbmi->tx_size];
     } else {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index a409d6a17..8a737e18e 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -475,22 +475,22 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
     return;
   }
 
-  if (x->skip_txfm == 0) {
-    // full forward transform and quantization
-    if (!x->skip_recode) {
+  if (!x->skip_recode) {
+    if (x->skip_txfm[plane] == 0) {
+      // full forward transform and quantization
       if (x->quant_fp)
         vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
       else
         vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+    } else if (x->skip_txfm[plane] == 2) {
+      // fast path forward transform and quantization
+      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+    } else {
+      // skip forward transform
+      p->eobs[block] = 0;
+      *a = *l = 0;
+      return;
     }
-  } else if (x->skip_txfm == 2) {
-    // fast path forward transform and quantization
-    vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
-  } else {
-    // skip forward transform
-    p->eobs[block] = 0;
-    *a = *l = 0;
-    return;
   }
 
   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6ba3f1cc9..8d7b07b1e 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -65,9 +65,6 @@ void vp9_coef_tree_initialize();
 #ifdef OUTPUT_YUV_DENOISED
 FILE *yuv_denoised_file = NULL;
 #endif
-#ifdef OUTPUT_YUV_SRC
-FILE *yuv_file;
-#endif
 #ifdef OUTPUT_YUV_REC
 FILE *yuv_rec_file;
 #endif
@@ -524,6 +521,7 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
 
   cm->profile = oxcf->profile;
   cm->bit_depth = oxcf->bit_depth;
+  cm->color_space = UNKNOWN;
 
   cm->width = oxcf->width;
   cm->height = oxcf->height;
@@ -552,23 +550,6 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
   set_tile_limits(cpi);
 }
 
-static int get_pass(MODE mode) {
-  switch (mode) {
-    case REALTIME:
-    case ONE_PASS_GOOD:
-    case ONE_PASS_BEST:
-      return 0;
-
-    case TWO_PASS_FIRST:
-      return 1;
-
-    case TWO_PASS_SECOND_GOOD:
-    case TWO_PASS_SECOND_BEST:
-      return 2;
-  }
-  return -1;
-}
-
 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
@@ -583,7 +564,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
     assert(cm->bit_depth > BITS_8);
 
   cpi->oxcf = *oxcf;
-  cpi->pass = get_pass(cpi->oxcf.mode);
 
   rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
 
@@ -653,7 +633,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
 
   if ((cpi->svc.number_temporal_layers > 1 &&
       cpi->oxcf.rc_mode == VPX_CBR) ||
-      (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
+      (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) {
     vp9_update_layer_context_change_config(cpi,
                                            (int)cpi->oxcf.target_bandwidth);
   }
@@ -747,7 +727,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
   cpi->use_svc = 0;
 
   init_config(cpi, oxcf);
-  vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc);
+  vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
 
   cm->current_video_frame = 0;
 
@@ -799,7 +779,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
   // pending further tuning and testing. The code is left in place here
   // as a place holder in regard to the required paths.
   cpi->multi_arf_last_grp_enabled = 0;
-  if (cpi->pass == 2) {
+  if (oxcf->pass == 2) {
     if (cpi->use_svc) {
       cpi->multi_arf_allowed = 0;
       cpi->multi_arf_enabled = 0;
@@ -871,9 +851,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
   yuv_denoised_file = fopen("denoised.yuv", "ab");
 #endif
 #endif
-#ifdef OUTPUT_YUV_SRC
-  yuv_file = fopen("bd.yuv", "ab");
-#endif
 #ifdef OUTPUT_YUV_REC
   yuv_rec_file = fopen("rec.yuv", "wb");
 #endif
@@ -887,9 +864,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
 
   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
 
-  if (cpi->pass == 1) {
+  if (oxcf->pass == 1) {
     vp9_init_first_pass(cpi);
-  } else if (cpi->pass == 2) {
+  } else if (oxcf->pass == 2) {
     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
 
@@ -1065,7 +1042,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
     vp9_clear_system_state();
 
     // printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count);
-    if (cpi->pass != 1) {
+    if (cpi->oxcf.pass != 1) {
       FILE *f = fopen("opsnr.stt", "a");
       double time_encoded = (cpi->last_end_time_stamp_seen
                              - cpi->first_time_stamp_ever) / 10000000.000;
@@ -1149,9 +1126,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
   fclose(yuv_denoised_file);
 #endif
 #endif
-#ifdef OUTPUT_YUV_SRC
-  fclose(yuv_file);
-#endif
 #ifdef OUTPUT_YUV_REC
   fclose(yuv_rec_file);
 #endif
@@ -1302,16 +1276,6 @@ int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
   }
 }
 
-int vp9_get_reference_enc(VP9_COMP *cpi, int index, YV12_BUFFER_CONFIG **fb) {
-  VP9_COMMON *cm = &cpi->common;
-
-  if (index < 0 || index >= REF_FRAMES)
-    return -1;
-
-  *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf;
-  return 0;
-}
-
 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
                           YV12_BUFFER_CONFIG *sd) {
   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
@@ -1329,35 +1293,6 @@ int vp9_update_entropy(VP9_COMP * cpi, int update) {
   return 0;
 }
 
-
-#if defined(OUTPUT_YUV_SRC)
-void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s, FILE *f) {
-  uint8_t *src = s->y_buffer;
-  int h = s->y_height;
-
-  do {
-    fwrite(src, s->y_width, 1, f);
-    src += s->y_stride;
-  } while (--h);
-
-  src = s->u_buffer;
-  h = s->uv_height;
-
-  do {
-    fwrite(src, s->uv_width, 1, f);
-    src += s->uv_stride;
-  } while (--h);
-
-  src = s->v_buffer;
-  h = s->uv_height;
-
-  do {
-    fwrite(src, s->uv_width, 1, f);
-    src += s->uv_stride;
-  } while (--h);
-}
-#endif
-
 #if CONFIG_VP9_TEMPORAL_DENOISING
 #if defined(OUTPUT_YUV_DENOISED)
 // The denoiser buffer is allocated as a YUV 440 buffer. This function writes it
@@ -1589,7 +1524,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
   } else { /* For non key/golden frames */
     if (cpi->refresh_alt_ref_frame) {
       int arf_idx = cpi->alt_fb_idx;
-      if ((cpi->pass == 2) && cpi->multi_arf_allowed) {
+      if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
         const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
         arf_idx = gf_group->arf_update_idx[gf_group->index];
       }
@@ -2070,7 +2005,7 @@ static void set_arf_sign_bias(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   int arf_sign_bias;
 
-  if ((cpi->pass == 2) && cpi->multi_arf_allowed) {
+  if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
     arf_sign_bias = cpi->rc.source_alt_ref_active &&
                     (!cpi->refresh_alt_ref_frame ||
@@ -2158,9 +2093,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
       (cpi->oxcf.frame_parallel_decoding_mode != 0);
 
     // By default, encoder assumes decoder can use prev_mi.
-    cm->coding_use_prev_mi = 1;
     if (cm->error_resilient_mode) {
-      cm->coding_use_prev_mi = 0;
       cm->frame_parallel_decoding_mode = 1;
       cm->reset_frame_context = 0;
       cm->refresh_frame_context = 0;
@@ -2174,19 +2107,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // static regions if indicated.
   // Only allowed in second pass of two pass (as requires lagged coding)
   // and if the relevant speed feature flag is set.
-  if (cpi->pass == 2 && cpi->sf.static_segmentation)
+  if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
     configure_static_seg_features(cpi);
 
   // Check if the current frame is skippable for the partition search in the
   // second pass according to the first pass stats
-  if (cpi->pass == 2 &&
+  if (cpi->oxcf.pass == 2 &&
       (!cpi->use_svc || is_spatial_svc(cpi))) {
     configure_skippable_frame(cpi);
   }
 
   // For 1 pass CBR, check if we are dropping this frame.
   // Never drop on key frame.
-  if (cpi->pass == 0 &&
+  if (cpi->oxcf.pass == 0 &&
       cpi->oxcf.rc_mode == VPX_CBR &&
       cm->frame_type != KEY_FRAME) {
     if (vp9_rc_drop_frame(cpi)) {
@@ -2223,10 +2156,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   }
 #endif
 
-#ifdef OUTPUT_YUV_SRC
-  vp9_write_yuv_frame(cpi->Source, yuv_file);
-#endif
-
   set_speed_features(cpi);
 
   // Decide q and q bounds.
@@ -2444,9 +2373,16 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
   vpx_usec_timer_mark(&timer);
   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
 
-  if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) {
+  if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
+      (subsampling_x != 1 || subsampling_y != 1)) {
+    vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
+                       "Non-4:2:0 color space requires profile 1 or 3");
+    res = -1;
+  }
+  if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
+      (subsampling_x == 1 && subsampling_y == 1)) {
     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
-                       "Non-4:2:0 color space requires profile >= 1");
+                       "4:2:0 color space requires profile 0 or 2");
     res = -1;
   }
 
@@ -2511,7 +2447,7 @@ static int get_arf_src_index(VP9_COMP *cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   int arf_src_index = 0;
   if (is_altref_enabled(cpi)) {
-    if (cpi->pass == 2) {
+    if (cpi->oxcf.pass == 2) {
       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
         arf_src_index = gf_group->arf_src_offset[gf_group->index];
@@ -2526,7 +2462,7 @@ static int get_arf_src_index(VP9_COMP *cpi) {
 static void check_src_altref(VP9_COMP *cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
 
-  if (cpi->pass == 2) {
+  if (cpi->oxcf.pass == 2) {
     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
     rc->is_src_frame_alt_ref =
       (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
@@ -2559,7 +2495,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   if (!cpi)
     return -1;
 
-  if (is_spatial_svc(cpi) && cpi->pass == 2) {
+  if (is_spatial_svc(cpi) && cpi->oxcf.pass == 2) {
 #if CONFIG_SPATIAL_SVC
     vp9_svc_lookahead_peek(cpi, cpi->lookahead, 0, 1);
 #endif
@@ -2673,7 +2609,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
   } else {
     *size = 0;
-    if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) {
+    if (flush && cpi->oxcf.pass == 1 && !cpi->twopass.first_pass_done) {
       vp9_end_first_pass(cpi);    /* get last stats packet */
       cpi->twopass.first_pass_done = 1;
     }
@@ -2711,7 +2647,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   if (!cpi->use_svc && cpi->multi_arf_allowed) {
     if (cm->frame_type == KEY_FRAME) {
       init_buffer_indices(cpi);
-    } else if (cpi->pass == 2) {
+    } else if (cpi->oxcf.pass == 2) {
       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
       cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index];
     }
@@ -2719,7 +2655,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
   cpi->frame_flags = *frame_flags;
 
-  if (cpi->pass == 2 &&
+  if (cpi->oxcf.pass == 2 &&
       cm->current_video_frame == 0 &&
       cpi->oxcf.allow_spatial_resampling &&
       cpi->oxcf.rc_mode == VPX_VBR) {
@@ -2757,13 +2693,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     vp9_vaq_init();
   }
 
-  if (cpi->pass == 1 &&
+  if (cpi->oxcf.pass == 1 &&
       (!cpi->use_svc || is_spatial_svc(cpi))) {
     const int lossless = is_lossless_requested(&cpi->oxcf);
     cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
     cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
     vp9_first_pass(cpi);
-  } else if (cpi->pass == 2 &&
+  } else if (cpi->oxcf.pass == 2 &&
       (!cpi->use_svc || is_spatial_svc(cpi))) {
     Pass2Encode(cpi, size, dest, frame_flags);
   } else if (cpi->use_svc) {
@@ -2788,19 +2724,19 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   // Save layer specific state.
   if ((cpi->svc.number_temporal_layers > 1 &&
       cpi->oxcf.rc_mode == VPX_CBR) ||
-      (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
+      (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) {
     vp9_save_layer_context(cpi);
   }
 
   vpx_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
 
-  if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame)
+  if (cpi->b_calculate_psnr && cpi->oxcf.pass != 1 && cm->show_frame)
     generate_psnr_packet(cpi);
 
 #if CONFIG_INTERNAL_STATS
 
-  if (cpi->pass != 1) {
+  if (cpi->oxcf.pass != 1) {
     cpi->bytes += (int)(*size);
 
     if (cm->show_frame) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index acff17351..c841da267 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -143,6 +143,7 @@ typedef struct VP9EncoderConfig {
   unsigned int rc_max_intra_bitrate_pct;
 
   MODE mode;
+  int pass;
 
   // Key Framing Operations
   int auto_key;  // autodetect cut scenes and set the keyframes
@@ -313,9 +314,6 @@ typedef struct VP9_COMP {
   MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
   int mbgraph_n_frames;             // number of frames filled in the above
   int static_mb_pct;                // % forced skip mbs by segmentation
-
-  int pass;
-
   int ref_frame_flags;
 
   SPEED_FEATURES sf;
@@ -463,9 +461,6 @@ void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags);
 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
                            YV12_BUFFER_CONFIG *sd);
 
-int vp9_get_reference_enc(VP9_COMP *cpi, int index,
-                          YV12_BUFFER_CONFIG **fb);
-
 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
                           YV12_BUFFER_CONFIG *sd);
 
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 0f363a7c3..d36548996 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -24,7 +24,7 @@
 #include "vp9/encoder/vp9_quantize.h"
 
 static int get_max_filter_level(const VP9_COMP *cpi) {
-  if (cpi->pass == 2) {
+  if (cpi->oxcf.pass == 2) {
     return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
                                                  : MAX_LOOP_FILTER;
   } else {
@@ -82,7 +82,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
     // Bias against raising loop filter in favor of lowering it.
     int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
 
-    if ((cpi->pass == 2) && (cpi->twopass.section_intra_rating < 20))
+    if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
       bias = (bias * cpi->twopass.section_intra_rating) / 20;
 
     // yx, bias less for large block size
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 7a1600155..6115f5a0f 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -212,11 +212,11 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
   *sse_y = sse;
 
   if (sse < dc_quant * dc_quant >> 6)
-    x->skip_txfm = 1;
+    x->skip_txfm[0] = 1;
   else if (var < ac_quant * ac_quant >> 6)
-    x->skip_txfm = 2;
+    x->skip_txfm[0] = 2;
   else
-    x->skip_txfm = 0;
+    x->skip_txfm[0] = 0;
 
   if (cpi->common.tx_mode == TX_MODE_SELECT) {
     if (sse > (var << 2))
@@ -458,7 +458,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
                            sf, sf);
 
-      if (cm->coding_use_prev_mi)
+      if (!cm->error_resilient_mode)
         vp9_find_mv_refs(cm, xd, tile, xd->mi[0], ref_frame,
                          candidates, mi_row, mi_col);
       else
@@ -558,7 +558,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
           if (cost < best_cost) {
             best_filter = filter;
             best_cost = cost;
-            skip_txfm = x->skip_txfm;
+            skip_txfm = x->skip_txfm[0];
 
             if (cpi->sf.reuse_inter_pred_sby) {
               if (this_mode_pred != current_pred) {
@@ -584,7 +584,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         dist = pf_dist[mbmi->interp_filter];
         var_y = pf_var[mbmi->interp_filter];
         sse_y = pf_sse[mbmi->interp_filter];
-        x->skip_txfm = skip_txfm;
+        x->skip_txfm[0] = skip_txfm;
       } else {
         mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
         vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
@@ -622,7 +622,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         best_pred_filter = mbmi->interp_filter;
         best_tx_size = mbmi->tx_size;
         best_ref_frame = ref_frame;
-        skip_txfm = x->skip_txfm;
+        skip_txfm = x->skip_txfm[0];
 
         if (cpi->sf.reuse_inter_pred_sby) {
           if (best_pred != NULL)
@@ -664,7 +664,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   mbmi->ref_frame[0]  = best_ref_frame;
   mbmi->mv[0].as_int  = frame_mv[best_mode][best_ref_frame].as_int;
   xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
-  x->skip_txfm = skip_txfm;
+  x->skip_txfm[0] = skip_txfm;
 
   // Perform intra prediction search, if the best SAD is above a certain
   // threshold.
@@ -673,7 +673,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     int i, j;
     const int width  = num_4x4_blocks_wide_lookup[bsize];
     const int height = num_4x4_blocks_high_lookup[bsize];
-    const BLOCK_SIZE bsize_tx = txsize_to_bsize[mbmi->tx_size];
 
     int rate2 = 0;
     int64_t dist2 = 0;
@@ -683,28 +682,36 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
     TX_SIZE tmp_tx_size = MIN(max_txsize_lookup[bsize],
                               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+    const BLOCK_SIZE bsize_tx = txsize_to_bsize[tmp_tx_size];
     const int step = 1 << tmp_tx_size;
 
-    for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
-      if (cpi->sf.reuse_inter_pred_sby) {
-        pd->dst.buf = tmp[0].data;
-        pd->dst.stride = bw;
-      }
+    if (cpi->sf.reuse_inter_pred_sby) {
+      pd->dst.buf = tmp[0].data;
+      pd->dst.stride = bw;
+    }
 
+    for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
+      uint8_t *const src_buf_base = p->src.buf;
+      uint8_t *const dst_buf_base = pd->dst.buf;
       for (j = 0; j < height; j += step) {
         for (i = 0; i < width; i += step) {
+          p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
+          pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
+          // Use source buffer as an approximation for the fully reconstructed
+          // buffer
           vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize),
                                   tmp_tx_size, this_mode,
-                                  &p->src.buf[4 * (j * dst_stride + i)],
-                                  src_stride,
-                                  &pd->dst.buf[4 * (j * dst_stride + i)],
-                                  dst_stride, i, j, 0);
+                                  p->src.buf, src_stride,
+                                  pd->dst.buf, dst_stride,
+                                  i, j, 0);
           model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y);
           rate2 += rate;
           dist2 += dist;
           ++block_idx;
         }
       }
+      p->src.buf = src_buf_base;
+      pd->dst.buf = dst_buf_base;
 
       rate = rate2;
       dist = dist2;
@@ -726,7 +733,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         mbmi->uv_mode = this_mode;
         mbmi->mv[0].as_int = INVALID_MV;
       } else {
-        x->skip_txfm = skip_txfm;
+        x->skip_txfm[0] = skip_txfm;
       }
     }
   }
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index e153b2077..eababdbca 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -23,15 +23,14 @@ void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
                      const int16_t *round_ptr, const int16_t quant,
                      int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                      const int16_t dequant_ptr, uint16_t *eob_ptr) {
-  int eob = -1;
+  const int rc = 0;
+  const int coeff = coeff_ptr[rc];
+  const int coeff_sign = (coeff >> 31);
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+  int tmp, eob = -1;
 
   if (!skip_block) {
-    const int rc = 0;
-    const int coeff = coeff_ptr[rc];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
-    int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+    tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
     tmp = (tmp * quant) >> 16;
     qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
     dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
@@ -45,15 +44,15 @@ void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
                            const int16_t *round_ptr, const int16_t quant,
                            int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr) {
-  int eob = -1;
+  const int rc = 0;
+  const int coeff = coeff_ptr[rc];
+  const int coeff_sign = (coeff >> 31);
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+  int tmp, eob = -1;
 
   if (!skip_block) {
-    const int rc = 0;
-    const int coeff = coeff_ptr[rc];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
 
-    int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+    tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
     tmp = (tmp * quant) >> 15;
     qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
     dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
@@ -354,6 +353,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
   x->plane[0].quant_shift = quants->y_quant_shift[qindex];
   x->plane[0].zbin = quants->y_zbin[qindex];
   x->plane[0].round = quants->y_round[qindex];
+  x->plane[0].quant_thred[0] = cm->y_dequant[qindex][0] *
+                                  cm->y_dequant[qindex][0];
+  x->plane[0].quant_thred[1] = cm->y_dequant[qindex][1] *
+                                  cm->y_dequant[qindex][1];
   x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7);
   xd->plane[0].dequant = cm->y_dequant[qindex];
 
@@ -365,6 +368,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
     x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
     x->plane[i].zbin = quants->uv_zbin[qindex];
     x->plane[i].round = quants->uv_round[qindex];
+    x->plane[i].quant_thred[0] = cm->uv_dequant[qindex][0] *
+                                    cm->uv_dequant[qindex][0];
+    x->plane[i].quant_thred[1] = cm->uv_dequant[qindex][1] *
+                                    cm->uv_dequant[qindex][1];
     x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7);
     xd->plane[i].dequant = cm->uv_dequant[qindex];
   }
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 73c6b89e8..04ab79538 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -277,7 +277,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) {
 
   if (cpi->common.frame_type == KEY_FRAME) {
     return rc->rate_correction_factors[KF_STD];
-  } else if (cpi->pass == 2) {
+  } else if (cpi->oxcf.pass == 2) {
     RATE_FACTOR_LEVEL rf_lvl =
       cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
     return rc->rate_correction_factors[rf_lvl];
@@ -296,7 +296,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
 
   if (cpi->common.frame_type == KEY_FRAME) {
     rc->rate_correction_factors[KF_STD] = factor;
-  } else if (cpi->pass == 2) {
+  } else if (cpi->oxcf.pass == 2) {
     RATE_FACTOR_LEVEL rf_lvl =
       cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
     rc->rate_correction_factors[rf_lvl] = factor;
@@ -923,7 +923,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
 int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
                              int *bottom_index, int *top_index) {
   int q;
-  if (cpi->pass == 0) {
+  if (cpi->oxcf.pass == 0) {
     if (cpi->oxcf.rc_mode == VPX_CBR)
       q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
     else
@@ -991,7 +991,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
     // this frame refreshes means next frames don't unless specified by user
     rc->frames_since_golden = 0;
 
-    if (cpi->pass == 2) {
+    if (cpi->oxcf.pass == 2) {
       if (!rc->source_alt_ref_pending &&
           cpi->twopass.gf_group.rf_level[0] == GF_ARF_STD)
       rc->source_alt_ref_active = 0;
@@ -1242,7 +1242,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
           (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
     }
 
-    if (cpi->pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
+    if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
       target = calc_iframe_target_size_one_pass_cbr(cpi);
     }
   } else {
@@ -1260,7 +1260,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
       cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
     }
 
-    if (cpi->pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
+    if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
       target = calc_pframe_target_size_one_pass_cbr(cpi);
     }
   }
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 633ce088d..4fc3e9e08 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -121,7 +121,7 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
   const int q = vp9_dc_quant(qindex, 0);
   int rdmult = 88 * q * q / 24;
 
-  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
     const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 5caafd370..cc55dd78f 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -171,15 +171,27 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
   int64_t dist_sum = 0;
   const int ref = xd->mi[0]->mbmi.ref_frame[0];
   unsigned int sse;
+  const int shift = 8;
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     struct macroblock_plane *const p = &x->plane[i];
     struct macroblockd_plane *const pd = &xd->plane[i];
     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
 
-    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
-                              pd->dst.buf, pd->dst.stride, &sse);
+    const unsigned int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+                                                pd->dst.buf, pd->dst.stride,
+                                                &sse);
 
+    if (!x->select_tx_size) {
+      if (sse < p->quant_thred[0] >> shift)
+        x->skip_txfm[i] = 1;
+      else if (var < p->quant_thred[1] >> shift)
+        x->skip_txfm[i] = 2;
+      else
+        x->skip_txfm[i] = 0;
+    }
+
+    x->bsse[i] = sse;
     if (i == 0)
       x->pred_sse[ref] = sse;
 
@@ -357,12 +369,32 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
   if (args->skip)
     return;
 
-  if (!is_inter_block(mbmi))
+  if (!is_inter_block(mbmi)) {
     vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
-  else
-    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+    dist_block(plane, block, tx_size, args);
+  } else {
+    if (x->skip_txfm[plane] == 0) {
+      // full forward transform and quantization
+      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+      dist_block(plane, block, tx_size, args);
+    } else if (x->skip_txfm[plane] == 2) {
+      // compute DC coefficient; correct dist only when eob is non-zero
+      int16_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
+      int16_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+      args->sse  = x->bsse[plane] << 4;
+      args->dist = args->sse;
+      if (x->plane[plane].eobs[block])
+        args->dist = args->sse - ((coeff[0] * coeff[0] -
+            (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);
+    } else {
+      // skip forward transform
+      x->plane[plane].eobs[block] = 0;
+      args->sse  = x->bsse[plane] << 4;
+      args->dist = args->sse;
+    }
+  }
 
-  dist_block(plane, block, tx_size, args);
   rate_block(plane, block, plane_bsize, tx_size, args);
   rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
   rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
@@ -2102,6 +2134,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int orig_dst_stride[MAX_MB_PLANE];
   int rs = 0;
   INTERP_FILTER best_filter = SWITCHABLE;
+  int skip_txfm[MAX_MB_PLANE] = {0};
+  int64_t bsse[MAX_MB_PLANE] = {0};
 
   int bsl = mi_width_log2_lookup[bsize];
   int pred_filter_search = cpi->sf.cb_pred_filter_search ?
@@ -2264,6 +2298,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
           best_filter = mbmi->interp_filter;
           if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
             best_needs_copy = !best_needs_copy;
+          vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
+          vpx_memcpy(bsse, x->bsse, sizeof(bsse));
         }
 
         if ((cm->interp_filter == SWITCHABLE && newbest) ||
@@ -2316,6 +2352,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                               disable_skip);
   }
 
+  vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
+  vpx_memcpy(x->bsse, bsse, sizeof(bsse));
+
   if (!x->skip) {
     int skippable_y, skippable_uv;
     int64_t sseuv = INT64_MAX;
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 897ae0129..d5676c3d1 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -110,14 +110,12 @@ static int cost_segmap(int *segcounts, vp9_prob *probs) {
   return cost;
 }
 
-static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
-                       MODE_INFO **mi,
+static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
+                       const TileInfo *tile, MODE_INFO **mi,
                        int *no_pred_segcounts,
                        int (*temporal_predictor_count)[2],
                        int *t_unpred_seg_counts,
                        int bw, int bh, int mi_row, int mi_col) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   int segment_id;
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
@@ -151,14 +149,13 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
   }
 }
 
-static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
-                          MODE_INFO **mi,
+static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd,
+                          const TileInfo *tile, MODE_INFO **mi,
                           int *no_pred_segcounts,
                           int (*temporal_predictor_count)[2],
                           int *t_unpred_seg_counts,
                           int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
-  const VP9_COMMON *const cm = &cpi->common;
   const int mis = cm->mi_stride;
   int bw, bh;
   const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
@@ -170,18 +167,18 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
   bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
 
   if (bw == bs && bh == bs) {
-    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, bs, mi_row, mi_col);
   } else if (bw == bs && bh < bs) {
-    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
-    count_segs(cpi, tile, mi + hbs * mis, no_pred_segcounts,
+    count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
                temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
                mi_row + hbs, mi_col);
   } else if (bw < bs && bh == bs) {
-    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
-    count_segs(cpi, tile, mi + hbs,
+    count_segs(cm, xd, tile, mi + hbs,
                no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts,
                hbs, bs, mi_row, mi_col + hbs);
   } else {
@@ -194,7 +191,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
       const int mi_dc = hbs * (n & 1);
       const int mi_dr = hbs * (n >> 1);
 
-      count_segs_sb(cpi, tile, &mi[mi_dr * mis + mi_dc],
+      count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc],
                     no_pred_segcounts, temporal_predictor_count,
                     t_unpred_seg_counts,
                     mi_row + mi_dr, mi_col + mi_dc, subsize);
@@ -202,8 +199,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
   }
 }
 
-void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
+void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd) {
   struct segmentation *seg = &cm->seg;
 
   int no_pred_cost;
@@ -237,7 +233,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
       MODE_INFO **mi = mi_ptr;
       for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
            mi_col += 8, mi += 8)
-        count_segs_sb(cpi, &tile, mi, no_pred_segcounts,
+        count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts,
                       temporal_predictor_count, t_unpred_seg_counts,
                       mi_row, mi_col, BLOCK_64X64);
     }
diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h
index 50dd562c8..8c6944ad1 100644
--- a/vp9/encoder/vp9_segmentation.h
+++ b/vp9/encoder/vp9_segmentation.h
@@ -42,7 +42,7 @@ void vp9_clear_segdata(struct segmentation *seg,
 void vp9_set_segment_data(struct segmentation *seg, signed char *feature_data,
                           unsigned char abs_delta);
 
-void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
+void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd);
 
 void vp9_reset_segment_features(struct segmentation *seg);
 
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index e770f33e9..f2e99cf22 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -405,11 +405,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
   // Slow quant, dct and trellis not worthwhile for first pass
   // so make sure they are always turned off.
-  if (cpi->pass == 1)
+  if (oxcf->pass == 1)
     sf->optimize_coefficients = 0;
 
   // No recode for 1 pass.
-  if (cpi->pass == 0) {
+  if (oxcf->pass == 0) {
     sf->recode_loop = DISALLOW_RECODE;
     sf->optimize_coefficients = 0;
   }
@@ -418,7 +418,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
   }
 
-  cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1;
+  cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
 
   if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
     sf->adaptive_pred_interp_filter = 0;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 2eca8fcef..ce3b31138 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -405,7 +405,7 @@ static void adjust_arnr_filter(VP9_COMP *cpi,
   }
 
   // Adjustments for second level arf in multi arf case.
-  if (cpi->pass == 2 && cpi->multi_arf_allowed) {
+  if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
     if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
       cpi->active_arnr_strength >>= 1;
diff --git a/vp9/encoder/vp9_write_bit_buffer.c b/vp9/encoder/vp9_write_bit_buffer.c
index 962d0ca56..6d55e84e8 100644
--- a/vp9/encoder/vp9_write_bit_buffer.c
+++ b/vp9/encoder/vp9_write_bit_buffer.c
@@ -8,9 +8,10 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <limits.h>
 #include "vp9/encoder/vp9_write_bit_buffer.h"
 
-size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) {
+size_t vp9_wb_bytes_written(const struct vp9_write_bit_buffer *wb) {
   return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
 }
 
diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h
index 073608d7f..59f9bbe30 100644
--- a/vp9/encoder/vp9_write_bit_buffer.h
+++ b/vp9/encoder/vp9_write_bit_buffer.h
@@ -11,8 +11,6 @@
 #ifndef VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_
 #define VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_
 
-#include <limits.h>
-
 #include "vpx/vpx_integer.h"
 
 #ifdef __cplusplus
@@ -24,7 +22,7 @@ struct vp9_write_bit_buffer {
   size_t bit_offset;
 };
 
-size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb);
+size_t vp9_wb_bytes_written(const struct vp9_write_bit_buffer *wb);
 
 void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit);
 
diff --git a/vp9/encoder/vp9_writer.c b/vp9/encoder/vp9_writer.c
index 8398fc07a..ff461f218 100644
--- a/vp9/encoder/vp9_writer.c
+++ b/vp9/encoder/vp9_writer.c
@@ -15,7 +15,6 @@
 void vp9_start_encode(vp9_writer *br, uint8_t *source) {
   br->lowvalue = 0;
   br->range    = 255;
-  br->value    = 0;
   br->count    = -24;
   br->buffer   = source;
   br->pos      = 0;
diff --git a/vp9/encoder/vp9_writer.h b/vp9/encoder/vp9_writer.h
index 938924be3..9d161f95c 100644
--- a/vp9/encoder/vp9_writer.h
+++ b/vp9/encoder/vp9_writer.h
@@ -22,14 +22,9 @@ extern "C" {
 typedef struct {
   unsigned int lowvalue;
   unsigned int range;
-  unsigned int value;
   int count;
   unsigned int pos;
   uint8_t *buffer;
-
-  // Variables used to track bit costs without outputing to the bitstream
-  unsigned int  measure_cost;
-  uint64_t bit_counter;
 } vp9_writer;
 
 void vp9_start_encode(vp9_writer *bc, uint8_t *buffer);
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
index f31b176e5..1feed6256 100644
--- a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
@@ -31,7 +31,7 @@ void vp9_sad32x32x4d_avx2(uint8_t *src,
   sum_ref3 = _mm256_set1_epi16(0);
   for (i = 0; i < 32 ; i++) {
     // load src and all refs
-    src_reg = _mm256_load_si256((__m256i *)(src));
+    src_reg = _mm256_loadu_si256((__m256i *)(src));
     ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
     ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
     ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
@@ -103,8 +103,8 @@ void vp9_sad64x64x4d_avx2(uint8_t *src,
   sum_ref3 = _mm256_set1_epi16(0);
   for (i = 0; i < 64 ; i++) {
     // load 64 bytes from src and all refs
-    src_reg = _mm256_load_si256((__m256i *)(src));
-    srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
+    src_reg = _mm256_loadu_si256((__m256i *)(src));
+    srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32));
     ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
     ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
     ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
index 34ed1867f..9aa4da962 100644
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
 #define LOAD_SRC_DST \
   /* load source and destination */ \
   src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
-  dst_reg = _mm256_load_si256((__m256i const *) (dst));
+  dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
 
 #define AVG_NEXT_SRC(src_reg, size_stride) \
   src_next_reg = _mm256_loadu_si256((__m256i const *) \
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 82aa97cb0..bf8eec717 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -337,12 +337,15 @@ static vpx_codec_err_t set_encoder_config(
   switch (cfg->g_pass) {
     case VPX_RC_ONE_PASS:
       oxcf->mode = ONE_PASS_GOOD;
+      oxcf->pass = 0;
       break;
     case VPX_RC_FIRST_PASS:
       oxcf->mode = TWO_PASS_FIRST;
+      oxcf->pass = 1;
       break;
     case VPX_RC_LAST_PASS:
       oxcf->mode = TWO_PASS_SECOND_BEST;
+      oxcf->pass = 2;
       break;
   }
 
@@ -804,6 +807,20 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
   return index_sz;
 }
 
+// VP9 expresses timestamps in ticks; there are 10,000,000 ticks per second.
+#define TICKS_PER_SEC 10000000LL
+
+static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
+                                       int64_t n) {  // n: timebase units
+  return n * TICKS_PER_SEC * timebase->num / timebase->den;  // truncates
+}
+
+static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase,
+                                       int64_t n) {  // n: time in ticks
+  const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1;  // rounding bias
+  return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
+}
+
 static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
                                       const vpx_image_t *img,
                                       vpx_codec_pts_t pts,
@@ -811,6 +828,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
                                       vpx_enc_frame_flags_t flags,
                                       unsigned long deadline) {
   vpx_codec_err_t res = VPX_CODEC_OK;
+  const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
 
   if (img != NULL) {
     res = validate_img(ctx, img);
@@ -856,7 +874,9 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
   if (res == VPX_CODEC_OK && ctx->cpi != NULL) {
     unsigned int lib_flags = 0;
     YV12_BUFFER_CONFIG sd;
-    int64_t dst_time_stamp, dst_end_time_stamp;
+    int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
+    int64_t dst_end_time_stamp =
+        timebase_units_to_ticks(timebase, pts + duration);
     size_t size, cx_data_sz;
     unsigned char *cx_data;
 
@@ -864,12 +884,6 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
     if (ctx->base.init_flags & VPX_CODEC_USE_PSNR)
       ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1;
 
-    /* vp9 use 10,000,000 ticks/second as time stamp */
-    dst_time_stamp = (pts * 10000000 * ctx->cfg.g_timebase.num)
-                     / ctx->cfg.g_timebase.den;
-    dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num /
-                         ctx->cfg.g_timebase.den;
-
     if (img != NULL) {
       res = image2yuvconfig(img, &sd);
 
@@ -906,9 +920,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
                                          cx_data, &dst_time_stamp,
                                          &dst_end_time_stamp, !img)) {
       if (size) {
-        vpx_codec_pts_t round, delta;
-        vpx_codec_cx_pkt_t pkt;
         VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
+        vpx_codec_cx_pkt_t pkt;
 
 #if CONFIG_SPATIAL_SVC
         if (is_spatial_svc(cpi))
@@ -933,15 +946,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
         }
 
         // Add the frame packet to the list of returned packets.
-        round = (vpx_codec_pts_t)10000000 * ctx->cfg.g_timebase.num / 2 - 1;
-        delta = (dst_end_time_stamp - dst_time_stamp);
         pkt.kind = VPX_CODEC_CX_FRAME_PKT;
-        pkt.data.frame.pts =
-          (dst_time_stamp * ctx->cfg.g_timebase.den + round)
-          / ctx->cfg.g_timebase.num / 10000000;
-        pkt.data.frame.duration = (unsigned long)
-          ((delta * ctx->cfg.g_timebase.den + round)
-          / ctx->cfg.g_timebase.num / 10000000);
+        pkt.data.frame.pts = ticks_to_timebase_units(timebase, dst_time_stamp);
+        pkt.data.frame.duration =
+           (unsigned long)ticks_to_timebase_units(timebase,
+               dst_end_time_stamp - dst_time_stamp);
         pkt.data.frame.flags = lib_flags << 16;
 
         if (lib_flags & FRAMEFLAGS_KEY
@@ -959,9 +968,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
           // prior PTS so that if a decoder uses pts to schedule when
           // to do this, we start right after last frame was decoded.
           // Invisible frames have no duration.
-          pkt.data.frame.pts = ((cpi->last_time_stamp_seen
-                                 * ctx->cfg.g_timebase.den + round)
-                                / ctx->cfg.g_timebase.num / 10000000) + 1;
+          pkt.data.frame.pts =
+              ticks_to_timebase_units(timebase, cpi->last_time_stamp_seen) + 1;
           pkt.data.frame.duration = 0;
         }
 
@@ -1048,9 +1056,9 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
   vp9_ref_frame_t *const frame = va_arg(args, vp9_ref_frame_t *);
 
   if (frame != NULL) {
-    YV12_BUFFER_CONFIG *fb;
+    YV12_BUFFER_CONFIG *fb = get_ref_frame(&ctx->cpi->common, frame->idx);
+    if (fb == NULL) return VPX_CODEC_ERROR;
 
-    vp9_get_reference_enc(ctx->cpi, frame->idx, &fb);
     yuvconfig2image(&frame->img, fb, NULL);
     return VPX_CODEC_OK;
   } else {
@@ -1317,9 +1325,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
       9999,               // kf_max_dist
 
       VPX_SS_DEFAULT_LAYERS,  // ss_number_layers
-#if CONFIG_SPATIAL_SVC
       {0},
-#endif
       {0},                    // ss_target_bitrate
       1,                      // ts_number_layers
       {0},                    // ts_target_bitrate
@@ -1343,8 +1349,6 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = {
   encoder_init,       // vpx_codec_init_fn_t
   encoder_destroy,    // vpx_codec_destroy_fn_t
   encoder_ctrl_maps,  // vpx_codec_ctrl_fn_map_t
-  NOT_IMPLEMENTED,    // vpx_codec_get_mmap_fn_t
-  NOT_IMPLEMENTED,    // vpx_codec_set_mmap_fn_t
   {  // NOLINT
     NOT_IMPLEMENTED,  // vpx_codec_peek_si_fn_t
     NOT_IMPLEMENTED,  // vpx_codec_get_si_fn_t
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 3ff3f6c76..bc7801152 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -98,6 +98,30 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) {
   return VPX_CODEC_OK;
 }
 
+static int parse_bitdepth_colorspace_sampling(
+    BITSTREAM_PROFILE profile, struct vp9_read_bit_buffer *rb) {
+  const int sRGB = 7;  // colorspace value that selects sRGB
+  int colorspace;
+  if (profile >= PROFILE_2)
+    rb->bit_offset += 1;  // Skip the bit-depth flag (10 or 12).
+  colorspace = vp9_rb_read_literal(rb, 3);  // 3-bit colorspace field
+  if (colorspace != sRGB) {
+    rb->bit_offset += 1;  // [16,235] (including xvycc) vs [0,255] range.
+    if (profile == PROFILE_1 || profile == PROFILE_3) {
+      rb->bit_offset += 2;  // subsampling x/y.
+      rb->bit_offset += 1;  // unused.
+    }
+  } else {
+    if (profile == PROFILE_1 || profile == PROFILE_3) {
+      rb->bit_offset += 1;  // unused.
+    } else {
+      // RGB is only available in version 1.
+      return 0;  // reject: sRGB outside profiles 1 and 3
+    }
+  }
+  return 1;  // fields consumed; rb now points past them
+}
+
 static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
                                                 unsigned int data_sz,
                                                 vpx_codec_stream_info_t *si,
@@ -144,37 +168,24 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
     error_resilient = vp9_rb_read_bit(&rb);
 
     if (si->is_kf) {
-      const int sRGB = 7;
-      int colorspace;
-
       if (!vp9_read_sync_code(&rb))
         return VPX_CODEC_UNSUP_BITSTREAM;
 
-      if (profile > PROFILE_1)
-        rb.bit_offset += 1;  // Bit-depth 10 or 12
-      colorspace = vp9_rb_read_literal(&rb, 3);
-      if (colorspace != sRGB) {
-        rb.bit_offset += 1;  // [16,235] (including xvycc) vs [0,255] range
-        if (profile == PROFILE_1 || profile == PROFILE_3) {
-          rb.bit_offset += 2;  // subsampling x/y
-          rb.bit_offset += 1;  // unused
-        }
-      } else {
-        if (profile == PROFILE_1 || profile == PROFILE_3) {
-          rb.bit_offset += 1;  // unused
-        } else {
-          // RGB is only available in version 1
-          return VPX_CODEC_UNSUP_BITSTREAM;
-        }
-      }
+      if (!parse_bitdepth_colorspace_sampling(profile, &rb))
+        return VPX_CODEC_UNSUP_BITSTREAM;
       vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
     } else {
       intra_only_flag = show_frame ? 0 : vp9_rb_read_bit(&rb);
+
       rb.bit_offset += error_resilient ? 0 : 2;  // reset_frame_context
 
       if (intra_only_flag) {
         if (!vp9_read_sync_code(&rb))
           return VPX_CODEC_UNSUP_BITSTREAM;
+        if (profile > PROFILE_0) {
+          if (!parse_bitdepth_colorspace_sampling(profile, &rb))
+            return VPX_CODEC_UNSUP_BITSTREAM;
+        }
         rb.bit_offset += REF_FRAMES;  // refresh_frame_flags
         vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
       }
@@ -572,9 +583,9 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
   vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
 
   if (data) {
-    YV12_BUFFER_CONFIG* fb;
+    YV12_BUFFER_CONFIG* fb = get_ref_frame(&ctx->pbi->common, data->idx);
+    if (fb == NULL) return VPX_CODEC_ERROR;
 
-    vp9_get_reference_dec(ctx->pbi, data->idx, &fb);
     yuvconfig2image(&data->img, fb, NULL);
     return VPX_CODEC_OK;
   } else {
@@ -704,8 +715,6 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = {
   decoder_init,       // vpx_codec_init_fn_t
   decoder_destroy,    // vpx_codec_destroy_fn_t
   decoder_ctrl_maps,  // vpx_codec_ctrl_fn_map_t
-  NOT_IMPLEMENTED,    // vpx_codec_get_mmap_fn_t
-  NOT_IMPLEMENTED,    // vpx_codec_set_mmap_fn_t
   { // NOLINT
     decoder_peek_si,    // vpx_codec_peek_si_fn_t
     decoder_get_si,     // vpx_codec_get_si_fn_t
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index cdda3406b..f84bfedcf 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -248,37 +248,6 @@ typedef vpx_codec_err_t (*vpx_codec_set_fb_fn_t)(
     vpx_get_frame_buffer_cb_fn_t cb_get,
     vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
 
-/*\brief eXternal Memory Allocation memory map get iterator
- *
- * Iterates over a list of the memory maps requested by the decoder. The
- * iterator storage should be initialized to NULL to start the iteration.
- * Iteration is complete when this function returns NULL.
- *
- * \param[in out] iter     Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an memory segment descriptor, or NULL to
- *         indicate end-of-list.
- */
-typedef vpx_codec_err_t (*vpx_codec_get_mmap_fn_t)(const vpx_codec_ctx_t      *ctx,
-                                                   vpx_codec_mmap_t           *mmap,
-                                                   vpx_codec_iter_t           *iter);
-
-
-/*\brief eXternal Memory Allocation memory map set iterator
- *
- * Sets a memory descriptor inside the decoder instance.
- *
- * \param[in] ctx      Pointer to this instance's context
- * \param[in] mmap     Memory map to store.
- *
- * \retval #VPX_CODEC_OK
- *     The memory map was accepted and stored.
- * \retval #VPX_CODEC_MEM_ERROR
- *     The memory map was rejected.
- */
-typedef vpx_codec_err_t (*vpx_codec_set_mmap_fn_t)(vpx_codec_ctx_t         *ctx,
-                                                   const vpx_codec_mmap_t  *mmap);
-
 
 typedef vpx_codec_err_t (*vpx_codec_encode_fn_t)(vpx_codec_alg_priv_t  *ctx,
                                                  const vpx_image_t     *img,
@@ -330,8 +299,6 @@ struct vpx_codec_iface {
   vpx_codec_init_fn_t       init;    /**< \copydoc ::vpx_codec_init_fn_t */
   vpx_codec_destroy_fn_t    destroy;     /**< \copydoc ::vpx_codec_destroy_fn_t */
   vpx_codec_ctrl_fn_map_t  *ctrl_maps;   /**< \copydoc ::vpx_codec_ctrl_fn_map_t */
-  vpx_codec_get_mmap_fn_t   get_mmap;    /**< \copydoc ::vpx_codec_get_mmap_fn_t */
-  vpx_codec_set_mmap_fn_t   set_mmap;    /**< \copydoc ::vpx_codec_set_mmap_fn_t */
   struct vpx_codec_dec_iface {
     vpx_codec_peek_si_fn_t    peek_si;     /**< \copydoc ::vpx_codec_peek_si_fn_t */
     vpx_codec_get_si_fn_t     get_si;      /**< \copydoc ::vpx_codec_get_si_fn_t */
@@ -487,31 +454,6 @@ static void vpx_internal_error(struct vpx_internal_error_info *info,
     longjmp(info->jmp, info->error_code);
 }
 
-//------------------------------------------------------------------------------
-// mmap interface
-
-typedef struct {
-  unsigned int   id;
-  unsigned long  sz;
-  unsigned int   align;
-  unsigned int   flags;
-  unsigned long (*calc_sz)(const vpx_codec_dec_cfg_t *, vpx_codec_flags_t);
-} mem_req_t;
-
-// Allocates mmap.priv and sets mmap.base based on mmap.sz/align/flags
-// requirements.
-// Returns #VPX_CODEC_OK on success, #VPX_CODEC_MEM_ERROR otherwise.
-vpx_codec_err_t vpx_mmap_alloc(vpx_codec_mmap_t *mmap);
-
-// Frees mmap.base allocated by a call to vpx_mmap_alloc().
-void vpx_mmap_dtor(vpx_codec_mmap_t *mmap);
-
-// Checks each mmap has the size requirement specificied by mem_reqs.
-// Returns #VPX_CODEC_OK on success, #VPX_CODEC_MEM_ERROR otherwise.
-vpx_codec_err_t vpx_validate_mmaps(const vpx_codec_stream_info_t *si,
-                                   const vpx_codec_mmap_t *mmaps,
-                                   const mem_req_t *mem_reqs, int nreqs,
-                                   vpx_codec_flags_t init_flags);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 93e86e319..7828615b2 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -510,8 +510,10 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
     }
   }
 
+#if CONFIG_SPATIAL_SVC
   for (i = 0; i < si->layers; ++i)
     enc_cfg->ss_enable_auto_alt_ref[i] = si->enable_auto_alt_ref[i];
+#endif
 
   // modify encoder configuration
   enc_cfg->ss_number_layers = si->layers;
@@ -709,12 +711,14 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
         si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz;
         break;
       }
+#if CONFIG_SPATIAL_SVC
       case VPX_CODEC_SPATIAL_SVC_LAYER_SIZES: {
         int i;
         for (i = 0; i < si->layers; ++i)
           si->bytes_sum[i] += cx_pkt->data.layer_sizes[i];
         break;
       }
+#endif
       default: {
         break;
       }
diff --git a/vpx/src/vpx_codec.c b/vpx/src/vpx_codec.c
index 6fb8f522d..9f7af9f83 100644
--- a/vpx/src/vpx_codec.c
+++ b/vpx/src/vpx_codec.c
@@ -134,51 +134,3 @@ vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t  *ctx,
 
   return SAVE_STATUS(ctx, res);
 }
-
-//------------------------------------------------------------------------------
-// mmap interface
-
-vpx_codec_err_t vpx_mmap_alloc(vpx_codec_mmap_t *mmap) {
-  unsigned int align = mmap->align ? mmap->align - 1 : 0;
-
-  if (mmap->flags & VPX_CODEC_MEM_ZERO)
-    mmap->priv = calloc(1, mmap->sz + align);
-  else
-    mmap->priv = malloc(mmap->sz + align);
-
-  if (mmap->priv == NULL) return VPX_CODEC_MEM_ERROR;
-  mmap->base = (void *)((((uintptr_t)mmap->priv) + align) & ~(uintptr_t)align);
-  mmap->dtor = vpx_mmap_dtor;
-  return VPX_CODEC_OK;
-}
-
-void vpx_mmap_dtor(vpx_codec_mmap_t *mmap) {
-  free(mmap->priv);
-}
-
-vpx_codec_err_t vpx_validate_mmaps(const vpx_codec_stream_info_t *si,
-                                   const vpx_codec_mmap_t *mmaps,
-                                   const mem_req_t *mem_reqs, int nreqs,
-                                   vpx_codec_flags_t init_flags) {
-  int i;
-
-  for (i = 0; i < nreqs - 1; ++i) {
-    /* Ensure the segment has been allocated */
-    if (mmaps[i].base == NULL) {
-      return VPX_CODEC_MEM_ERROR;
-    }
-
-    /* Verify variable size segment is big enough for the current si. */
-    if (mem_reqs[i].calc_sz != NULL) {
-      vpx_codec_dec_cfg_t cfg;
-
-      cfg.w = si->w;
-      cfg.h = si->h;
-
-      if (mmaps[i].sz < mem_reqs[i].calc_sz(&cfg, init_flags)) {
-        return VPX_CODEC_MEM_ERROR;
-      }
-    }
-  }
-  return VPX_CODEC_OK;
-}
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index 63fdaf308..d9c8a9522 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -31,8 +31,6 @@ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t      *ctx,
     res = VPX_CODEC_INVALID_PARAM;
   else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
     res = VPX_CODEC_ABI_MISMATCH;
-  else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
-    res = VPX_CODEC_INCAPABLE;
   else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC))
     res = VPX_CODEC_INCAPABLE;
   else if ((flags & VPX_CODEC_USE_ERROR_CONCEALMENT) &&
@@ -50,19 +48,15 @@ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t      *ctx,
     ctx->priv = NULL;
     ctx->init_flags = flags;
     ctx->config.dec = cfg;
-    res = VPX_CODEC_OK;
-
-    if (!(flags & VPX_CODEC_USE_XMA)) {
-      res = ctx->iface->init(ctx, NULL);
-
-      if (res) {
-        ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
-        vpx_codec_destroy(ctx);
-      }
 
-      if (ctx->priv)
-        ctx->priv->iface = ctx->iface;
+    res = ctx->iface->init(ctx, NULL);
+    if (res) {
+      ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
+      vpx_codec_destroy(ctx);
     }
+
+    if (ctx->priv)
+      ctx->priv->iface = ctx->iface;
   }
 
   return SAVE_STATUS(ctx, res);
@@ -183,50 +177,6 @@ vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t             *ctx
   return SAVE_STATUS(ctx, res);
 }
 
-
-vpx_codec_err_t vpx_codec_get_mem_map(vpx_codec_ctx_t                *ctx,
-                                      vpx_codec_mmap_t               *mmap,
-                                      vpx_codec_iter_t               *iter) {
-  vpx_codec_err_t res = VPX_CODEC_OK;
-
-  if (!ctx || !mmap || !iter || !ctx->iface)
-    res = VPX_CODEC_INVALID_PARAM;
-  else if (!(ctx->iface->caps & VPX_CODEC_CAP_XMA))
-    res = VPX_CODEC_ERROR;
-  else
-    res = ctx->iface->get_mmap(ctx, mmap, iter);
-
-  return SAVE_STATUS(ctx, res);
-}
-
-
-vpx_codec_err_t vpx_codec_set_mem_map(vpx_codec_ctx_t   *ctx,
-                                      vpx_codec_mmap_t  *mmap,
-                                      unsigned int     num_maps) {
-  vpx_codec_err_t res = VPX_CODEC_MEM_ERROR;
-
-  if (!ctx || !mmap || !ctx->iface)
-    res = VPX_CODEC_INVALID_PARAM;
-  else if (!(ctx->iface->caps & VPX_CODEC_CAP_XMA))
-    res = VPX_CODEC_ERROR;
-  else {
-    unsigned int i;
-
-    for (i = 0; i < num_maps; i++, mmap++) {
-      if (!mmap->base)
-        break;
-
-      /* Everything look ok, set the mmap in the decoder */
-      res = ctx->iface->set_mmap(ctx, mmap);
-
-      if (res)
-        break;
-    }
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
 vpx_codec_err_t vpx_codec_set_frame_buffer_functions(
     vpx_codec_ctx_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get,
     vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index db019957e..6e18bd129 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -35,8 +35,6 @@ vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t      *ctx,
     res = VPX_CODEC_ABI_MISMATCH;
   else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
     res = VPX_CODEC_INCAPABLE;
-  else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
-    res = VPX_CODEC_INCAPABLE;
   else if ((flags & VPX_CODEC_USE_PSNR)
            && !(iface->caps & VPX_CODEC_CAP_PSNR))
     res = VPX_CODEC_INCAPABLE;
@@ -80,8 +78,6 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t      *ctx,
     res = VPX_CODEC_ABI_MISMATCH;
   else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
     res = VPX_CODEC_INCAPABLE;
-  else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
-    res = VPX_CODEC_INCAPABLE;
   else if ((flags & VPX_CODEC_USE_PSNR)
            && !(iface->caps & VPX_CODEC_CAP_PSNR))
     res = VPX_CODEC_INCAPABLE;
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 45e702354..07df72a78 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -153,7 +153,6 @@ extern "C" {
   typedef long vpx_codec_caps_t;
 #define VPX_CODEC_CAP_DECODER 0x1 /**< Is a decoder */
 #define VPX_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */
-#define VPX_CODEC_CAP_XMA     0x4 /**< Supports eXternal Memory Allocation */
 
 
   /*! \brief Initialization-time Feature Enabling
@@ -164,7 +163,6 @@ extern "C" {
    *  The available flags are specified by VPX_CODEC_USE_* defines.
    */
   typedef long vpx_codec_flags_t;
-#define VPX_CODEC_USE_XMA 0x00000001    /**< Use eXternal Memory Allocation mode */
 
 
   /*!\brief Codec interface structure.
@@ -471,94 +469,6 @@ extern "C" {
 
 #endif
 
-
-  /*!\defgroup cap_xma External Memory Allocation Functions
-   *
-   * The following functions are required to be implemented for all codecs
-   * that advertise the VPX_CODEC_CAP_XMA capability. Calling these functions
-   * for codecs that don't advertise this capability will result in an error
-   * code being returned, usually VPX_CODEC_INCAPABLE
-   * @{
-   */
-
-
-  /*!\brief Memory Map Entry
-   *
-   * This structure is used to contain the properties of a memory segment. It
-   * is populated by the codec in the request phase, and by the calling
-   * application once the requested allocation has been performed.
-   */
-  typedef struct vpx_codec_mmap {
-    /*
-     * The following members are set by the codec when requesting a segment
-     */
-    unsigned int   id;     /**< identifier for the segment's contents */
-    unsigned long  sz;     /**< size of the segment, in bytes */
-    unsigned int   align;  /**< required alignment of the segment, in bytes */
-    unsigned int   flags;  /**< bitfield containing segment properties */
-#define VPX_CODEC_MEM_ZERO     0x1  /**< Segment must be zeroed by allocation */
-#define VPX_CODEC_MEM_WRONLY   0x2  /**< Segment need not be readable */
-#define VPX_CODEC_MEM_FAST     0x4  /**< Place in fast memory, if available */
-
-    /* The following members are to be filled in by the allocation function */
-    void          *base;   /**< pointer to the allocated segment */
-    void (*dtor)(struct vpx_codec_mmap *map);         /**< destructor to call */
-    void          *priv;   /**< allocator private storage */
-  } vpx_codec_mmap_t; /**< alias for struct vpx_codec_mmap */
-
-
-  /*!\brief Iterate over the list of segments to allocate.
-   *
-   * Iterates over a list of the segments to allocate. The iterator storage
-   * should be initialized to NULL to start the iteration. Iteration is complete
-   * when this function returns VPX_CODEC_LIST_END. The amount of memory needed to
-   * allocate is dependent upon the size of the encoded stream. In cases where the
-   * stream is not available at allocation time, a fixed size must be requested.
-   * The codec will not be able to operate on streams larger than the size used at
-   * allocation time.
-   *
-   * \param[in]      ctx     Pointer to this instance's context.
-   * \param[out]     mmap    Pointer to the memory map entry to populate.
-   * \param[in,out]  iter    Iterator storage, initialized to NULL
-   *
-   * \retval #VPX_CODEC_OK
-   *     The memory map entry was populated.
-   * \retval #VPX_CODEC_ERROR
-   *     Codec does not support XMA mode.
-   * \retval #VPX_CODEC_MEM_ERROR
-   *     Unable to determine segment size from stream info.
-   */
-  vpx_codec_err_t vpx_codec_get_mem_map(vpx_codec_ctx_t                *ctx,
-                                        vpx_codec_mmap_t               *mmap,
-                                        vpx_codec_iter_t               *iter);
-
-
-  /*!\brief Identify allocated segments to codec instance
-   *
-   * Stores a list of allocated segments in the codec. Segments \ref MUST be
-   * passed in the order they are read from vpx_codec_get_mem_map(), but may be
-   * passed in groups of any size. Segments \ref MUST be set only once. The
-   * allocation function \ref MUST ensure that the vpx_codec_mmap_t::base member
-   * is non-NULL. If the segment requires cleanup handling (e.g., calling free()
-   * or close()) then the vpx_codec_mmap_t::dtor member \ref MUST be populated.
-   *
-   * \param[in]      ctx     Pointer to this instance's context.
-   * \param[in]      mmaps   Pointer to the first memory map entry in the list.
-   * \param[in]      num_maps  Number of entries being set at this time
-   *
-   * \retval #VPX_CODEC_OK
-   *     The segment was stored in the codec context.
-   * \retval #VPX_CODEC_INCAPABLE
-   *     Codec does not support XMA mode.
-   * \retval #VPX_CODEC_MEM_ERROR
-   *     Segment base address was not set, or segment was already stored.
-
-   */
-  vpx_codec_err_t  vpx_codec_set_mem_map(vpx_codec_ctx_t   *ctx,
-                                         vpx_codec_mmap_t  *mmaps,
-                                         unsigned int       num_maps);
-
-  /*!@} - end defgroup cap_xma*/
   /*!@} - end defgroup codec*/
 #ifdef __cplusplus
 }
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index ba183283e..10b89fa0f 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -122,10 +122,6 @@ extern "C" {
    * is not thread safe and should be guarded with a lock if being used
    * in a multithreaded context.
    *
-   * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
-   * parameter), the storage pointed to by the cfg parameter must be
-   * kept readable and stable until all memory maps have been set.
-   *
    * \param[in]    ctx     Pointer to this instance's context.
    * \param[in]    iface   Pointer to the algorithm interface to use.
    * \param[in]    cfg     Configuration to use, if known. May be NULL.
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 9d2f18c88..7dbbf2f61 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -637,14 +637,12 @@ extern "C" {
      */
     unsigned int           ss_number_layers;
 
-#if CONFIG_SPATIAL_SVC
     /*!\brief Enable auto alt reference flags for each spatial layer.
      *
      * These values specify if auto alt reference frame is enabled for each
      * spatial layer.
      */
     int                    ss_enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
-#endif
 
     /*!\brief Target bitrate for each spatial layer.
      *
@@ -704,10 +702,6 @@ extern "C" {
    * is not thread safe and should be guarded with a lock if being used
    * in a multithreaded context.
    *
-   * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
-   * parameter), the storage pointed to by the cfg parameter must be
-   * kept readable and stable until all memory maps have been set.
-   *
    * \param[in]    ctx     Pointer to this instance's context.
    * \param[in]    iface   Pointer to the algorithm interface to use.
    * \param[in]    cfg     Configuration to use, if known. May be NULL.
@@ -741,10 +735,6 @@ extern "C" {
    * instead of this function directly, to ensure that the ABI version number
    * parameter is properly initialized.
    *
-   * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
-   * parameter), the storage pointed to by the cfg parameter must be
-   * kept readable and stable until all memory maps have been set.
-   *
    * \param[in]    ctx     Pointer to this instance's context.
    * \param[in]    iface   Pointer to the algorithm interface to use.
    * \param[in]    cfg     Configuration to use, if known. May be NULL.
diff --git a/vpxenc.c b/vpxenc.c
index d4bb034a4..7e037a62c 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -378,11 +378,20 @@ static const arg_def_t frame_periodic_boost = ARG_DEF(
     NULL, "frame_boost", 1,
     "Enable frame periodic boost (0: off (default), 1: on)");
 
+static const struct arg_enum_list tune_content_enum[] = {
+  {"default", VP9E_CONTENT_DEFAULT},
+  {"screen", VP9E_CONTENT_SCREEN},
+  {NULL, 0}  // NOTE(review): looks like the end-of-list sentinel; confirm
+};
+
+static const arg_def_t tune_content = ARG_DEF_ENUM(
+    NULL, "tune-content", 1, "Tune content type", tune_content_enum);
+
 static const arg_def_t *vp9_args[] = {
   &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
   &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type,
   &tune_ssim, &cq_level, &max_intra_rate_pct, &lossless,
-  &frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
+  &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, &tune_content,
   NULL
 };
 static const int vp9_arg_ctrl_map[] = {
@@ -392,7 +401,7 @@ static const int vp9_arg_ctrl_map[] = {
   VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
   VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
   VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
-  VP9E_SET_FRAME_PERIODIC_BOOST,
+  VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_TUNE_CONTENT,
   0
 };
 #endif