18 files changed, 474 insertions, 205 deletions
diff --git a/test/i420_video_source.h b/test/i420_video_source.h
index c3315f9ce..0a184805c 100644
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -13,104 +13,22 @@
 #include <cstdlib>
 #include <string>
 
-#include "test/video_source.h"
+#include "test/yuv_video_source.h"
 
 namespace libvpx_test {
 
 // This class extends VideoSource to allow parsing of raw yv12
 // so that we can do actual file encodes.
-class I420VideoSource : public VideoSource {
+class I420VideoSource : public YUVVideoSource {  // I420-only subclass.
  public:
   I420VideoSource(const std::string &file_name,
                   unsigned int width, unsigned int height,
                   int rate_numerator, int rate_denominator,
                   unsigned int start, int limit)
-      : file_name_(file_name),
-        input_file_(NULL),
-        img_(NULL),
-        start_(start),
-        limit_(limit),
-        frame_(0),
-        width_(0),
-        height_(0),
-        framerate_numerator_(rate_numerator),
-        framerate_denominator_(rate_denominator) {
-    // This initializes raw_sz_, width_, height_ and allocates an img.
-    SetSize(width, height);
-  }
-
-  virtual ~I420VideoSource() {
-    vpx_img_free(img_);
-    if (input_file_)
-      fclose(input_file_);
-  }
-
-  virtual void Begin() {
-    if (input_file_)
-      fclose(input_file_);
-    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-        << file_name_;
-    if (start_) {
-      fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET);
-    }
-
-    frame_ = start_;
-    FillFrame();
-  }
-
-  virtual void Next() {
-    ++frame_;
-    FillFrame();
-  }
-
-  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL;  }
-
-  // Models a stream where Timebase = 1/FPS, so pts == frame.
-  virtual vpx_codec_pts_t pts() const { return frame_; }
-
-  virtual unsigned long duration() const { return 1; }
-
-  virtual vpx_rational_t timebase() const {
-    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
-    return t;
-  }
-
-  virtual unsigned int frame() const { return frame_; }
-
-  virtual unsigned int limit() const { return limit_; }
-
-  void SetSize(unsigned int width, unsigned int height) {
-    if (width != width_ || height != height_) {
-      vpx_img_free(img_);
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 1);
-      ASSERT_TRUE(img_ != NULL);
-      width_ = width;
-      height_ = height;
-      raw_sz_ = width * height * 3 / 2;
-    }
-  }
-
-  virtual void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
-    // Read a frame from input_file.
-    if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
-      limit_ = frame_;
-    }
-  }
-
- protected:
-  std::string file_name_;
-  FILE *input_file_;
-  vpx_image_t *img_;
-  size_t raw_sz_;
-  unsigned int start_;
-  unsigned int limit_;
-  unsigned int frame_;
-  unsigned int width_;
-  unsigned int height_;
-  int framerate_numerator_;
-  int framerate_denominator_;
+      : YUVVideoSource(file_name, VPX_IMG_FMT_I420,  // Format is fixed here.
+                       width, height,  // NOTE(review): the 2-arg SetSize()
+                       rate_numerator, rate_denominator,  // is removed;
+                       start, limit) {}  // confirm no caller relied on it.
 };
 
 }  // namespace libvpx_test
diff --git a/test/test-data.mk b/test/test-data.mk
index c50b1c746..e4dae3a95 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -7,12 +7,15 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv
 
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 2dc64d334..69d1d2f25 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -17,12 +17,15 @@ d637297561dd904eb2c97a9015deeb31c4a1e8d2  invalid-vp90-2-08-tile_1x4_frame_paral
 a432f96ff0a787268e2f94a8092ab161a18d1b06  park_joy_90p_10_420.y4m
 0b194cc312c3a2e84d156a221b0a5eb615dfddc5  park_joy_90p_10_422.y4m
 ff0e0a21dc2adc95b8c1b37902713700655ced17  park_joy_90p_10_444.y4m
+c934da6fb8cc54ee2a8c17c54cf6076dac37ead0  park_joy_90p_10_440.yuv
 614c32ae1eca391e867c70d19974f0d62664dd99  park_joy_90p_12_420.y4m
 c92825f1ea25c5c37855083a69faac6ac4641a9e  park_joy_90p_12_422.y4m
 b592189b885b6cc85db55cc98512a197d73d3b34  park_joy_90p_12_444.y4m
+82c1bfcca368c2f22bad7d693d690d5499ecdd11  park_joy_90p_12_440.yuv
 4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c  park_joy_90p_8_420.y4m
 7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947  park_joy_90p_8_422.y4m
 bdb7856e6bc93599bdda05c2e773a9f22b6c6d03  park_joy_90p_8_444.y4m
+81e1f3843748438b8f2e71db484eb22daf72e939  park_joy_90p_8_440.yuv
 b1f1c3ec79114b9a0651af24ce634afb44a9a419  rush_hour_444.y4m
 5184c46ddca8b1fadd16742e8500115bc8f749da  vp80-00-comprehensive-001.ivf
 65bf1bbbced81b97bd030f376d1b7f61a224793f  vp80-00-comprehensive-002.ivf
diff --git a/test/test.mk b/test/test.mk
index 1bd732d51..30c13a1d5 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -23,6 +23,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h
 
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
@@ -38,6 +39,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
 
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
diff --git a/test/vp9_end_to_end_test.cc b/test/vp9_end_to_end_test.cc
new file mode 100644
index 000000000..a8f679342
--- /dev/null
+++ b/test/vp9_end_to_end_test.cc
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const unsigned int kWidth  = 160;     // Width given to YUVVideoSource.
+const unsigned int kHeight = 90;      // Height given to YUVVideoSource.
+const unsigned int kFramerate = 50;   // Rate numerator; denominator is 1.
+const unsigned int kFrames = 10;      // Frame limit for both source types.
+const int kBitrate = 500;             // Assigned to cfg_.rc_target_bitrate.
+const int kCpuUsed = 2;               // Value sent with VP8E_SET_CPUUSED.
+const double psnr_threshold = 35.0;   // Average PSNR must exceed this.
+
+struct TestVideoParam {
+  const char *filename;          // Test-data file to encode.
+  unsigned int input_bit_depth;  // Copied to cfg_.g_input_bit_depth.
+  vpx_img_fmt fmt;               // Layout used when the file is raw YUV.
+  vpx_bit_depth_t bit_depth;     // Copied to cfg_.g_bit_depth.
+  unsigned int profile;          // Copied to cfg_.g_profile.
+};
+
+const TestVideoParam TestVectors[] = {  // file, in bits, fmt, depth, profile
+  {"park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
+  {"park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422, VPX_BITS_8, 1},
+  {"park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1},
+  {"park_joy_90p_8_440.yuv", 8, VPX_IMG_FMT_I440, VPX_BITS_8, 1},
+#if CONFIG_VP9_HIGHBITDEPTH
+  {"park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2},
+  {"park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3},
+  {"park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3},
+  {"park_joy_90p_10_440.yuv", 10, VPX_IMG_FMT_I44016, VPX_BITS_10, 3},
+  {"park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2},
+  {"park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3},
+  {"park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3},
+  {"park_joy_90p_12_440.yuv", 12, VPX_IMG_FMT_I44016, VPX_BITS_12, 3},
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+
+int is_extension_y4m(const char *filename) {
+  // 1 for "<stem>.y4m", else 0; std::string avoids the missing <cstring>.
+  const std::string name(filename);
+  const std::string::size_type dot = name.rfind('.');
+  return dot != std::string::npos && dot != 0 &&
+         name.compare(dot, std::string::npos, ".y4m") == 0;
+}
+
+// Encoder fixture that averages the PSNR the encoder reports during a pass.
+class EndToEndTestLarge
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode,
+                                                 TestVideoParam> {
+ protected:
+  EndToEndTestLarge()
+      : EncoderTest(GET_PARAM(0)),
+        psnr_(0.0),
+        nframes_(0),
+        encoding_mode_(GET_PARAM(1)) {}
+
+  virtual ~EndToEndTestLarge() {}
+
+  // Picks lag/rate-control settings from the mode and latches the vector.
+  virtual void SetUp() {
+    const bool realtime = (encoding_mode_ == ::libvpx_test::kRealTime);
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    cfg_.g_lag_in_frames = realtime ? 0 : 5;
+    cfg_.rc_end_usage = realtime ? VPX_CBR : VPX_VBR;
+    test_video_param_ = GET_PARAM(2);
+  }
+
+  // Restarts the PSNR accumulation at the beginning of every pass.
+  virtual void BeginPassHook(unsigned int) {
+    nframes_ = 0;
+    psnr_ = 0.0;
+  }
+
+  // Adds one packet's psnr[0] value to the running total.
+  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+    ++nframes_;
+    psnr_ += pkt->data.psnr.psnr[0];
+  }
+
+  // Issues the encoder controls once, when the source reports frame 1.
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() != 1)
+      return;
+
+    encoder->Control(VP8E_SET_CPUUSED, kCpuUsed);
+    if (encoding_mode_ == ::libvpx_test::kRealTime)
+      return;
+    encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+    encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+    encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+    encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+  }
+
+  // Mean of the accumulated PSNR values, or 0.0 if none were reported.
+  double GetAveragePsnr() const {
+    return nframes_ ? psnr_ / nframes_ : 0.0;
+  }
+
+  TestVideoParam test_video_param_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  libvpx_test::TestMode encoding_mode_;
+};
+
+TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  libvpx_test::VideoSource *video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video = new libvpx_test::Y4mVideoSource(test_video_param_.filename,
+                                            0, kFrames);
+  } else {
+    video = new libvpx_test::YUVVideoSource(test_video_param_.filename,
+                                            test_video_param_.fmt,
+                                            kWidth, kHeight,
+                                            kFramerate, 1, 0, kFrames);
+  }
+
+  EXPECT_NO_FATAL_FAILURE(RunLoop(video));
+  delete video;  // Freed before any early return so a failed run cannot leak.
+  if (HasFatalFailure()) return;
+  EXPECT_GT(GetAveragePsnr(), psnr_threshold);
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    EndToEndTestLarge,
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
+    ::testing::ValuesIn(TestVectors));  // Every vector runs in both modes.
+
+}  // namespace
diff --git a/test/yuv_video_source.h b/test/yuv_video_source.h
new file mode 100644
index 000000000..3c852b242
--- /dev/null
+++ b/test/yuv_video_source.h
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_YUV_VIDEO_SOURCE_H_
+#define TEST_YUV_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/video_source.h"
+#include "vpx/vpx_image.h"
+
+namespace libvpx_test {
+
+// This class extends VideoSource to allow parsing of raw YUV
+// formats of various color sampling and bit-depths so that we can
+// do actual file encodes.
+class YUVVideoSource : public VideoSource {
+ public:
+  YUVVideoSource(const std::string &file_name, vpx_img_fmt format,
+                 unsigned int width, unsigned int height,
+                 int rate_numerator, int rate_denominator,
+                 unsigned int start, int limit)
+      : file_name_(file_name),
+        input_file_(NULL),
+        img_(NULL),
+        raw_size_(0),
+        start_(start),
+        limit_(limit),
+        frame_(0),
+        width_(0),
+        height_(0),
+        format_(VPX_IMG_FMT_NONE),
+        framerate_numerator_(rate_numerator),
+        framerate_denominator_(rate_denominator) {
+    // This initializes format_, raw_size_, width_, height_ and allocates img.
+    SetSize(width, height, format);
+  }
+
+  virtual ~YUVVideoSource() {
+    vpx_img_free(img_);
+    if (input_file_)
+      fclose(input_file_);
+  }
+
+  virtual void Begin() {
+    if (input_file_)
+      fclose(input_file_);
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+    // Multiply in long (fseek's offset type) rather than 32-bit unsigned.
+    if (start_)
+      fseek(input_file_, static_cast<long>(raw_size_) * start_, SEEK_SET);
+
+    frame_ = start_;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual vpx_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual vpx_rational_t timebase() const {
+    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  // Reallocates the image and recomputes the frame byte count on any change.
+  virtual void SetSize(unsigned int width, unsigned int height,
+                       vpx_img_fmt format) {
+    if (width != width_ || height != height_ || format != format_) {
+      vpx_img_free(img_);
+      raw_size_ = 0;  // Nothing is readable until the new layout is accepted.
+      img_ = vpx_img_alloc(NULL, format, width, height, 1);
+      ASSERT_TRUE(img_ != NULL);
+      width_ = width;
+      height_ = height;
+      format_ = format;
+      switch (format) {
+        case VPX_IMG_FMT_I420:
+          raw_size_ = width * height * 3 / 2;
+          break;
+        case VPX_IMG_FMT_I422:
+        case VPX_IMG_FMT_I440:
+          raw_size_ = width * height * 2;
+          break;
+        case VPX_IMG_FMT_I444:
+        case VPX_IMG_FMT_I42016:
+          raw_size_ = width * height * 3;
+          break;
+        case VPX_IMG_FMT_I42216:
+        case VPX_IMG_FMT_I44016:
+          raw_size_ = width * height * 4;
+          break;
+        case VPX_IMG_FMT_I44416:
+          raw_size_ = width * height * 6;
+          break;
+        default:
+          ASSERT_TRUE(0);
+      }
+    }
+  }
+
+  // Reads one frame; a failed read marks the current frame as the limit.
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    // A failed SetSize() leaves no image (or a zero size) to read into.
+    if (img_ == NULL || raw_size_ == 0 ||
+        fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
+      limit_ = frame_;
+    }
+  }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  vpx_image_t *img_;
+  size_t raw_size_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  vpx_img_fmt format_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+};
+
+}  // namespace libvpx_test
+
+#endif  // TEST_YUV_VIDEO_SOURCE_H_
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 12f9734a1..75b2a3be4 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -390,9 +390,9 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
     denoiser->denoise_pars.scale_motion_thresh = 16;
     denoiser->denoise_pars.scale_increase_filter = 1;
     denoiser->denoise_pars.denoise_mv_bias = 60;
-    denoiser->denoise_pars.pickmode_mv_bias = 60;
-    denoiser->denoise_pars.qp_thresh = 100;
-    denoiser->denoise_pars.consec_zerolast = 10;
+    denoiser->denoise_pars.pickmode_mv_bias = 75;
+    denoiser->denoise_pars.qp_thresh = 85;
+    denoiser->denoise_pars.consec_zerolast = 15;
     denoiser->denoise_pars.spatial_blur = 20;
   }
 }
@@ -453,17 +453,17 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
     // Bitrate thresholds and noise metric (nmse) thresholds for switching to
     // aggressive mode.
     // TODO(marpan): Adjust thresholds, including effect on resolution.
-    denoiser->bitrate_threshold = 200000;  // (bits/sec).
+    denoiser->bitrate_threshold = 300000;  // (bits/sec).
     denoiser->threshold_aggressive_mode = 35;
-    if (width * height > 640 * 480) {
-      denoiser->bitrate_threshold = 500000;
-      denoiser->threshold_aggressive_mode = 100;
+    if (width * height > 1280 * 720) {
+      denoiser->bitrate_threshold = 2000000;
+      denoiser->threshold_aggressive_mode = 1400;
     } else if (width * height > 960 * 540) {
       denoiser->bitrate_threshold = 800000;
       denoiser->threshold_aggressive_mode = 150;
-    } else if (width * height > 1280 * 720) {
-      denoiser->bitrate_threshold = 2000000;
-      denoiser->threshold_aggressive_mode = 1400;
+    } else if (width * height > 640 * 480) {
+      denoiser->bitrate_threshold = 500000;
+      denoiser->threshold_aggressive_mode = 100;
     }
     return 0;
 }
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index de389e7af..d15ddec73 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1114,6 +1114,11 @@ specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
 add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
 specialize qw/vp9_avg_8x8 sse2/;
 
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
+  specialize qw/vp9_highbd_avg_8x8/;  # No optimized variants listed.
+}
+
 # ENCODEMB INVOKE
 
 add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 3c9469c4c..baf6ab7ef 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -232,6 +232,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
       cm->frame_refs[0].buf->corrupted = 1;
   }
 
+  pbi->ready_for_new_data = 0;
+
   // Check if the previous frame was a frame without any references to it.
   if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
     cm->release_fb_cb(cm->cb_priv,
@@ -279,8 +281,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
     cm->current_video_frame++;
   }
 
-  pbi->ready_for_new_data = 0;
-
   cm->error.setjmp = 0;
   return retcode;
 }
@@ -296,12 +296,12 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
   if (pbi->ready_for_new_data == 1)
     return ret;
 
+  pbi->ready_for_new_data = 1;
+
   /* no raw frame to show!!! */
   if (!cm->show_frame)
     return ret;
 
-  pbi->ready_for_new_data = 1;
-
 #if CONFIG_VP9_POSTPROC
   if (!cm->show_existing_frame) {
     ret = vp9_post_proc_frame(cm, sd, flags);
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c
index 22c6cc4fc..e9810c894 100644
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -7,6 +7,7 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
+#include "vp9/common/vp9_common.h"
 #include "vpx_ports/mem.h"
 
 unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
@@ -17,3 +18,16 @@ unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
 
   return (sum + 32) >> 6;
 }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
+  // View the uint8_t-typed argument as 16-bit samples.
+  const uint16_t *row = CONVERT_TO_SHORTPTR(s8);
+  int r, c, total = 0;
+  for (r = 0; r < 8; ++r, row += p)
+    for (c = 0; c < 8; ++c) total += row[c];
+
+  return (total + 32) >> 6;  // Rounded average over the 8x8 block.
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 420ec0b51..6eff8c501 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -533,8 +533,19 @@ static void choose_partitioning(VP9_COMP *cpi,
         int sum = 0;
 
         if (x_idx < pixels_wide && y_idx < pixels_high) {
-          int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
-          int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+          int s_avg, d_avg;
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+            s_avg = vp9_highbd_avg_8x8(s + y_idx * sp + x_idx, sp);
+            d_avg = vp9_highbd_avg_8x8(d + y_idx * dp + x_idx, dp);
+          } else {
+            s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+            d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+          }
+#else
+          s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+          d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+#endif
           sum = s_avg - d_avg;
           sse = sum * sum;
         }
@@ -1522,9 +1533,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
   BLOCK_SIZE subsize;
   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   PARTITION_CONTEXT sl[8], sa[8];
-  RD_COST last_part_rdc = {INT_MAX, INT64_MAX, INT64_MAX};
-  RD_COST none_rdc = {INT_MAX, INT64_MAX, INT64_MAX};
-  RD_COST chosen_rdc = {INT_MAX, INT64_MAX, INT64_MAX};
+  RD_COST last_part_rdc, none_rdc, chosen_rdc;
   BLOCK_SIZE sub_subsize = BLOCK_4X4;
   int splits_below = 0;
   BLOCK_SIZE bs_type = mi_8x8[0].src_mi->mbmi.sb_type;
@@ -1537,6 +1546,10 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
   assert(num_4x4_blocks_wide_lookup[bsize] ==
          num_4x4_blocks_high_lookup[bsize]);
 
+  vp9_rd_cost_reset(&last_part_rdc);
+  vp9_rd_cost_reset(&none_rdc);
+  vp9_rd_cost_reset(&chosen_rdc);
+
   partition = partition_lookup[bsl][bs_type];
   subsize = get_subsize(bsize, partition);
 
@@ -1598,16 +1611,15 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
                        INT64_MAX);
       if (last_part_rdc.rate != INT_MAX &&
           bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
-        RD_COST tmp_rdc = {0, 0, 0};
+        RD_COST tmp_rdc;
         PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+        vp9_rd_cost_init(&tmp_rdc);
         update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
         encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
         rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &tmp_rdc,
                          subsize, &pc_tree->horizontal[1], INT64_MAX);
         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-          last_part_rdc.rate = INT_MAX;
-          last_part_rdc.dist = INT64_MAX;
-          last_part_rdc.rdcost = INT64_MAX;
+          vp9_rd_cost_reset(&last_part_rdc);
           break;
         }
         last_part_rdc.rate += tmp_rdc.rate;
@@ -1620,17 +1632,16 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
                        subsize, &pc_tree->vertical[0], INT64_MAX);
       if (last_part_rdc.rate != INT_MAX &&
           bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
-        RD_COST tmp_rdc = {0, 0, 0};
+        RD_COST tmp_rdc;
         PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+        vp9_rd_cost_init(&tmp_rdc);
         update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
         encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
         rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &tmp_rdc,
                          subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
                          INT64_MAX);
         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-          last_part_rdc.rate = INT_MAX;
-          last_part_rdc.dist = INT64_MAX;
-          last_part_rdc.rdcost = INT64_MAX;
+          vp9_rd_cost_reset(&last_part_rdc);
           break;
         }
         last_part_rdc.rate += tmp_rdc.rate;
@@ -1651,19 +1662,17 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
         int x_idx = (i & 1) * (mi_step >> 1);
         int y_idx = (i >> 1) * (mi_step >> 1);
         int jj = i >> 1, ii = i & 0x01;
-        RD_COST tmp_rdc = {0, 0, 0};
-
+        RD_COST tmp_rdc;
         if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
           continue;
 
+        vp9_rd_cost_init(&tmp_rdc);
         rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
                          mi_row + y_idx, mi_col + x_idx, subsize,
                          &tmp_rdc.rate, &tmp_rdc.dist,
                          i != 3, pc_tree->split[i]);
         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-          last_part_rdc.rate = INT_MAX;
-          last_part_rdc.dist = INT64_MAX;
-          last_part_rdc.rdcost = INT64_MAX;
+          vp9_rd_cost_reset(&last_part_rdc);
           break;
         }
         last_part_rdc.rate += tmp_rdc.rate;
@@ -1710,15 +1719,12 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
       save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
       pc_tree->split[i]->partitioning = PARTITION_NONE;
       rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
-                       split_subsize, &pc_tree->split[i]->none,
-                       INT64_MAX);
+                       split_subsize, &pc_tree->split[i]->none, INT64_MAX);
 
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 
       if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-        chosen_rdc.rate = INT_MAX;
-        chosen_rdc.dist = INT64_MAX;
-        chosen_rdc.rdcost = INT64_MAX;
+        vp9_rd_cost_reset(&chosen_rdc);
         break;
       }
 
@@ -2123,9 +2129,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   int i, pl;
   BLOCK_SIZE subsize;
-  RD_COST this_rdc = {0, 0, 0};
-  RD_COST sum_rdc = {0, 0, 0};
-  RD_COST best_rdc = {INT_MAX, INT64_MAX, best_rd};
+  RD_COST this_rdc, sum_rdc, best_rdc;
   int do_split = bsize >= BLOCK_8X8;
   int do_rect = 1;
 
@@ -2153,6 +2157,11 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   assert(num_8x8_blocks_wide_lookup[bsize] ==
              num_8x8_blocks_high_lookup[bsize]);
 
+  vp9_rd_cost_init(&this_rdc);
+  vp9_rd_cost_init(&sum_rdc);
+  vp9_rd_cost_reset(&best_rdc);
+  best_rdc.rdcost = best_rd;
+
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
 
   if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode)
@@ -2574,16 +2583,15 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                              sf->always_this_block_size);
       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);
-    } else if (cpi->partition_search_skippable_frame ||
-               sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
+    } else if (cpi->partition_search_skippable_frame) {
       BLOCK_SIZE bsize;
       set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
       bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
       set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);
-      } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
-                 cm->frame_type != KEY_FRAME ) {
+    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
+               cm->frame_type != KEY_FRAME ) {
       choose_partitioning(cpi, tile, mi_row, mi_col);
       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);
@@ -2835,8 +2843,13 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       this_rate += cpi->partition_cost[pl][PARTITION_NONE];
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
       if (sum_rd < best_rd) {
-        int64_t stop_thresh = 4096;
-        int64_t stop_thresh_rd;
+        int dist_breakout_thr = sf->partition_search_breakout_dist_thr;
+        int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr;
+
+        dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
+            b_height_log2_lookup[bsize]);
+
+        rate_breakout_thr *= num_pels_log2_lookup[bsize];
 
         best_rate = this_rate;
         best_dist = this_dist;
@@ -2844,14 +2857,9 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         if (bsize >= BLOCK_8X8)
           pc_tree->partitioning = PARTITION_NONE;
 
-        // Adjust threshold according to partition size.
-        stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
-            b_height_log2_lookup[bsize]);
-
-        stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
-        // If obtained distortion is very small, choose current partition
-        // and stop splitting.
-        if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
+        if (!x->e_mbd.lossless &&
+            this_rate < rate_breakout_thr &&
+            this_dist < dist_breakout_thr) {
           do_split = 0;
           do_rect = 0;
         }
@@ -3176,7 +3184,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
         nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist, cpi->pc_root);
         break;
-      case VAR_BASED_FIXED_PARTITION:
       case FIXED_PARTITION:
         bsize = sf->partition_search_type == FIXED_PARTITION ?
                 sf->always_this_block_size :
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 96c3e0aa4..f1baf8323 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -66,13 +66,6 @@ static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
   *b = temp;
 }
 
-static int gfboost_qadjust(int qindex, vpx_bit_depth_t bit_depth) {
-  const double q = vp9_convert_qindex_to_q(qindex, bit_depth);
-  return (int)((0.00000828 * q * q * q) +
-               (-0.0055 * q * q) +
-               (1.32 * q) + 79.3);
-}
-
 // Resets the first pass file to the given position using a relative seek from
 // the current position.
 static void reset_fpf_position(TWO_PASS *p,
@@ -1317,14 +1310,15 @@ static double calc_frame_boost(VP9_COMP *cpi,
                                double this_frame_mv_in_out,
                                double max_boost) {
   double frame_boost;
-  const double lq = vp9_convert_qindex_to_q(cpi->rc.last_q[INTER_FRAME],
-                                            cpi->common.bit_depth);
-  const double q_correction = MIN((0.8 + (lq * 0.001)), 1.0);
+  const double lq =
+    vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
+                            cpi->common.bit_depth);
+  const double boost_correction = MIN((0.5 + (lq * 0.015)), 1.5);
 
   // Underlying boost factor is based on inter error ratio.
   frame_boost = (BASELINE_ERR_PER_MB * cpi->common.MBs) /
                 DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
-  frame_boost = frame_boost * BOOST_FACTOR * q_correction;
+  frame_boost = frame_boost * BOOST_FACTOR * boost_correction;
 
   // Increase boost for frames where new data coming into frame (e.g. zoom out).
   // Slightly reduce boost if there is a net balance of motion out of the frame
@@ -1335,7 +1329,7 @@ static double calc_frame_boost(VP9_COMP *cpi,
   else
     frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
 
-  return MIN(frame_boost, max_boost * q_correction);
+  return MIN(frame_boost, max_boost * boost_correction);
 }
 
 static int calc_arf_boost(VP9_COMP *cpi, int offset,
@@ -1874,19 +1868,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
 
   // Calculate the extra bits to be used for boosted frame(s)
-  {
-    int q = rc->last_q[INTER_FRAME];
-    int boost =
-        (rc->gfu_boost * gfboost_qadjust(q, cpi->common.bit_depth)) / 100;
-
-    // Set max and minimum boost and hence minimum allocation.
-    boost = clamp(boost, MIN_ARF_GF_BOOST,
-                  (rc->baseline_gf_interval + 1) * 200);
-
-    // Calculate the extra bits to be used for boosted frame(s)
-    gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
-                                       boost, gf_group_bits);
-  }
+  gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
+                                     rc->gfu_boost, gf_group_bits);
 
   // Adjust KF group bits and error remaining.
   twopass->kf_group_error_left -= (int64_t)gf_group_err;
@@ -2380,7 +2363,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
                                                 section_target_bandwidth);
     twopass->active_worst_quality = tmp_q;
     rc->ni_av_qi = tmp_q;
+    rc->last_q[INTER_FRAME] = tmp_q;
     rc->avg_q = vp9_convert_qindex_to_q(tmp_q, cm->bit_depth);
+    rc->avg_frame_qindex[INTER_FRAME] = tmp_q;
+    rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2;
+    rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME];
   }
   vp9_zero(this_frame);
   if (EOF == input_stats(twopass, &this_frame))
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 75c396433..7f526fc42 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -44,6 +44,18 @@
 // Factor to weigh the rate for switchable interp filters.
 #define SWITCHABLE_INTERP_RATE_FACTOR 1
 
+void vp9_rd_cost_reset(RD_COST *rd_cost) {
+  rd_cost->rate = INT_MAX;
+  rd_cost->dist = INT64_MAX;
+  rd_cost->rdcost = INT64_MAX;
+}
+
+void vp9_rd_cost_init(RD_COST *rd_cost) {
+  rd_cost->rate = 0;
+  rd_cost->dist = 0;
+  rd_cost->rdcost = 0;
+}
+
 // The baseline rd thresholds for breaking out of the rd loop for
 // certain modes are assumed to be based on 8x8 blocks.
 // This table is used to correct for block size.
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 33fb4ac94..1aa52663a 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -123,6 +123,11 @@ typedef struct RD_COST {
   int64_t rdcost;
 } RD_COST;
 
+// Reset rate, dist and rdcost to their maximum values (marks cost as invalid).
+void vp9_rd_cost_reset(RD_COST *rd_cost);
+// Initialize the rate distortion cost values to zero.
+void vp9_rd_cost_init(RD_COST *rd_cost);
+
 struct TileInfo;
 struct VP9_COMP;
 struct macroblock;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7565cc5c9..eca8e5880 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1140,12 +1140,14 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
   *sse = 0;
   *skippable = 1;
 
-  for (plane = 1; plane < MAX_MB_PLANE && is_cost_valid; ++plane) {
+  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
     txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                      ref_best_rd, plane, bsize, uv_tx_size,
                      cpi->sf.use_fast_coef_costing);
-    if (pnrate == INT_MAX)
+    if (pnrate == INT_MAX) {
       is_cost_valid = 0;
+      break;
+    }
     *rate += pnrate;
     *distortion += pndist;
     *sse += pnsse;
@@ -3392,6 +3394,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
+    rd_cost->rate = INT_MAX;
     rd_cost->rdcost = INT64_MAX;
     return;
   }
@@ -3562,6 +3565,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
   rd_cost->rdcost = this_rd;
 
   if (this_rd >= best_rd_so_far) {
+    rd_cost->rate = INT_MAX;
     rd_cost->rdcost = INT64_MAX;
     return;
   }
@@ -4113,6 +4117,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (best_rd >= best_rd_so_far) {
+    rd_cost->rate = INT_MAX;
     rd_cost->rdcost = INT64_MAX;
     return;
   }
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index bec77d71f..9e3ee2c94 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -275,6 +275,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
     sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
     sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
     sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 25);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 23);
+    sf->partition_search_breakout_rate_thr = 200;
   }
 
   if (speed >= 6) {
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index cc6c2e52a..951b4af22 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -149,16 +149,12 @@ typedef enum {
 
 typedef enum {
   // Search partitions using RD/NONRD criterion
-  SEARCH_PARTITION = 0,
+  SEARCH_PARTITION,
 
   // Always use a fixed size partition
-  FIXED_PARTITION = 1,
+  FIXED_PARTITION,
 
-  // Use a fixed size partition in every 64X64 SB, where the size is
-  // determined based on source variance
-  VAR_BASED_FIXED_PARTITION = 2,
-
-  REFERENCE_PARTITION = 3,
+  REFERENCE_PARTITION,
 
   // Use an arbitrary partitioning scheme based on source variance within
   // a 64X64 SB
diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index bf400d38b..bf5fa889f 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -41,40 +41,40 @@ static INLINE int sum_diff_16x1(__m128i acc_diff) {
 static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
                                              const uint8_t *mc_running_avg_y,
                                              uint8_t *running_avg_y,
-                                             const __m128i k_0,
-                                             const __m128i k_4,
-                                             const __m128i k_8,
-                                             const __m128i k_16,
-                                             const __m128i l3,
-                                             const __m128i l32,
-                                             const __m128i l21,
+                                             const __m128i *k_0,
+                                             const __m128i *k_4,
+                                             const __m128i *k_8,
+                                             const __m128i *k_16,
+                                             const __m128i *l3,
+                                             const __m128i *l32,
+                                             const __m128i *l21,
                                              __m128i acc_diff) {
   // Calculate differences
-  const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
+  const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
   const __m128i v_mc_running_avg_y = _mm_loadu_si128(
-                                     (__m128i *)(&mc_running_avg_y[0]));
+                                     (const __m128i *)(&mc_running_avg_y[0]));
   __m128i v_running_avg_y;
   const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
   const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
   // Obtain the sign. FF if diff is negative.
-  const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
+  const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0);
   // Clamp absolute difference to 16 to be used to get mask. Doing this
   // allows us to use _mm_cmpgt_epi8, which operates on signed byte.
   const __m128i clamped_absdiff = _mm_min_epu8(
-                                  _mm_or_si128(pdiff, ndiff), k_16);
+                                  _mm_or_si128(pdiff, ndiff), *k_16);
   // Get masks for l2 l1 and l0 adjustments.
-  const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
-  const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
-  const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
+  const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff);
+  const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff);
+  const __m128i mask0 = _mm_cmpgt_epi8(*k_4, clamped_absdiff);
   // Get adjustments for l2, l1, and l0.
-  __m128i adj2 = _mm_and_si128(mask2, l32);
-  const __m128i adj1 = _mm_and_si128(mask1, l21);
+  __m128i adj2 = _mm_and_si128(mask2, *l32);
+  const __m128i adj1 = _mm_and_si128(mask1, *l21);
   const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
   __m128i adj,  padj, nadj;
 
   // Combine the adjustments and get absolute adjustments.
   adj2 = _mm_add_epi8(adj2, adj1);
-  adj = _mm_sub_epi8(l3, adj2);
+  adj = _mm_sub_epi8(*l3, adj2);
   adj = _mm_andnot_si128(mask0, adj);
   adj = _mm_or_si128(adj, adj0);
 
@@ -103,9 +103,9 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(const uint8_t *sig,
                                               __m128i acc_diff) {
   __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
   // Calculate differences.
-  const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
+  const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
   const __m128i v_mc_running_avg_y =
-                _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0]));
+                _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0]));
   const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
   const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
   // Obtain the sign. FF if diff is negative.
@@ -178,8 +178,8 @@ static int vp9_denoiser_4xM_sse2(const uint8_t *sig, int sig_stride,
     acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
                                       mc_running_buffer[r],
                                       running_buffer[r],
-                                      k_0, k_4, k_8, k_16,
-                                      l3, l32, l21, acc_diff);
+                                      &k_0, &k_4, &k_8, &k_16,
+                                      &l3, &l32, &l21, acc_diff);
     vpx_memcpy(running_avg_y, running_buffer[r], 4);
     vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4);
     vpx_memcpy(running_avg_y + avg_y_stride * 2,
@@ -279,8 +279,8 @@ static int vp9_denoiser_8xM_sse2(const uint8_t *sig, int sig_stride,
     acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
                                       mc_running_buffer[r],
                                       running_buffer[r],
-                                      k_0, k_4, k_8, k_16,
-                                      l3, l32, l21, acc_diff);
+                                      &k_0, &k_4, &k_8, &k_16,
+                                      &l3, &l32, &l21, acc_diff);
     vpx_memcpy(running_avg_y, running_buffer[r], 8);
     vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8);
     // Update pointers for next iteration.
@@ -357,9 +357,9 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
   const __m128i l21 = _mm_set1_epi8(1);
   int sum_diff = 0;
 
-  for (int i = 0; i < 4; i++) {
-    for (int j = 0; j < 4; j++) {
-      acc_diff[i][j] = _mm_setzero_si128();
+  for (c = 0; c < 4; ++c) {
+    for (r = 0; r < 4; ++r) {
+      acc_diff[c][r] = _mm_setzero_si128();
     }
   }
 
@@ -368,8 +368,8 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
       acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2(
                                sig, mc_running_avg_y,
                                running_avg_y,
-                               k_0, k_4, k_8, k_16,
-                               l3, l32, l21, acc_diff[c>>4][r>>4]);
+                               &k_0, &k_4, &k_8, &k_16,
+                               &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
       // Update pointers for next iteration.
       sig += 16;
       mc_running_avg_y += 16;