46 files changed, 1065 insertions, 543 deletions
diff --git a/test/android/scrape_gtest_log.py b/test/android/scrape_gtest_log.py
index c159c8938..487845c27 100644
--- a/test/android/scrape_gtest_log.py
+++ b/test/android/scrape_gtest_log.py
@@ -13,16 +13,45 @@ waterfall to gather json results mixed in with gtest logs.  This is
 dubious software engineering.
 """
 
+import getopt
 import json
+import os
 import re
 import sys
 
 
 def main():
+  """Echo stdin, then print the json blobs scraped from it and save them."""
+  try:
+    opts, _ = getopt.getopt(sys.argv[1:], 'o:', ['output-json='])
+  except getopt.GetoptError:
+    print 'scrape_gtest_log.py -o <output_json>'
+    sys.exit(2)
+
+  output_json = ''
+  for opt, arg in opts:
+    if opt in ('-o', '--output-json'):
+      output_json = arg
+
+  # Checked after parsing so that --output-json=<file> is accepted too.
+  if not output_json:
+    print "Expects a file to write json to!"
+    sys.exit(1)
+
   blob = sys.stdin.read()
   json_string = '[' + ','.join('{' + x + '}' for x in
                                re.findall(r'{([^}]*.?)}', blob)) + ']'
-  print json.dumps(json.loads(json_string), indent=4, sort_keys=True)
+  print blob
+
+  output = json.dumps(json.loads(json_string), indent=4, sort_keys=True)
+  print output
+
+  path = os.path.dirname(output_json)
+  if path and not os.path.exists(path):
+    os.makedirs(path)
+
+  with open(output_json, 'w') as outfile:
+    outfile.write(output)
 
 if __name__ == '__main__':
   sys.exit(main())
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index b8f668a78..cf2ad1eba 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -591,3 +591,47 @@ c21e97e4ba486520118d78b01a5cb6e6dc33e190  vp90-2-12-droppable_3.ivf
 601abc9e4176c70f82ac0381365e9b151fdd24cd  vp90-2-12-droppable_3.ivf.md5
 61c640dad23cd4f7ad811b867e7b7e3521f4e3ba  vp90-2-13-largescaling.webm
 bca1b02eebdb088fa3f389fe0e7571e75a71f523  vp90-2-13-largescaling.webm.md5
+c740708fa390806eebaf669909c1285ab464f886  vp90-2-14-resize-fp-tiles-1-2.webm
+c7b85ffd8e11500f73f52e7dc5a47f57c393d47f  vp90-2-14-resize-fp-tiles-1-2.webm.md5
+ec8faa352a08f7033c60f29f80d505e2d7daa103  vp90-2-14-resize-fp-tiles-1-4.webm
+6852c783fb421bda5ded3d4c5a3ffc46de03fbc1  vp90-2-14-resize-fp-tiles-1-4.webm.md5
+8af61853ac0d07c4cb5bf7c2016661ba350b3497  vp90-2-14-resize-fp-tiles-1-8.webm
+571353bac89fea60b5706073409aa3c0d42aefe9  vp90-2-14-resize-fp-tiles-1-8.webm.md5
+b1c187ed69931496b82ec194017a79831bafceef  vp90-2-14-resize-fp-tiles-1-16.webm
+1c199a41afe42ce303944d70089eaaa2263b4a09  vp90-2-14-resize-fp-tiles-1-16.webm.md5
+8eaae5a6f2dff934610b0c7a917d7f583ba74aa5  vp90-2-14-resize-fp-tiles-2-1.webm
+db18fcf915f7ffaea6c39feab8bda6c1688af011  vp90-2-14-resize-fp-tiles-2-1.webm.md5
+bc3046d138941e2a20e9ceec0ff6d25c25d12af3  vp90-2-14-resize-fp-tiles-4-1.webm
+393211b808030d09a79927b17a4374b2f68a60ae  vp90-2-14-resize-fp-tiles-4-1.webm.md5
+6e8f8e31721a0f7f68a2964e36e0e698c2e276b1  vp90-2-14-resize-fp-tiles-8-1.webm
+491fd3cd78fb0577bfe905bb64bbf64bd7d29140  vp90-2-14-resize-fp-tiles-8-1.webm.md5
+cc5958da2a7edf739cd2cfeb18bd05e77903087e  vp90-2-14-resize-fp-tiles-16-1.webm
+0b58daf55aaf9063bf5b4fb33393d18b417dc428  vp90-2-14-resize-fp-tiles-16-1.webm.md5
+821eeecc9d8c6a316134dd42d1ff057787d8047b  vp90-2-14-resize-fp-tiles-2-4.webm
+374c549f2839a3d0b732c4e3650700144037e76c  vp90-2-14-resize-fp-tiles-2-4.webm.md5
+dff8c8e49aacea9f4c7f22cb882da984e2a1b405  vp90-2-14-resize-fp-tiles-2-8.webm
+e5b8820a7c823b21297d6e889e57ec401882c210  vp90-2-14-resize-fp-tiles-2-8.webm.md5
+77629e4b23e32896aadf6e994c78bd4ffa1c7797  vp90-2-14-resize-fp-tiles-2-16.webm
+1937f5df032664ac345d4613ad4417b4967b1230  vp90-2-14-resize-fp-tiles-2-16.webm.md5
+380ba5702bb1ec7947697314ab0300b5c56a1665  vp90-2-14-resize-fp-tiles-4-2.webm
+fde7b30d2aa64c1e851a4852f655d79fc542cf66  vp90-2-14-resize-fp-tiles-4-2.webm.md5
+dc784b258ffa2abc2ae693d11792acf0bb9cb74f  vp90-2-14-resize-fp-tiles-8-2.webm
+edf26f0130aeee8342d49c2c8f0793ad008782d9  vp90-2-14-resize-fp-tiles-8-2.webm.md5
+8e575789fd63ebf69e8eff1b9a4351a249a73bee  vp90-2-14-resize-fp-tiles-16-2.webm
+b6415318c1c589a1f64b9d569ce3cabbec2e0d52  vp90-2-14-resize-fp-tiles-16-2.webm.md5
+e3adc944a11c4c5517e63664c84ebb0847b64d81  vp90-2-14-resize-fp-tiles-4-8.webm
+03cba0532bc90a05b1990db830bf5701e24e7982  vp90-2-14-resize-fp-tiles-4-8.webm.md5
+3b27a991eb6d78dce38efab35b7db682e8cbbee3  vp90-2-14-resize-fp-tiles-4-16.webm
+5d16b7f82bf59f802724ddfd97abb487150b1c9d  vp90-2-14-resize-fp-tiles-4-16.webm.md5
+d5fed8c28c1d4c7e232ebbd25cf758757313ed96  vp90-2-14-resize-fp-tiles-8-4.webm
+5a8ff8a52cbbde7bfab569beb6d971c5f8b904f7  vp90-2-14-resize-fp-tiles-8-4.webm.md5
+17a5faa023d77ee9dad423a4e0d3145796bbc500  vp90-2-14-resize-fp-tiles-16-4.webm
+2ef8daa3c3e750fd745130d0a76a39fe86f0448f  vp90-2-14-resize-fp-tiles-16-4.webm.md5
+9361e031f5cc990d8740863e310abb5167ae351e  vp90-2-14-resize-fp-tiles-8-16.webm
+57f13a2197486584f4e1a4f82ad969f3abc5a1a2  vp90-2-14-resize-fp-tiles-8-16.webm.md5
+5803fc6fcbfb47b7661f3fcc6499158a32b56675  vp90-2-14-resize-fp-tiles-16-8.webm
+be0fe64a1a4933696ff92d93f9bdecdbd886dc13  vp90-2-14-resize-fp-tiles-16-8.webm.md5
+0ac0f6d20a0afed77f742a3b9acb59fd7b9cb093  vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm
+1765315acccfe6cd12230e731369fcb15325ebfa  vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5
+4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8  vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
+1ef480392112b3509cb190afbb96f9a38dd9fbac  vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5
diff --git a/test/test.mk b/test/test.mk
index 4d96bc69d..92664e225 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -698,6 +698,50 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5
 
 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # BBB VP9 streams
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 8c789ffe7..ff3c389e5 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -164,7 +164,20 @@ const char *const kVP9TestVectors[] = {
   "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
   "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
   "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
-  "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm"
+  "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm",
+  "vp90-2-14-resize-fp-tiles-1-16.webm",
+  "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
+  "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",
+  "vp90-2-14-resize-fp-tiles-16-1.webm", "vp90-2-14-resize-fp-tiles-16-2.webm",
+  "vp90-2-14-resize-fp-tiles-16-4.webm",
+  "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm",
+  "vp90-2-14-resize-fp-tiles-16-8.webm", "vp90-2-14-resize-fp-tiles-1-8.webm",
+  "vp90-2-14-resize-fp-tiles-2-16.webm", "vp90-2-14-resize-fp-tiles-2-1.webm",
+  "vp90-2-14-resize-fp-tiles-2-4.webm", "vp90-2-14-resize-fp-tiles-2-8.webm",
+  "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm",
+  "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
+  "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
+  "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm"
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index a78cdea6b..5523f2024 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -153,6 +153,66 @@ TEST(VP9DecodeMTTest, MTDecode2) {
   }
 }
 
+// Test tile quantity changes within one file.
+TEST(VP9DecodeMTTest, MTDecode3) {
+  static const struct {
+    const char *name;
+    const char *expected_md5;
+  } files[] = {
+    { "vp90-2-14-resize-fp-tiles-1-16.webm",
+      "0cd5e632c326297e975f38949c31ea94" },
+    { "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
+      "5c78a96a42e7f4a4f6b2edcdb791e44c" },
+    { "vp90-2-14-resize-fp-tiles-1-2.webm",
+      "e030450ae85c3277be2a418769df98e2" },
+    { "vp90-2-14-resize-fp-tiles-1-4.webm",
+      "312eed4e2b64eb7a4e7f18916606a430" },
+    { "vp90-2-14-resize-fp-tiles-16-1.webm",
+      "1755c16d8af16a9cb3fe7338d90abe52" },
+    { "vp90-2-14-resize-fp-tiles-16-2.webm",
+      "500300592d3fcb6f12fab25e48aaf4df" },
+    { "vp90-2-14-resize-fp-tiles-16-4.webm",
+      "47c48379fa6331215d91c67648e1af6e" },
+    { "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm",
+      "eecf17290739bc708506fa4827665989" },
+    { "vp90-2-14-resize-fp-tiles-16-8.webm",
+      "29b6bb54e4c26b5ca85d5de5fed94e76" },
+    { "vp90-2-14-resize-fp-tiles-1-8.webm",
+      "1b6f175e08cd82cf84bb800ac6d1caa3" },
+    { "vp90-2-14-resize-fp-tiles-2-16.webm",
+      "ca3b03e4197995d8d5444ede7a6c0804" },
+    { "vp90-2-14-resize-fp-tiles-2-1.webm",
+      "99aec065369d70bbb78ccdff65afed3f" },
+    { "vp90-2-14-resize-fp-tiles-2-4.webm",
+      "22d0ebdb49b87d2920a85aea32e1afd5" },
+    { "vp90-2-14-resize-fp-tiles-2-8.webm",
+      "c2115cf051c62e0f7db1d4a783831541" },
+    { "vp90-2-14-resize-fp-tiles-4-16.webm",
+      "c690d7e1719b31367564cac0af0939cb" },
+    { "vp90-2-14-resize-fp-tiles-4-1.webm",
+      "a926020b2cc3e15ad4cc271853a0ff26" },
+    { "vp90-2-14-resize-fp-tiles-4-2.webm",
+      "42699063d9e581f1993d0cf890c2be78" },
+    { "vp90-2-14-resize-fp-tiles-4-8.webm",
+      "7f76d96036382f45121e3d5aa6f8ec52" },
+    { "vp90-2-14-resize-fp-tiles-8-16.webm",
+      "76a43fcdd7e658542913ea43216ec55d" },
+    { "vp90-2-14-resize-fp-tiles-8-1.webm",
+      "8e3fbe89486ca60a59299dea9da91378" },
+    { "vp90-2-14-resize-fp-tiles-8-2.webm",
+      "ae96f21f21b6370cc0125621b441fc52" },
+    { "vp90-2-14-resize-fp-tiles-8-4.webm",
+      "3eb4f24f10640d42218f7fd7b9fd30d4" },
+  };
+
+  for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
+          << "threads = " << t;
+    }
+  }
+}
+
 INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool());
 
 }  // namespace
diff --git a/third_party/libwebm/README.webm b/third_party/libwebm/README.webm
index b13c8cbc6..2c7570d6d 100644
--- a/third_party/libwebm/README.webm
+++ b/third_party/libwebm/README.webm
@@ -1,5 +1,5 @@
 URL: https://chromium.googlesource.com/webm/libwebm
-Version: 630a0e3c338e1b32bddf513a2dad807908d2976a
+Version: a7118d8ec564e9db841da1eb01f547f3229f240a
 License: BSD
 License File: LICENSE.txt
 
diff --git a/third_party/libwebm/mkvmuxerutil.cpp b/third_party/libwebm/mkvmuxerutil.cpp
index 96350e9c5..18060e902 100644
--- a/third_party/libwebm/mkvmuxerutil.cpp
+++ b/third_party/libwebm/mkvmuxerutil.cpp
@@ -292,11 +292,11 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) {
   if (WriteID(writer, type))
     return false;
 
-  const int32 length = strlen(value);
+  const uint64 length = strlen(value);
   if (WriteUInt(writer, length))
     return false;
 
-  if (writer->Write(value, length))
+  if (writer->Write(value, static_cast<const uint32>(length)))
     return false;
 
   return true;
diff --git a/third_party/libwebm/mkvreader.cpp b/third_party/libwebm/mkvreader.cpp
index cb3567f1a..b4b24594c 100644
--- a/third_party/libwebm/mkvreader.cpp
+++ b/third_party/libwebm/mkvreader.cpp
@@ -14,13 +14,20 @@ namespace mkvparser
 {
 
 MkvReader::MkvReader() :
-    m_file(NULL)
-{
+    m_file(NULL),
+    reader_owns_file_(true) {
 }
 
-MkvReader::~MkvReader()
-{
+MkvReader::MkvReader(FILE* fp) :
+    m_file(fp),
+    reader_owns_file_(false) {
+  GetFileSize();
+}
+
+MkvReader::~MkvReader() {
+  if (reader_owns_file_)
     Close();
+  m_file = NULL;
 }
 
 int MkvReader::Open(const char* fileName)
@@ -42,12 +49,17 @@ int MkvReader::Open(const char* fileName)
     if (m_file == NULL)
         return -1;
 #endif
+    return !GetFileSize();
+}
 
+bool MkvReader::GetFileSize() {
+    if (m_file == NULL)
+        return false;
 #ifdef _MSC_VER
     int status = _fseeki64(m_file, 0L, SEEK_END);
 
     if (status)
-        return -1;  //error
+        return false;  //error
 
     m_length = _ftelli64(m_file);
 #else
@@ -56,16 +68,19 @@ int MkvReader::Open(const char* fileName)
 #endif
     assert(m_length >= 0);
 
+    if (m_length < 0)
+        return false;
+
 #ifdef _MSC_VER
     status = _fseeki64(m_file, 0L, SEEK_SET);
 
     if (status)
-        return -1;  //error
+        return false;  //error
 #else
     fseek(m_file, 0L, SEEK_SET);
 #endif
 
-    return 0;
+    return true;
 }
 
 void MkvReader::Close()
diff --git a/third_party/libwebm/mkvreader.hpp b/third_party/libwebm/mkvreader.hpp
index adcc29f47..8ebdd99a7 100644
--- a/third_party/libwebm/mkvreader.hpp
+++ b/third_party/libwebm/mkvreader.hpp
@@ -21,6 +21,7 @@ class MkvReader : public IMkvReader
     MkvReader& operator=(const MkvReader&);
 public:
     MkvReader();
+    MkvReader(FILE* fp);
     virtual ~MkvReader();
 
     int Open(const char*);
@@ -29,8 +30,15 @@ public:
     virtual int Read(long long position, long length, unsigned char* buffer);
     virtual int Length(long long* total, long long* available);
 private:
+
+    // Determines the size of the file. This is called either by the constructor
+    // or by the Open function depending on file ownership. Returns true on
+    // success.
+    bool GetFileSize();
+
     long long m_length;
     FILE* m_file;
+    bool reader_owns_file_;
 };
 
 }  //end namespace mkvparser
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 779dce017..068284faa 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -25,6 +25,23 @@ extern "C" {
 
 #define MI_MASK (MI_BLOCK_SIZE - 1)
 
+// Bitstream profiles indicated by 2 bits in the uncompressed header.
+// 00: Profile 0. 4:2:0 only.
+// 10: Profile 1. adds 4:4:4, 4:2:2, alpha.
+// 01: Profile 2. Supports 10-bit and 12-bit color only.
+// 11: Undefined profile.
+typedef enum BITSTREAM_PROFILE {
+  PROFILE_0,
+  PROFILE_1,
+  PROFILE_2,
+  MAX_PROFILES
+} BITSTREAM_PROFILE;
+
+typedef enum BIT_DEPTH {
+  BITS_8,
+  BITS_10,
+  BITS_12
+} BIT_DEPTH;
 
 typedef enum BLOCK_SIZE {
   BLOCK_4X4,
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 61682c42d..1aab36205 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -195,7 +195,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                              int block, int mi_row, int mi_col) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
-  const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
+  const MODE_INFO *prev_mi = cm->prev_mi
         ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]
         : NULL;
   const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index ea1b8856e..fe9cc9e6a 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -179,7 +179,10 @@ typedef struct VP9Common {
   FRAME_COUNTS counts;
 
   unsigned int current_video_frame;
-  int version;
+  BITSTREAM_PROFILE profile;
+
+  // BITS_8 in profiles 0 and 1, BITS_10 or BITS_12 in profile 2
+  BIT_DEPTH bit_depth;
 
 #if CONFIG_VP9_POSTPROC
   struct postproc_state  postproc_state;
@@ -281,15 +284,15 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
   xd->left_available  = (mi_col > tile->mi_col_start);
 }
 
-static INLINE void set_prev_mi(VP9_COMMON *cm) {
-  const int use_prev_in_find_mv_refs = cm->width == cm->last_width &&
-                                       cm->height == cm->last_height &&
-                                       !cm->intra_only &&
-                                       cm->last_show_frame;
+static INLINE MODE_INFO *get_prev_mi(VP9_COMMON *cm) {
+  const int use_prev_mi = cm->coding_use_prev_mi &&
+                          cm->width == cm->last_width &&
+                          cm->height == cm->last_height &&
+                          !cm->intra_only &&
+                          cm->last_show_frame;
   // Special case: set prev_mi to NULL when the previous mode info
   // context cannot be used.
-  cm->prev_mi = use_prev_in_find_mv_refs ?
-                  cm->prev_mip + cm->mi_stride + 1 : NULL;
+  return use_prev_mi ? &cm->prev_mip[cm->mi_stride + 1] : NULL;
 }
 
 static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index b874ef3ba..b45559245 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -380,6 +380,10 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
 add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
 
+add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get_sse_sum_16x16 sse2/;
+$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
+
 add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
 
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
index 7e9cc840a..b84db970e 100644
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -33,10 +33,11 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
 };
 
 #if defined(__clang__)
-# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3)
+# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \
+      (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0)
 #  define MM256_BROADCASTSI128_SI256(x) \
        _mm_broadcastsi128_si256((__m128i const *)&(x))
-# else  // clang > 3.3
+# else  // clang > 3.3, and not 5.0 on macosx.
 #  define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
 # endif  // clang <= 3.3
 #elif defined(__GNUC__)
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 5a2e6f881..9b63961f0 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -671,7 +671,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
   setup_display_size(cm, rb);
 }
 
-static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile,
+static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
                         vp9_reader *r) {
   const int num_threads = pbi->oxcf.max_threads;
   VP9_COMMON *const cm = &pbi->common;
@@ -776,7 +776,7 @@ typedef struct TileBuffer {
   int col;  // only used with multi-threaded decoding
 } TileBuffer;
 
-static const uint8_t *decode_tiles(VP9D_COMP *pbi,
+static const uint8_t *decode_tiles(VP9Decoder *pbi,
                                    const uint8_t *data,
                                    const uint8_t *data_end) {
   VP9_COMMON *const cm = &pbi->common;
@@ -865,7 +865,7 @@ static int compare_tile_buffers(const void *a, const void *b) {
   }
 }
 
-static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
+static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
                                       const uint8_t *data,
                                       const uint8_t *data_end) {
   VP9_COMMON *const cm = &pbi->common;
@@ -882,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
   assert(tile_rows == 1);
   (void)tile_rows;
 
-  if (num_workers > pbi->num_tile_workers) {
+  // TODO(jzern): See if we can remove the restriction of passing in max
+  // threads to the decoder.
+  if (pbi->num_tile_workers == 0) {
+    const int num_threads = pbi->oxcf.max_threads & ~1;
     int i;
+    // TODO(jzern): Allocate one less worker, as in the current code we only
+    // use num_threads - 1 workers.
     CHECK_MEM_ERROR(cm, pbi->tile_workers,
-                    vpx_realloc(pbi->tile_workers,
-                                num_workers * sizeof(*pbi->tile_workers)));
-    for (i = pbi->num_tile_workers; i < num_workers; ++i) {
+                    vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
+    for (i = 0; i < num_threads; ++i) {
       VP9Worker *const worker = &pbi->tile_workers[i];
       ++pbi->num_tile_workers;
 
@@ -895,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
       CHECK_MEM_ERROR(cm, worker->data1,
                       vpx_memalign(32, sizeof(TileWorkerData)));
       CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
-      if (i < num_workers - 1 && !vp9_worker_reset(worker)) {
+      if (i < num_threads - 1 && !vp9_worker_reset(worker)) {
         vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                            "Tile decoder thread creation failed");
       }
@@ -903,7 +907,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
   }
 
   // Reset tile decoding hook
-  for (n = 0; n < pbi->num_tile_workers; ++n) {
+  for (n = 0; n < num_workers; ++n) {
     pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
   }
 
@@ -1005,12 +1009,13 @@ static void error_handler(void *data) {
   vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
 }
 
-#define RESERVED \
-  if (vp9_rb_read_bit(rb)) \
-      vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \
-                         "Reserved bit must be unset")
+static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) {
+  int profile = vp9_rb_read_bit(rb);
+  profile |= vp9_rb_read_bit(rb) << 1;
+  return (BITSTREAM_PROFILE) profile;
+}
 
-static size_t read_uncompressed_header(VP9D_COMP *pbi,
+static size_t read_uncompressed_header(VP9Decoder *pbi,
                                        struct vp9_read_bit_buffer *rb) {
   VP9_COMMON *const cm = &pbi->common;
   size_t sz;
@@ -1022,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
       vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                          "Invalid frame marker");
 
-  cm->version = vp9_rb_read_bit(rb);
-  RESERVED;
+  cm->profile = read_profile(rb);
+  if (cm->profile >= MAX_PROFILES)
+    vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                       "Unsupported bitstream profile");
 
   cm->show_existing_frame = vp9_rb_read_bit(rb);
   if (cm->show_existing_frame) {
@@ -1048,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
 
   if (cm->frame_type == KEY_FRAME) {
     check_sync_code(cm, rb);
-
+    if (cm->profile > PROFILE_1)
+      cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10;
     cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
     if (cm->color_space != SRGB) {
       vp9_rb_read_bit(rb);  // [16,235] (including xvycc) vs [0,255] range
-      if (cm->version == 1) {
+      if (cm->profile >= PROFILE_1) {
         cm->subsampling_x = vp9_rb_read_bit(rb);
         cm->subsampling_y = vp9_rb_read_bit(rb);
         vp9_rb_read_bit(rb);  // has extra plane
@@ -1060,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
         cm->subsampling_y = cm->subsampling_x = 1;
       }
     } else {
-      if (cm->version == 1) {
+      if (cm->profile >= PROFILE_1) {
         cm->subsampling_y = cm->subsampling_x = 0;
         vp9_rb_read_bit(rb);  // has extra plane
       } else {
@@ -1147,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
   return sz;
 }
 
-static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
+static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
                                   size_t partition_size) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
@@ -1247,7 +1255,7 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) {
 }
 #endif  // NDEBUG
 
-int vp9_decode_frame(VP9D_COMP *pbi,
+int vp9_decode_frame(VP9Decoder *pbi,
                      const uint8_t *data, const uint8_t *data_end,
                      const uint8_t **p_data_end) {
   VP9_COMMON *const cm = &pbi->common;
@@ -1288,11 +1296,7 @@ int vp9_decode_frame(VP9D_COMP *pbi,
   }
 
   init_macroblockd(cm, &pbi->mb);
-
-  if (cm->coding_use_prev_mi)
-    set_prev_mi(cm);
-  else
-    cm->prev_mi = NULL;
+  cm->prev_mi = get_prev_mi(cm);
 
   setup_plane_dequants(cm, xd, cm->base_qindex);
   vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index e474db495..8a19dafc5 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -17,11 +17,11 @@ extern "C" {
 #endif
 
 struct VP9Common;
-struct VP9Decompressor;
+struct VP9Decoder;
 
 void vp9_init_dequantizer(struct VP9Common *cm);
 
-int vp9_decode_frame(struct VP9Decompressor *pbi,
+int vp9_decode_frame(struct VP9Decoder *pbi,
                      const uint8_t *data, const uint8_t *data_end,
                      const uint8_t **p_data_end);
 
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index fb3666cbe..fd74478e9 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -110,8 +110,8 @@ void vp9_initialize_dec() {
   }
 }
 
-VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) {
-  VP9D_COMP *const pbi = vpx_memalign(32, sizeof(*pbi));
+VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) {
+  VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
   VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
 
   if (!cm)
@@ -152,7 +152,7 @@ VP9D_COMP *vp9_decoder_create(const VP9D_CONFIG *oxcf) {
   return pbi;
 }
 
-void vp9_decoder_remove(VP9D_COMP *pbi) {
+void vp9_decoder_remove(VP9Decoder *pbi) {
   VP9_COMMON *const cm = &pbi->common;
   int i;
 
@@ -182,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
            a->uv_height == b->uv_height && a->uv_width == b->uv_width;
 }
 
-vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi,
+vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi,
                                        VP9_REFFRAME ref_frame_flag,
                                        YV12_BUFFER_CONFIG *sd) {
   VP9_COMMON *cm = &pbi->common;
@@ -252,7 +252,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
 }
 
 
-int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) {
+int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) {
   VP9_COMMON *cm = &pbi->common;
 
   if (index < 0 || index >= REF_FRAMES)
@@ -263,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) {
 }
 
 /* If any buffer updating is signaled it should be done here. */
-static void swap_frame_buffers(VP9D_COMP *pbi) {
+static void swap_frame_buffers(VP9Decoder *pbi) {
   int ref_index = 0, mask;
   VP9_COMMON *const cm = &pbi->common;
 
@@ -287,7 +287,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
     cm->frame_refs[ref_index].idx = INT_MAX;
 }
 
-int vp9_receive_compressed_data(VP9D_COMP *pbi,
+int vp9_receive_compressed_data(VP9Decoder *pbi,
                                 size_t size, const uint8_t **psource,
                                 int64_t time_stamp) {
   VP9_COMMON *const cm = &pbi->common;
@@ -403,7 +403,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi,
   return retcode;
 }
 
-int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd,
+int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
                       int64_t *time_stamp, int64_t *time_end_stamp,
                       vp9_ppflags_t *flags) {
   int ret = -1;
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 2e8bebdae..c9dc25191 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -35,7 +35,7 @@ typedef struct {
   int inv_tile_order;
 } VP9D_CONFIG;
 
-typedef struct VP9Decompressor {
+typedef struct VP9Decoder {
   DECLARE_ALIGNED(16, MACROBLOCKD, mb);
 
   DECLARE_ALIGNED(16, VP9_COMMON, common);
@@ -59,20 +59,20 @@ typedef struct VP9Decompressor {
   int num_tile_workers;
 
   VP9LfSync lf_row_sync;
-} VP9D_COMP;
+} VP9Decoder;
 
 void vp9_initialize_dec();
 
-int vp9_receive_compressed_data(struct VP9Decompressor *pbi,
+int vp9_receive_compressed_data(struct VP9Decoder *pbi,
                                 size_t size, const uint8_t **dest,
                                 int64_t time_stamp);
 
-int vp9_get_raw_frame(struct VP9Decompressor *pbi,
+int vp9_get_raw_frame(struct VP9Decoder *pbi,
                       YV12_BUFFER_CONFIG *sd,
                       int64_t *time_stamp, int64_t *time_end_stamp,
                       vp9_ppflags_t *flags);
 
-vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi,
+vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi,
                                        VP9_REFFRAME ref_frame_flag,
                                        YV12_BUFFER_CONFIG *sd);
 
@@ -80,13 +80,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
                                       VP9_REFFRAME ref_frame_flag,
                                       YV12_BUFFER_CONFIG *sd);
 
-int vp9_get_reference_dec(struct VP9Decompressor *pbi,
+int vp9_get_reference_dec(struct VP9Decoder *pbi,
                           int index, YV12_BUFFER_CONFIG **fb);
 
 
-struct VP9Decompressor *vp9_decoder_create(const VP9D_CONFIG *oxcf);
+struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf);
 
-void vp9_decoder_remove(struct VP9Decompressor *pbi);
+void vp9_decoder_remove(struct VP9Decoder *pbi);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 163936021..9b124c9d9 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -132,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) {
 
 // VP9 decoder: Implement multi-threaded loopfilter that uses the tile
 // threads.
-void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
+void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
                               VP9_COMMON *cm,
                               MACROBLOCKD *xd,
                               int frame_filter_level,
                               int y_only, int partial_frame) {
   // Number of superblock rows and cols
   const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+  const int tile_cols = 1 << cm->log2_tile_cols;
+  const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
   int i;
 
   // Allocate memory used in thread synchronization.
@@ -168,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
              sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
 
   // Set up loopfilter thread data.
-  for (i = 0; i < pbi->num_tile_workers; ++i) {
+  // The decoder uses num_workers instead of pbi->num_tile_workers
+  // because it has been observed that running the loopfilter on more
+  // threads than there are tile columns in the frame hurts
+  // performance on Android. This is because the system will only
+  // schedule the tile decode workers on as many cores as there are
+  // tile columns. If the decoder then uses more threads for the
+  // loopfilter, performance suffers because of contention. If the
+  // multithreading code changes in the future then the number of workers
+  // used by the loopfilter should be revisited.
+  for (i = 0; i < num_workers; ++i) {
     VP9Worker *const worker = &pbi->tile_workers[i];
     TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
     LFWorkerData *const lf_data = &tile_data->lfdata;
@@ -184,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
     lf_data->y_only = y_only;   // always do all planes in decoder
 
     lf_data->lf_sync = &pbi->lf_row_sync;
-    lf_data->num_lf_workers = pbi->num_tile_workers;
+    lf_data->num_lf_workers = num_workers;
 
     // Start loopfiltering
-    if (i == pbi->num_tile_workers - 1) {
+    if (i == num_workers - 1) {
       vp9_worker_execute(worker);
     } else {
       vp9_worker_launch(worker);
@@ -195,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
   }
 
   // Wait till all rows are finished
-  for (i = 0; i < pbi->num_tile_workers; ++i) {
+  for (i = 0; i < num_workers; ++i) {
     vp9_worker_sync(&pbi->tile_workers[i]);
   }
 }
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index 2f65e1e30..005bd7bbd 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -18,7 +18,7 @@
 
 struct macroblockd;
 struct VP9Common;
-struct VP9Decompressor;
+struct VP9Decoder;
 
 typedef struct TileWorkerData {
   struct VP9Common *cm;
@@ -50,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
 void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
 
 // Multi-threaded loopfilter that uses the tile threads.
-void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi,
+void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi,
                               struct VP9Common *cm,
                               struct macroblockd *xd,
                               int frame_filter_level,
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c
index 83892e872..47ad8d8cc 100644
--- a/vp9/encoder/vp9_aq_complexity.c
+++ b/vp9/encoder/vp9_aq_complexity.c
@@ -48,8 +48,7 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
     // Use some of the segments for in frame Q adjustment.
     for (segment = 1; segment < 2; segment++) {
       const int qindex_delta =
-          vp9_compute_qdelta_by_rate(cpi,
-                                     cm->base_qindex,
+          vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
                                      in_frame_q_adj_ratio[segment]);
       vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
       vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 2e1b4ef5f..787909142 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -241,7 +241,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
     vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
 
     // Set the q delta for segment 1.
-    qindex_delta = vp9_compute_qdelta_by_rate(cpi,
+    qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type,
                                               cm->base_qindex,
                                               rate_ratio_qdelta);
     // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from
diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c
index c25eb95c7..ae2a163b1 100644
--- a/vp9/encoder/vp9_aq_variance.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -99,7 +99,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) {
         continue;
       }
 
-      qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i));
+      qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i));
       vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta);
       vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q);
 
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 4313418d4..8d2afb991 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1031,19 +1031,22 @@ static void write_sync_code(struct vp9_write_bit_buffer *wb) {
   vp9_wb_write_literal(wb, VP9_SYNC_CODE_2, 8);
 }
 
+static void write_profile(BITSTREAM_PROFILE profile,
+                          struct vp9_write_bit_buffer *wb) {
+  assert(profile < MAX_PROFILES);  // only two bits are written below
+  vp9_wb_write_bit(wb, profile & 1);   // bit 0 of the profile goes out first
+  vp9_wb_write_bit(wb, profile >> 1);  // then the remaining upper part
+}
+
 static void write_uncompressed_header(VP9_COMP *cpi,
                                       struct vp9_write_bit_buffer *wb) {
   VP9_COMMON *const cm = &cpi->common;
 
   vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2);
 
-  // bitstream version.
-  // 00 - profile 0. 4:2:0 only
-  // 10 - profile 1. adds 4:4:4, 4:2:2, alpha
-  vp9_wb_write_bit(wb, cm->version);
-  vp9_wb_write_bit(wb, 0);
+  write_profile(cm->profile, wb);
 
-  vp9_wb_write_bit(wb, 0);
+  vp9_wb_write_bit(wb, 0);  // show_existing_frame
   vp9_wb_write_bit(wb, cm->frame_type);
   vp9_wb_write_bit(wb, cm->show_frame);
   vp9_wb_write_bit(wb, cm->error_resilient_mode);
@@ -1051,16 +1054,20 @@ static void write_uncompressed_header(VP9_COMP *cpi,
   if (cm->frame_type == KEY_FRAME) {
     const COLOR_SPACE cs = UNKNOWN;
     write_sync_code(wb);
+    if (cm->profile > PROFILE_1) {
+      assert(cm->bit_depth > BITS_8);
+      vp9_wb_write_bit(wb, cm->bit_depth - BITS_10);
+    }
     vp9_wb_write_literal(wb, cs, 3);
     if (cs != SRGB) {
       vp9_wb_write_bit(wb, 0);  // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
-      if (cm->version == 1) {
+      if (cm->profile >= PROFILE_1) {
         vp9_wb_write_bit(wb, cm->subsampling_x);
         vp9_wb_write_bit(wb, cm->subsampling_y);
         vp9_wb_write_bit(wb, 0);  // has extra plane
       }
     } else {
-      assert(cm->version == 1);
+      assert(cm->profile == PROFILE_1);
       vp9_wb_write_bit(wb, 0);  // has extra plane
     }
 
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 1ca3c2881..61a5022ec 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1048,9 +1048,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
                         (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2));
     if (!is_edge && (complexity > 128))
       x->rdmult += ((x->rdmult * (complexity - 128)) / 256);
-  } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
     const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map
-        : cm->last_frame_seg_map;
+                                                  : cm->last_frame_seg_map;
     // If segment 1, use rdmult for that segment.
     if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col))
       x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
@@ -1076,8 +1076,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
       vp9_clear_system_state();
       *totalrate = (int)round(*totalrate * rdmult_ratio);
     }
-  } else if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) ||
-      (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)) {
+  } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) {
     x->rdmult = orig_rdmult;
   }
 }
@@ -1365,6 +1364,69 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
   }
 }
 
+static void constrain_copy_partitioning(VP9_COMP *const cpi,
+                                        const TileInfo *const tile,
+                                        MODE_INFO **mi_8x8,
+                                        MODE_INFO **prev_mi_8x8,
+                                        int mi_row, int mi_col,
+                                        BLOCK_SIZE bsize) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int mis = cm->mi_stride;
+  const int row8x8_remaining = tile->mi_row_end - mi_row;
+  const int col8x8_remaining = tile->mi_col_end - mi_col;
+  MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col;
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  int block_row, block_col;
+
+  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
+
+  // Check whether the SB64 is entirely "in image".
+  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
+      (row8x8_remaining >= MI_BLOCK_SIZE)) {
+    for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
+      for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
+        const int index = block_row * mis + block_col;
+        MODE_INFO *prev_mi = prev_mi_8x8[index];
+        const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
+        // Use previous partition if block size is not larger than bsize.
+        if (prev_mi && sb_type <= bsize) {
+          int block_row2, block_col2;
+          for (block_row2 = 0; block_row2 < bh; ++block_row2) {
+            for (block_col2 = 0; block_col2 < bw; ++block_col2) {
+              const int index2 = (block_row + block_row2) * mis +
+                  block_col + block_col2;
+              prev_mi = prev_mi_8x8[index2];
+              if (prev_mi) {
+                const ptrdiff_t offset = prev_mi - cm->prev_mi;
+                mi_8x8[index2] = cm->mi + offset;  // same position in cm->mi
+                mi_8x8[index2]->mbmi.sb_type = prev_mi->mbmi.sb_type;
+              }
+            }
+          }
+        } else {
+          // Otherwise, use fixed partition of size bsize.
+          mi_8x8[index] = mi_upper_left + index;
+          mi_8x8[index]->mbmi.sb_type = bsize;
+        }
+      }
+    }
+  } else {
+    // Else this is a partial SB64, copy previous partition.
+    for (block_row = 0; block_row < 8; ++block_row) {
+      for (block_col = 0; block_col < 8; ++block_col) {
+        MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
+        const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
+        if (prev_mi) {
+          const ptrdiff_t offset = prev_mi - cm->prev_mi;
+          mi_8x8[block_row * mis + block_col] = cm->mi + offset;
+          mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
+        }
+      }
+    }
+  }
+}
+
 static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
                               MODE_INFO **prev_mi_8x8) {
   const int mis = cm->mi_stride;
@@ -1384,6 +1446,125 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
   }
 }
 
+const struct {
+  int row;
+  int col;
+} coord_lookup[16] = {  // {row, col} of each 16x16 block in an SB64, mi units
+    // 32x32 index = 0 (top-left quadrant)
+    {0, 0}, {0, 2}, {2, 0}, {2, 2},
+    // 32x32 index = 1 (top-right quadrant)
+    {0, 4}, {0, 6}, {2, 4}, {2, 6},
+    // 32x32 index = 2 (bottom-left quadrant)
+    {4, 0}, {4, 2}, {6, 0}, {6, 2},
+    // 32x32 index = 3 (bottom-right quadrant)
+    {4, 4}, {4, 6}, {6, 4}, {6, 6},
+};
+
+static void set_source_var_based_partition(VP9_COMP *cpi,
+                                           const TileInfo *const tile,
+                                           MODE_INFO **mi_8x8,
+                                           int mi_row, int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *x = &cpi->mb;
+  const int mis = cm->mi_stride;
+  int row8x8_remaining = tile->mi_row_end - mi_row;
+  int col8x8_remaining = tile->mi_col_end - mi_col;
+  int r, c;
+  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
+
+  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
+
+  // In-image SB64: choose 16x16/32x32/64x64 from variance against Last_Source.
+  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
+      (row8x8_remaining >= MI_BLOCK_SIZE)) {
+    const int src_stride = x->plane[0].src.stride;
+    const int pre_stride = cpi->Last_Source->y_stride;
+    const uint8_t *src = x->plane[0].src.buf;
+    const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
+                           (mi_col * MI_SIZE);
+    const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
+    const int thr_32x32 = cpi->sf.source_var_thresh;
+    const int thr_64x64 = thr_32x32 << 1;
+    int i, j;
+    int index;
+    diff d32[4];
+    int use16x16 = 0;
+
+    for (i = 0; i < 4; i++) {
+      diff d16[4];
+
+      for (j = 0; j < 4; j++) {
+        int b_mi_row = coord_lookup[i * 4 + j].row;
+        int b_mi_col = coord_lookup[i * 4 + j].col;
+        int b_offset = b_mi_row * MI_SIZE * src_stride + b_mi_col * MI_SIZE;
+        int p_offset = b_mi_row * MI_SIZE * pre_stride + b_mi_col * MI_SIZE;
+        // Each buffer is offset with its own stride (src_stride/pre_stride).
+        vp9_get_sse_sum_16x16(src + b_offset,
+                              src_stride,
+                              pre_src + p_offset,
+                              pre_stride, &d16[j].sse, &d16[j].sum);
+
+        d16[j].var = d16[j].sse -
+            (((uint32_t)d16[j].sum * d16[j].sum) >> 8);
+
+        index = b_mi_row * mis + b_mi_col;
+        mi_8x8[index] = mi_upper_left + index;
+        mi_8x8[index]->mbmi.sb_type = BLOCK_16X16;
+
+        // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
+        // size to further improve quality.
+      }
+
+      if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 &&
+          d16[2].var < thr_32x32 && d16[3].var < thr_32x32) {
+        d32[i].sse = d16[0].sse;
+        d32[i].sum = d16[0].sum;
+
+        for (j = 1; j < 4; j++) {
+          d32[i].sse += d16[j].sse;
+          d32[i].sum += d16[j].sum;
+        }
+
+        d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10);
+
+        index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col;
+        mi_8x8[index] = mi_upper_left + index;
+        mi_8x8[index]->mbmi.sb_type = BLOCK_32X32;
+
+        if (!((cm->current_video_frame - 1) %
+            cpi->sf.search_type_check_frequency))
+          cpi->use_large_partition_rate += 1;
+      } else {
+        use16x16 = 1;  // d32[i] is left unset; blocks the 64x64 check below
+      }
+    }
+
+    if (!use16x16) {
+      if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 &&
+          d32[2].var < thr_64x64 && d32[3].var < thr_64x64)  {
+        mi_8x8[0] = mi_upper_left;
+        mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
+      }
+    }
+  } else {   // partial in-image SB64
+    BLOCK_SIZE bsize = BLOCK_16X16;
+    int bh = num_8x8_blocks_high_lookup[bsize];
+    int bw = num_8x8_blocks_wide_lookup[bsize];
+
+    for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
+      for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
+        int index = r * mis + c;
+        // Find a partition size that fits
+        bsize = find_partition_size(bsize,
+                                    (row8x8_remaining - r),
+                                    (col8x8_remaining - c), &bh, &bw);
+        mi_8x8[index] = mi_upper_left + index;
+        mi_8x8[index]->mbmi.sb_type = bsize;
+      }
+    }
+  }
+}
+
 static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
   const int mis = cm->mi_stride;
   int block_row, block_col;
@@ -2297,6 +2478,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                              int mi_row, TOKENEXTRA **tp) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  SPEED_FEATURES *const sf = &cpi->sf;
   int mi_col;
 
   // Initialize the left context for the new SB row
@@ -2312,7 +2494,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     BLOCK_SIZE i;
     MACROBLOCK *x = &cpi->mb;
 
-    if (cpi->sf.adaptive_pred_interp_filter) {
+    if (sf->adaptive_pred_interp_filter) {
       for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) {
         const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
         const int num_4x4_h = num_4x4_blocks_high_lookup[i];
@@ -2326,64 +2508,69 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
 
     vp9_zero(cpi->mb.pred_mv);
 
-    if ((cpi->sf.partition_search_type == SEARCH_PARTITION &&
-         cpi->sf.use_lastframe_partitioning) ||
-        cpi->sf.partition_search_type == FIXED_PARTITION ||
-        cpi->sf.partition_search_type == VAR_BASED_PARTITION ||
-        cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
+    if ((sf->partition_search_type == SEARCH_PARTITION &&
+         sf->use_lastframe_partitioning) ||
+         sf->partition_search_type == FIXED_PARTITION ||
+         sf->partition_search_type == VAR_BASED_PARTITION ||
+         sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
       const int idx_str = cm->mi_stride * mi_row + mi_col;
       MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
       MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
       cpi->mb.source_variance = UINT_MAX;
-      if (cpi->sf.partition_search_type == FIXED_PARTITION) {
+      if (sf->partition_search_type == FIXED_PARTITION) {
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
-                               cpi->sf.always_this_block_size);
+                               sf->always_this_block_size);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                          &dummy_rate, &dummy_dist, 1);
-      } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
+      } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
         BLOCK_SIZE bsize;
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                          &dummy_rate, &dummy_dist, 1);
-      } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+      } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
         choose_partitioning(cpi, tile, mi_row, mi_col);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                          &dummy_rate, &dummy_dist, 1);
       } else {
         if ((cm->current_video_frame
-            % cpi->sf.last_partitioning_redo_frequency) == 0
+            % sf->last_partitioning_redo_frequency) == 0
             || cm->prev_mi == 0
             || cm->show_frame == 0
             || cm->frame_type == KEY_FRAME
             || cpi->rc.is_src_frame_alt_ref
-            || ((cpi->sf.use_lastframe_partitioning ==
+            || ((sf->use_lastframe_partitioning ==
                  LAST_FRAME_PARTITION_LOW_MOTION) &&
                  sb_has_motion(cm, prev_mi_8x8))) {
           // If required set upper and lower partition size limits
-          if (cpi->sf.auto_min_max_partition_size) {
+          if (sf->auto_min_max_partition_size) {
             set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
             rd_auto_partition_range(cpi, tile, mi_row, mi_col,
-                                    &cpi->sf.min_partition_size,
-                                    &cpi->sf.max_partition_size);
+                                    &sf->min_partition_size,
+                                    &sf->max_partition_size);
           }
           rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                             &dummy_rate, &dummy_dist, 1, INT64_MAX);
         } else {
-          copy_partitioning(cm, mi_8x8, prev_mi_8x8);
+          if (sf->constrain_copy_partition &&
+              sb_has_motion(cm, prev_mi_8x8))
+            constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8,
+                                        mi_row, mi_col, BLOCK_16X16);
+          else
+            copy_partitioning(cm, mi_8x8, prev_mi_8x8);
           rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                            &dummy_rate, &dummy_dist, 1);
         }
       }
     } else {
       // If required set upper and lower partition size limits
-      if (cpi->sf.auto_min_max_partition_size) {
+      if (sf->auto_min_max_partition_size) {
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         rd_auto_partition_range(cpi, tile, mi_row, mi_col,
-                                &cpi->sf.min_partition_size,
-                                &cpi->sf.max_partition_size);
+                                &sf->min_partition_size,
+                                &sf->max_partition_size);
       }
       rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rate, &dummy_dist, 1, INT64_MAX);
@@ -2448,19 +2635,6 @@ static int check_dual_ref_flags(VP9_COMP *cpi) {
   }
 }
 
-static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) {
-  int x, y;
-
-  for (y = 0; y < ymbs; y++) {
-    for (x = 0; x < xmbs; x++) {
-      if (!mi_8x8[y * mis + x]->mbmi.skip)
-        return 0;
-    }
-  }
-
-  return 1;
-}
-
 static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) {
   int mi_row, mi_col;
   const int mis = cm->mi_stride;
@@ -2911,12 +3085,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  if (bsize >= BLOCK_8X8) {
-    subsize = mi_8x8[0]->mbmi.sb_type;
-  } else {
-    subsize = BLOCK_4X4;
-  }
-
+  subsize = (bsize >= BLOCK_8X8) ? mi_8x8[0]->mbmi.sb_type : BLOCK_4X4;
   partition = partition_lookup[bsl][subsize];
 
   switch (partition) {
@@ -3019,10 +3188,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     const int idx_str = cm->mi_stride * mi_row + mi_col;
     MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
     MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
-
-    BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
-        cpi->sf.always_this_block_size :
-        get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
+    BLOCK_SIZE bsize;
 
     cpi->mb.source_variance = UINT_MAX;
     vp9_zero(cpi->mb.pred_mv);
@@ -3034,8 +3200,17 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist);
         break;
+      case SOURCE_VAR_BASED_PARTITION:
+        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+        set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
+        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+                            1, &dummy_rate, &dummy_dist);
+        break;
       case VAR_BASED_FIXED_PARTITION:
       case FIXED_PARTITION:
+        bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
+                cpi->sf.always_this_block_size :
+                get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
         set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist);
@@ -3058,53 +3233,42 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
 // end RTC play code
 
 static void encode_frame_internal(VP9_COMP *cpi) {
-  int mi_row;
+  SPEED_FEATURES *const sf = &cpi->sf;
   MACROBLOCK *const x = &cpi->mb;
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
 
-//  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
-//           cpi->common.current_video_frame, cpi->common.show_frame,
-//           cm->frame_type);
-
   xd->mi = cm->mi_grid_visible;
   xd->mi[0] = cm->mi;
 
   vp9_zero(cm->counts);
   vp9_zero(cpi->coef_counts);
   vp9_zero(cpi->tx_stepdown_count);
+  vp9_zero(cpi->rd_comp_pred_diff);
+  vp9_zero(cpi->rd_filter_diff);
+  vp9_zero(cpi->rd_tx_select_diff);
+  vp9_zero(cpi->rd_tx_select_threshes);
 
-  // Set frame level transform size use case
   cm->tx_mode = select_tx_mode(cpi);
 
-  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
-      && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 &&
+                           cm->y_dc_delta_q == 0 &&
+                           cm->uv_dc_delta_q == 0 &&
+                           cm->uv_ac_delta_q == 0;
   switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
 
   vp9_frame_init_quantizer(cpi);
 
   vp9_initialize_rd_consts(cpi);
   vp9_initialize_me_consts(cpi, cm->base_qindex);
-
-  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
-    // Initialize encode frame context.
-    init_encode_frame_mb_context(cpi);
-
-    // Build a frame level activity map
-    build_activity_map(cpi);
-  }
-
-  // Re-initialize encode frame context.
   init_encode_frame_mb_context(cpi);
 
-  vp9_zero(cpi->rd_comp_pred_diff);
-  vp9_zero(cpi->rd_filter_diff);
-  vp9_zero(cpi->rd_tx_select_diff);
-  vp9_zero(cpi->rd_tx_select_threshes);
+  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+    build_activity_map(cpi);
 
-  set_prev_mi(cm);
+  cm->prev_mi = get_prev_mi(cm);
 
-  if (cpi->sf.use_nonrd_pick_mode) {
+  if (sf->use_nonrd_pick_mode) {
     // Initialize internal buffer pointers for rtc coding, where non-RD
     // mode decision is used and hence no buffer pointer swap needed.
     int i;
@@ -3119,6 +3283,29 @@ static void encode_frame_internal(VP9_COMP *cpi) {
       p[i].eobs = ctx->eobs_pbuf[i][0];
     }
     vp9_zero(x->zcoeff_blk);
+
+    if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION &&
+        cm->current_video_frame > 0) {
+      int check_freq = cpi->sf.search_type_check_frequency;
+
+      if ((cm->current_video_frame - 1) % check_freq == 0) {
+        cpi->use_large_partition_rate = 0;
+      }
+
+      if ((cm->current_video_frame - 1) % check_freq == 1) {
+        const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] -
+                                  b_width_log2_lookup[BLOCK_16X16]) +
+                                  (b_height_log2_lookup[BLOCK_32X32] -
+                                  b_height_log2_lookup[BLOCK_16X16]));
+        cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 *
+                                        mbs_in_b32x32 / cm->MBs;
+      }
+
+      if ((cm->current_video_frame - 1) % check_freq >= 1) {
+        if (cpi->use_large_partition_rate < 15)
+          cpi->sf.partition_search_type = FIXED_PARTITION;
+      }
+    }
   }
 
   {
@@ -3136,12 +3323,13 @@ static void encode_frame_internal(VP9_COMP *cpi) {
         for (tile_col = 0; tile_col < tile_cols; tile_col++) {
           TileInfo tile;
           TOKENEXTRA *tp_old = tp;
+          int mi_row;
 
           // For each row of SBs in the frame
           vp9_tile_init(&tile, cm, tile_row, tile_col);
           for (mi_row = tile.mi_row_start;
                mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) {
-            if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME)
+            if (sf->use_nonrd_pick_mode && cm->frame_type != KEY_FRAME)
               encode_nonrd_sb_row(cpi, &tile, mi_row, &tp);
             else
               encode_rd_sb_row(cpi, &tile, mi_row, &tp);
@@ -3156,18 +3344,18 @@ static void encode_frame_internal(VP9_COMP *cpi) {
     cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
   }
 
-  if (cpi->sf.skip_encode_sb) {
+  if (sf->skip_encode_sb) {
     int j;
     unsigned int intra_count = 0, inter_count = 0;
     for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
       intra_count += cm->counts.intra_inter[j][0];
       inter_count += cm->counts.intra_inter[j][1];
     }
-    cpi->sf.skip_encode_frame = (intra_count << 2) < inter_count &&
-                                cm->frame_type != KEY_FRAME &&
-                                cm->show_frame;
+    sf->skip_encode_frame = (intra_count << 2) < inter_count &&
+                            cm->frame_type != KEY_FRAME &&
+                            cm->show_frame;
   } else {
-    cpi->sf.skip_encode_frame = 0;
+    sf->skip_encode_frame = 0;
   }
 
 #if 0
@@ -3201,33 +3389,31 @@ void vp9_encode_frame(VP9_COMP *cpi) {
 
   if (cpi->sf.frame_parameter_update) {
     int i;
-    REFERENCE_MODE reference_mode;
-    /*
-     * This code does a single RD pass over the whole frame assuming
-     * either compound, single or hybrid prediction as per whatever has
-     * worked best for that type of frame in the past.
-     * It also predicts whether another coding mode would have worked
-     * better that this coding mode. If that is the case, it remembers
-     * that for subsequent frames.
-     * It does the same analysis for transform size selection also.
-     */
+
+    // This code does a single RD pass over the whole frame assuming
+    // either compound, single or hybrid prediction as per whatever has
+    // worked best for that type of frame in the past.
+    // It also predicts whether another coding mode would have worked
+    // better than this coding mode. If that is the case, it remembers
+    // that for subsequent frames.
+    // It does the same analysis for transform size selection also.
     const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
     const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type];
     const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type];
 
     /* prediction (compound, single or hybrid) mode selection */
-    if (frame_type == 3 || !cm->allow_comp_inter_inter)
-      reference_mode = SINGLE_REFERENCE;
+    if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter)
+      cm->reference_mode = SINGLE_REFERENCE;
     else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] &&
              mode_thresh[COMPOUND_REFERENCE] >
                  mode_thresh[REFERENCE_MODE_SELECT] &&
              check_dual_ref_flags(cpi) &&
              cpi->static_mb_pct == 100)
-      reference_mode = COMPOUND_REFERENCE;
+      cm->reference_mode = COMPOUND_REFERENCE;
     else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT])
-      reference_mode = SINGLE_REFERENCE;
+      cm->reference_mode = SINGLE_REFERENCE;
     else
-      reference_mode = REFERENCE_MODE_SELECT;
+      cm->reference_mode = REFERENCE_MODE_SELECT;
 
     if (cm->interp_filter == SWITCHABLE) {
       if (frame_type != ALTREF_FRAME &&
@@ -3243,9 +3429,6 @@ void vp9_encode_frame(VP9_COMP *cpi) {
       }
     }
 
-    cpi->mb.e_mbd.lossless = cpi->oxcf.lossless;
-    cm->reference_mode = reference_mode;
-
     encode_frame_internal(cpi);
 
     for (i = 0; i < REFERENCE_MODES; ++i) {
@@ -3324,10 +3507,8 @@ void vp9_encode_frame(VP9_COMP *cpi) {
       }
     }
   } else {
-    cpi->mb.e_mbd.lossless = cpi->oxcf.lossless;
     cm->reference_mode = SINGLE_REFERENCE;
-    // Force the usage of the BILINEAR interp_filter.
-    cm->interp_filter = BILINEAR;
+    cm->interp_filter = SWITCHABLE;
     encode_frame_internal(cpi);
   }
 }
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index 72343cdf2..131e93201 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -20,6 +20,12 @@ struct macroblock;
 struct yv12_buffer_config;
 struct VP9_COMP;
 
+typedef struct {
+  unsigned int sse;
+  int sum;
+  unsigned int var;
+} diff;
+
 void vp9_setup_src_planes(struct macroblock *x,
                           const struct yv12_buffer_config *src,
                           int mi_row, int mi_col);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index dc329fd1e..db32ef8c9 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -258,13 +258,14 @@ static void avg_stats(FIRSTPASS_STATS *section) {
 static double calculate_modified_err(const VP9_COMP *cpi,
                                      const FIRSTPASS_STATS *this_frame) {
   const struct twopass_rc *twopass = &cpi->twopass;
+  const SVC *const svc = &cpi->svc;
   const FIRSTPASS_STATS *stats;
   double av_err;
   double modified_error;
 
-  if (cpi->svc.number_spatial_layers > 1 &&
-      cpi->svc.number_temporal_layers == 1) {
-    twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
+  if (svc->number_spatial_layers > 1 &&
+      svc->number_temporal_layers == 1) {
+    twopass = &svc->layer_context[svc->spatial_layer_id].twopass;
   }
 
   stats = &twopass->total_stats;
@@ -335,15 +336,13 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) {
 }
 
 // This function returns the maximum target rate per frame.
-static int frame_max_bits(const VP9_COMP *cpi) {
-  int64_t max_bits =
-    ((int64_t)cpi->rc.av_per_frame_bandwidth *
-     (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100;
-
+static int frame_max_bits(const RATE_CONTROL *rc, const VP9_CONFIG *oxcf) {
+  int64_t max_bits = ((int64_t)rc->av_per_frame_bandwidth *
+                          (int64_t)oxcf->two_pass_vbrmax_section) / 100;
   if (max_bits < 0)
     max_bits = 0;
-  else if (max_bits > cpi->rc.max_frame_bandwidth)
-    max_bits = cpi->rc.max_frame_bandwidth;
+  else if (max_bits > rc->max_frame_bandwidth)
+    max_bits = rc->max_frame_bandwidth;
 
   return (int)max_bits;
 }
@@ -916,9 +915,8 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
   if (section_target_bandwitdh <= 0)
     return rc->worst_quality;          // Highest value allowed
 
-  target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20)
-                              ? (512 * section_target_bandwitdh) / num_mbs
-                              : 512 * (section_target_bandwitdh / num_mbs);
+  target_norm_bits_per_mb =
+      ((uint64_t)section_target_bandwitdh << BPER_MB_NORMBITS) / num_mbs;
 
   // Try and pick a max Q that will be high enough to encode the
   // content at the given rate.
@@ -1059,8 +1057,8 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm,
 // Function to test for a condition where a complex transition is followed
 // by a static section. For example in slide shows where there is a fade
 // between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval,
-                                      int still_interval,
+static int detect_transition_to_still(struct twopass_rc *twopass,
+                                      int frame_interval, int still_interval,
                                       double loop_decay_rate,
                                       double last_decay_rate) {
   int trans_to_still = 0;
@@ -1072,19 +1070,19 @@ static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval,
       loop_decay_rate >= 0.999 &&
       last_decay_rate < 0.9) {
     int j;
-    const FIRSTPASS_STATS *position = cpi->twopass.stats_in;
+    const FIRSTPASS_STATS *position = twopass->stats_in;
     FIRSTPASS_STATS tmp_next_frame;
 
     // Look ahead a few frames to see if static condition persists...
     for (j = 0; j < still_interval; ++j) {
-      if (EOF == input_stats(&cpi->twopass, &tmp_next_frame))
+      if (EOF == input_stats(twopass, &tmp_next_frame))
         break;
 
       if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999)
         break;
     }
 
-    reset_fpf_position(&cpi->twopass, position);
+    reset_fpf_position(twopass, position);
 
     // Only if it does do we signal a transition to still.
     if (j == still_interval)
@@ -1406,9 +1404,11 @@ void define_fixed_arf_period(VP9_COMP *cpi) {
 
 // Analyse and define a gf/arf group.
 static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+  RATE_CONTROL *const rc = &cpi->rc;
+  VP9_CONFIG *const oxcf = &cpi->oxcf;
+  struct twopass_rc *const twopass = &cpi->twopass;
   FIRSTPASS_STATS next_frame = { 0 };
   const FIRSTPASS_STATS *start_pos;
-  struct twopass_rc *const twopass = &cpi->twopass;
   int i;
   double boost_score = 0.0;
   double old_boost_score = 0.0;
@@ -1427,16 +1427,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   double mv_in_out_accumulator = 0.0;
   double abs_mv_in_out_accumulator = 0.0;
   double mv_ratio_accumulator_thresh;
-  const int max_bits = frame_max_bits(cpi);  // Max bits for a single frame.
-
-  unsigned int allow_alt_ref = cpi->oxcf.play_alternate &&
-                               cpi->oxcf.lag_in_frames;
+  // Max bits for a single frame.
+  const int max_bits = frame_max_bits(rc, oxcf);
+  unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames;
 
   int f_boost = 0;
   int b_boost = 0;
   int flash_detected;
   int active_max_gf_interval;
-  RATE_CONTROL *const rc = &cpi->rc;
 
   twopass->gf_group_bits = 0;
 
@@ -1508,7 +1506,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
       // Break clause to detect very still sections after motion. For example,
       // a static image after a fade or other transition.
-      if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
+      if (detect_transition_to_still(twopass, i, 5, loop_decay_rate,
                                      last_loop_decay_rate)) {
         allow_alt_ref = 0;
         break;
@@ -1792,36 +1790,36 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
 // Allocate bits to a normal frame that is neither a gf an arf or a key frame.
 static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+  struct twopass_rc *twopass = &cpi->twopass;
+  // For a single frame.
+  const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
+  // Calculate modified prediction error used in bit allocation.
+  const double modified_err = calculate_modified_err(cpi, this_frame);
   int target_frame_size;
-  double modified_err;
   double err_fraction;
-  const int max_bits = frame_max_bits(cpi);  // Max for a single frame.
-
-  // Calculate modified prediction error used in bit allocation.
-  modified_err = calculate_modified_err(cpi, this_frame);
 
-  if (cpi->twopass.gf_group_error_left > 0)
+  if (twopass->gf_group_error_left > 0)
     // What portion of the remaining GF group error is used by this frame.
-    err_fraction = modified_err / cpi->twopass.gf_group_error_left;
+    err_fraction = modified_err / twopass->gf_group_error_left;
   else
     err_fraction = 0.0;
 
   // How many of those bits available for allocation should we give it?
-  target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction);
+  target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction);
 
   // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at
   // the top end.
   target_frame_size = clamp(target_frame_size, 0,
-                            MIN(max_bits, (int)cpi->twopass.gf_group_bits));
+                            MIN(max_bits, (int)twopass->gf_group_bits));
 
   // Adjust error and bits remaining.
-  cpi->twopass.gf_group_error_left -= (int64_t)modified_err;
+  twopass->gf_group_error_left -= (int64_t)modified_err;
 
   // Per frame bit target for this frame.
   vp9_rc_set_frame_target(cpi, target_frame_size);
 }
 
-static int test_candidate_kf(VP9_COMP *cpi,
+static int test_candidate_kf(struct twopass_rc *twopass,
                              const FIRSTPASS_STATS *last_frame,
                              const FIRSTPASS_STATS *this_frame,
                              const FIRSTPASS_STATS *next_frame) {
@@ -1842,7 +1840,7 @@ static int test_candidate_kf(VP9_COMP *cpi,
          ((next_frame->intra_error /
            DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) {
     int i;
-    const FIRSTPASS_STATS *start_pos = cpi->twopass.stats_in;
+    const FIRSTPASS_STATS *start_pos = twopass->stats_in;
     FIRSTPASS_STATS local_next_frame = *next_frame;
     double boost_score = 0.0;
     double old_boost_score = 0.0;
@@ -1879,7 +1877,7 @@ static int test_candidate_kf(VP9_COMP *cpi,
       old_boost_score = boost_score;
 
       // Get the next frame details
-      if (EOF == input_stats(&cpi->twopass, &local_next_frame))
+      if (EOF == input_stats(twopass, &local_next_frame))
         break;
     }
 
@@ -1889,7 +1887,7 @@ static int test_candidate_kf(VP9_COMP *cpi,
       is_viable_kf = 1;
     } else {
       // Reset the file position
-      reset_fpf_position(&cpi->twopass, start_pos);
+      reset_fpf_position(twopass, start_pos);
 
       is_viable_kf = 0;
     }
@@ -1902,16 +1900,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   int i, j;
   RATE_CONTROL *const rc = &cpi->rc;
   struct twopass_rc *const twopass = &cpi->twopass;
-  FIRSTPASS_STATS last_frame;
   const FIRSTPASS_STATS first_frame = *this_frame;
-  FIRSTPASS_STATS next_frame;
   const FIRSTPASS_STATS *start_position = twopass->stats_in;
-
+  FIRSTPASS_STATS next_frame;
+  FIRSTPASS_STATS last_frame;
   double decay_accumulator = 1.0;
   double zero_motion_accumulator = 1.0;
-  double boost_score = 0;
-  double loop_decay_rate;
-
+  double boost_score = 0.0;
   double kf_mod_err = 0.0;
   double kf_group_err = 0.0;
   double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
@@ -1949,8 +1944,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     // Provided that we are not at the end of the file...
     if (cpi->oxcf.auto_key &&
         lookup_next_frame_stats(twopass, &next_frame) != EOF) {
+      double loop_decay_rate;
+
       // Check for a scene cut.
-      if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
+      if (test_candidate_kf(twopass, &last_frame, this_frame, &next_frame))
         break;
 
       // How fast is the prediction quality decaying?
@@ -1966,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
       // Special check for transition or high motion followed by a
       // static scene.
-      if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i,
+      if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i,
                                      loop_decay_rate, decay_accumulator))
         break;
 
@@ -2019,7 +2016,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   // Calculate the number of bits that should be assigned to the kf group.
   if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) {
     // Maximum number of bits for a single normal frame (not key frame).
-    const int max_bits = frame_max_bits(cpi);
+    const int max_bits = frame_max_bits(rc, &cpi->oxcf);
 
     // Maximum number of bits allocated to the key frame group.
     int64_t max_grp_bits;
@@ -2071,10 +2068,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
       // How fast is prediction quality decaying.
       if (!detect_flash(twopass, 0)) {
-        loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
+        const double loop_decay_rate = get_prediction_decay_rate(&cpi->common,
+                                                                 &next_frame);
         decay_accumulator *= loop_decay_rate;
-        decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
-                              ? MIN_DECAY_FACTOR : decay_accumulator;
+        decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR);
       }
 
       boost_score += (decay_accumulator * r);
@@ -2105,7 +2102,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   if (1) {
     int kf_boost = (int)boost_score;
     int allocation_chunks;
-    int alt_kf_bits;
 
     if (kf_boost < (rc->frames_to_key * 3))
       kf_boost = (rc->frames_to_key * 3);
@@ -2139,14 +2135,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
     // Prevent overflow.
     if (kf_boost > 1028) {
-      int divisor = kf_boost >> 10;
+      const int divisor = kf_boost >> 10;
       kf_boost /= divisor;
       allocation_chunks /= divisor;
     }
 
-    twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0
-           : twopass->kf_group_bits;
-
+    twopass->kf_group_bits = MAX(0, twopass->kf_group_bits);
     // Calculate the number of bits to be spent on the key frame.
     twopass->kf_bits = (int)((double)kf_boost *
         ((double)twopass->kf_group_bits / allocation_chunks));
@@ -2156,11 +2150,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     // then use an alternate calculation based on the kf error score
     // which should give a smaller key frame.
     if (kf_mod_err < kf_group_err / rc->frames_to_key) {
-      double  alt_kf_grp_bits = ((double)twopass->bits_left *
+      double alt_kf_grp_bits = ((double)twopass->bits_left *
          (kf_mod_err * (double)rc->frames_to_key) /
          DOUBLE_DIVIDE_CHECK(twopass->modified_error_left));
 
-      alt_kf_bits = (int)((double)kf_boost *
+      const int alt_kf_bits = (int)((double)kf_boost *
                           (alt_kf_grp_bits / (double)allocation_chunks));
 
       if (twopass->kf_bits > alt_kf_bits)
@@ -2169,12 +2163,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
       // Else if it is much harder than other frames in the group make sure
       // it at least receives an allocation in keeping with its relative
       // error score.
-      alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err /
+      const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err /
                DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)));
 
-      if (alt_kf_bits > twopass->kf_bits) {
+      if (alt_kf_bits > twopass->kf_bits)
         twopass->kf_bits = alt_kf_bits;
-      }
     }
     twopass->kf_group_bits -= twopass->kf_bits;
     // Per frame bit target for this frame.
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index ede802a3b..f7a02a4a7 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -981,66 +981,49 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
                              const vp9_variance_fn_ptr_t *fn_ptr,
                              int *mvjcost, int *mvcost[2],
                              const MV *center_mv) {
-  int i, j, step;
-
   const MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *in_what;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-  const uint8_t *best_address;
-
-  int bestsad = INT_MAX;
-  int best_site = 0;
-  int last_site = 0;
-
-  int ref_row, ref_col;
-
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   // search_param determines the length of the initial step and hence the number
   // of iterations
   // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
   // (MAX_FIRST_STEP/4) pel... etc.
   const search_site *const ss = &x->ss[search_param * x->searches_per_step];
   const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
-
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
   const int *mvjsadcost = x->nmvjointsadcost;
   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+  const uint8_t *best_address;
+  int best_sad = INT_MAX;
+  int best_site = 0;
+  int last_site = 0;
+  int i, j, step;
 
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
-  ref_row = ref_mv->row;
-  ref_col = ref_mv->col;
+  best_address = get_buf_from_mv(in_what, ref_mv);
   *num00 = 0;
-  best_mv->row = ref_row;
-  best_mv->col = ref_col;
-
-  // Work out the start point for the search
-  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
-  best_address = in_what;
+  *best_mv = *ref_mv;
 
   // Check the starting position
-  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv,
-                                 mvjsadcost, mvsadcost, sad_per_bit);
+  best_sad = fn_ptr->sdf(what->buf, what->stride,
+                        in_what->buf, in_what->stride, 0x7fffffff) +
+      mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
 
   i = 1;
 
   for (step = 0; step < tot_steps; step++) {
     for (j = 0; j < x->searches_per_step; j++) {
-      const MV this_mv = {best_mv->row + ss[i].mv.row,
-                          best_mv->col + ss[i].mv.col};
-      if (is_mv_in(x, &this_mv)) {
-        const uint8_t *const check_here = ss[i].offset + best_address;
-        int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
-                              bestsad);
-
-        if (thissad < bestsad) {
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, sad_per_bit);
-
-          if (thissad < bestsad) {
-            bestsad = thissad;
+      const MV mv = {best_mv->row + ss[i].mv.row,
+                     best_mv->col + ss[i].mv.col};
+      if (is_mv_in(x, &mv)) {
+       int sad = fn_ptr->sdf(what->buf, what->stride,
+                             best_address + ss[i].offset, in_what->stride,
+                             best_sad);
+        if (sad < best_sad) {
+          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
+                                sad_per_bit);
+          if (sad < best_sad) {
+            best_sad = sad;
             best_site = i;
           }
         }
@@ -1059,14 +1042,14 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
         const MV this_mv = {best_mv->row + ss[best_site].mv.row,
                             best_mv->col + ss[best_site].mv.col};
         if (is_mv_in(x, &this_mv)) {
-          const uint8_t *const check_here = ss[best_site].offset + best_address;
-          int thissad = fn_ptr->sdf(what, what_stride, check_here,
-                                    in_what_stride, bestsad);
-          if (thissad < bestsad) {
-            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                      mvjsadcost, mvsadcost, sad_per_bit);
-            if (thissad < bestsad) {
-              bestsad = thissad;
+          int sad = fn_ptr->sdf(what->buf, what->stride,
+                                best_address + ss[best_site].offset,
+                                in_what->stride, best_sad);
+          if (sad < best_sad) {
+            sad += mvsad_err_cost(&this_mv, &fcenter_mv,
+                                  mvjsadcost, mvsadcost, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
               best_mv->row += ss[best_site].mv.row;
               best_mv->col += ss[best_site].mv.col;
               best_address += ss[best_site].offset;
@@ -1077,11 +1060,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
         break;
       };
 #endif
-    } else if (best_address == in_what) {
+    } else if (best_address == in_what->buf) {
       (*num00)++;
     }
   }
-  return bestsad;
+  return best_sad;
 }
 
 int vp9_diamond_search_sadx4(const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 836f3e1e5..3619ec89e 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -41,6 +41,9 @@
 #include "vp9/encoder/vp9_rdopt.h"
 #include "vp9/encoder/vp9_segmentation.h"
 #include "vp9/encoder/vp9_speed_features.h"
+#if CONFIG_INTERNAL_STATS
+#include "vp9/encoder/vp9_ssim.h"
+#endif
 #include "vp9/encoder/vp9_temporal_filter.h"
 #include "vp9/encoder/vp9_resize.h"
 #include "vp9/encoder/vp9_svc_layercontext.h"
@@ -63,19 +66,6 @@ void vp9_coef_tree_initialize();
 #define MAX_MB_RATE 250
 #define MAXRATE_1080P 2025000
 
-#if CONFIG_INTERNAL_STATS
-extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source,
-                            YV12_BUFFER_CONFIG *dest, int lumamask,
-                            double *weight);
-
-
-extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source,
-                             YV12_BUFFER_CONFIG *dest, double *ssim_y,
-                             double *ssim_u, double *ssim_v);
-
-
-#endif
-
 // #define OUTPUT_YUV_REC
 
 #ifdef OUTPUT_YUV_SRC
@@ -255,56 +245,6 @@ static void restore_coding_context(VP9_COMP *cpi) {
   cm->fc = cc->fc;
 }
 
-// Computes a q delta (in "q index" terms) to get from a starting q value
-// to a target q value
-int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  int start_index = rc->worst_quality;
-  int target_index = rc->worst_quality;
-  int i;
-
-  // Convert the average q value to an index.
-  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
-    start_index = i;
-    if (vp9_convert_qindex_to_q(i) >= qstart)
-      break;
-  }
-
-  // Convert the q target to an index
-  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
-    target_index = i;
-    if (vp9_convert_qindex_to_q(i) >= qtarget)
-      break;
-  }
-
-  return target_index - start_index;
-}
-
-// Computes a q delta (in "q index" terms) to get from a starting q value
-// to a value that should equate to the given rate ratio.
-int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int qindex,
-                               double rate_target_ratio) {
-  const FRAME_TYPE frame_type = cpi->common.frame_type;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  int target_index = rc->worst_quality;
-  int i;
-
-  // Look up the current projected bits per block for the base index
-  const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0);
-
-  // Find the target bits per mb based on the base value and given ratio.
-  const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);
-
-  // Convert the q target to an index
-  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
-    target_index = i;
-    if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb )
-      break;
-  }
-
-  return target_index - qindex;
-}
-
 static void configure_static_seg_features(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   const RATE_CONTROL *const rc = &cpi->rc;
@@ -348,7 +288,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
       seg->update_map = 1;
       seg->update_data = 1;
 
-      qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 0.875);
+      qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875);
       vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
       vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
 
@@ -369,7 +309,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
         seg->update_data = 1;
         seg->abs_delta = SEGMENT_DELTADATA;
 
-        qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 1.125);
+        qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125);
         vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
         vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
 
@@ -804,7 +744,8 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
 
   cpi->oxcf = *oxcf;
 
-  cm->version = oxcf->version;
+  cm->profile = oxcf->profile;
+  cm->bit_depth = oxcf->bit_depth;
 
   cm->width = oxcf->width;
   cm->height = oxcf->height;
@@ -844,8 +785,14 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
 
-  if (cm->version != oxcf->version)
-    cm->version = oxcf->version;
+  if (cm->profile != oxcf->profile)
+    cm->profile = oxcf->profile;
+  cm->bit_depth = oxcf->bit_depth;
+
+  if (cm->profile <= PROFILE_1)
+    assert(cm->bit_depth == BITS_8);
+  else
+    assert(cm->bit_depth > BITS_8);
 
   cpi->oxcf = *oxcf;
 
@@ -881,10 +828,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
       break;
   }
 
-  cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
-  cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
-  cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
-
   cpi->oxcf.lossless = oxcf->lossless;
   if (cpi->oxcf.lossless) {
     // In lossless mode, make sure right quantizer range and correct transform
@@ -1626,53 +1569,42 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
 
 #endif
 }
+static int64_t get_sse(const uint8_t *a, int a_stride,
+                       const uint8_t *b, int b_stride,
+                       int width, int height) {
+  const int dw = width % 16;
+  const int dh = height % 16;
+  int64_t total_sse = 0;
+  unsigned int sse = 0;
+  int sum = 0;
+  int x, y;
+
+  if (dw > 0) {
+    variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
+             dw, height, &sse, &sum);
+    total_sse += sse;
+  }
 
+  if (dh > 0) {
+    variance(&a[(height - dh) * a_stride], a_stride,
+             &b[(height - dh) * b_stride], b_stride,
+             width - dw, dh, &sse, &sum);
+    total_sse += sse;
+  }
 
-static uint64_t calc_plane_error(const uint8_t *orig, int orig_stride,
-                                 const uint8_t *recon, int recon_stride,
-                                 unsigned int cols, unsigned int rows) {
-  unsigned int row, col;
-  uint64_t total_sse = 0;
-  int diff;
-
-  for (row = 0; row + 16 <= rows; row += 16) {
-    for (col = 0; col + 16 <= cols; col += 16) {
-      unsigned int sse;
-
-      vp9_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse);
+  for (y = 0; y < height / 16; ++y) {
+    const uint8_t *pa = a;
+    const uint8_t *pb = b;
+    for (x = 0; x < width / 16; ++x) {
+      vp9_mse16x16(pa, a_stride, pb, b_stride, &sse);
       total_sse += sse;
-    }
-
-    /* Handle odd-sized width */
-    if (col < cols) {
-      unsigned int border_row, border_col;
-      const uint8_t *border_orig = orig;
-      const uint8_t *border_recon = recon;
 
-      for (border_row = 0; border_row < 16; border_row++) {
-        for (border_col = col; border_col < cols; border_col++) {
-          diff = border_orig[border_col] - border_recon[border_col];
-          total_sse += diff * diff;
-        }
-
-        border_orig += orig_stride;
-        border_recon += recon_stride;
-      }
-    }
-
-    orig += orig_stride * 16;
-    recon += recon_stride * 16;
-  }
-
-  /* Handle odd-sized height */
-  for (; row < rows; row++) {
-    for (col = 0; col < cols; col++) {
-      diff = orig[col] - recon[col];
-      total_sse += diff * diff;
+      pa += 16;
+      pb += 16;
     }
 
-    orig += orig_stride;
-    recon += recon_stride;
+    a += 16 * a_stride;
+    b += 16 * b_stride;
   }
 
   return total_sse;
@@ -1700,9 +1632,9 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
     const int w = widths[i];
     const int h = heights[i];
     const uint32_t samples = w * h;
-    const uint64_t sse = calc_plane_error(a_planes[i], a_strides[i],
-                                          b_planes[i], b_strides[i],
-                                          w, h);
+    const uint64_t sse = get_sse(a_planes[i], a_strides[i],
+                                 b_planes[i], b_strides[i],
+                                 w, h);
     psnr->sse[1 + i] = sse;
     psnr->samples[1 + i] = samples;
     psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse);
@@ -2176,7 +2108,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
 
   vp9_clear_system_state();
 
-  recon_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
+  recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
 
   if (cpi->twopass.total_left_stats.coded_error != 0.0)
     fprintf(f, "%10u %10d %10d %10d %10d %10d "
@@ -2347,7 +2279,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
            rc->this_key_frame_forced &&
            (rc->projected_frame_size < rc->max_frame_bandwidth)) {
         int last_q = q;
-        int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
+        int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
 
         int high_err_target = cpi->ambient_err;
         int low_err_target = cpi->ambient_err >> 1;
@@ -2704,7 +2636,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // fixed interval. Note the reconstruction error if it is the frame before
   // the force key frame
   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
-    cpi->ambient_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
+    cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
   }
 
   // If the encoder forced a KEY_FRAME decision
@@ -2824,15 +2756,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     // Don't increment frame counters if this was an altref buffer
     // update not a real frame
     ++cm->current_video_frame;
-    if (cpi->use_svc) {
-      LAYER_CONTEXT *lc;
-      if (cpi->svc.number_temporal_layers > 1) {
-        lc = &cpi->svc.layer_context[cpi->svc.temporal_layer_id];
-      } else {
-        lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
-      }
-      ++lc->current_video_frame_in_layer;
-    }
+    if (cpi->use_svc)
+      vp9_inc_frame_in_layer(&cpi->svc);
   }
 
   // restore prev_mi
@@ -2908,7 +2833,7 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
   vpx_usec_timer_mark(&timer);
   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
 
-  if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) {
+  if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) {
     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
                        "Non-4:2:0 color space requires profile >= 1");
     res = -1;
@@ -3466,28 +3391,12 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
   return;
 }
 
-int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
-                    const YV12_BUFFER_CONFIG *reference) {
-  int i, j;
-  int total = 0;
-
-  const uint8_t *src = source->y_buffer;
-  const uint8_t *ref = reference->y_buffer;
-
-  // Loop through the Y plane raw and reconstruction data summing
-  // (square differences)
-  for (i = 0; i < source->y_height; i += 16) {
-    for (j = 0; j < source->y_width; j += 16) {
-      unsigned int sse;
-      total += vp9_mse16x16(src + j, source->y_stride,
-                            ref + j, reference->y_stride, &sse);
-    }
-
-    src += 16 * source->y_stride;
-    ref += 16 * reference->y_stride;
-  }
+int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) {
+  assert(a->y_crop_width == b->y_crop_width);
+  assert(a->y_crop_height == b->y_crop_height);
 
-  return total;
+  return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
+                      a->y_crop_width, a->y_crop_height);
 }
 
 
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 655661835..e30fb02b2 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -186,9 +186,8 @@ typedef enum {
 } AQ_MODE;
 
 typedef struct VP9_CONFIG {
-  int version;  // 4 versions of bitstream defined:
-                //   0 - best quality/slowest decode,
-                //   3 - lowest quality/fastest decode
+  BITSTREAM_PROFILE profile;
+  BIT_DEPTH bit_depth;
   int width;  // width of data passed to the compressor
   int height;  // height of data passed to the compressor
   double framerate;  // set to passed in framerate
@@ -497,6 +496,8 @@ typedef struct VP9_COMP {
 
   SVC svc;
 
+  int use_large_partition_rate;
+
 #if CONFIG_MULTIPLE_ARF
   // ARF tracking variables.
   int multi_arf_enabled;
@@ -605,16 +606,10 @@ static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
   return mb_rows * mb_cols * (16 * 16 * 3 + 4);
 }
 
-int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
-                    const YV12_BUFFER_CONFIG *reference);
+int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
 
 void vp9_alloc_compressor_data(VP9_COMP *cpi);
 
-int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget);
-
-int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index,
-                               double rate_target_ratio);
-
 void vp9_scale_references(VP9_COMP *cpi);
 
 void vp9_update_reference_frames(VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 92ad1e745..3ac85228b 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -35,7 +35,7 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
   int filt_err;
 
   vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame);
-  filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
+  filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
 
   // Re-instate the unfiltered frame
   vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 9c7e8c109..f3fe99cdb 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -214,6 +214,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   struct macroblockd_plane *const pd = &xd->plane[0];
   MB_PREDICTION_MODE this_mode, best_mode = ZEROMV;
   MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
+  INTERP_FILTER best_pred_filter = EIGHTTAP;
   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
@@ -236,6 +237,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
   // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame.
   int mode_idx[MB_MODE_COUNT] = {0};
+  INTERP_FILTER filter_ref = SWITCHABLE;
 
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
@@ -267,6 +269,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     frame_mv[ZEROMV][ref_frame].as_int = 0;
   }
 
+  if (xd->up_available)
+    filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
+  else if (xd->left_available)
+    filter_ref = xd->mi[-1]->mbmi.interp_filter;
+
   for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
     if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
       continue;
@@ -326,9 +333,63 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
       mbmi->mode = this_mode;
       mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
-      vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
 
-      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+      // Search for the best prediction filter type when the resulting
+      // motion vector is at sub-pixel accuracy for the luma component, i.e.,
+      // the last three bits of its row or column are not all zeros.
+      if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
+          ((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
+           (mbmi->mv[0].as_mv.col & 0x07) != 0)) {
+        int64_t tmp_rdcost1 = INT64_MAX;
+        int64_t tmp_rdcost2 = INT64_MAX;
+        int64_t tmp_rdcost3 = INT64_MAX;
+        int pf_rate[3];
+        int64_t pf_dist[3];
+
+        mbmi->interp_filter = EIGHTTAP;
+        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP],
+                          &pf_dist[EIGHTTAP]);
+        tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv,
+                             vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP],
+                             pf_dist[EIGHTTAP]);
+
+        mbmi->interp_filter = EIGHTTAP_SHARP;
+        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP],
+                          &pf_dist[EIGHTTAP_SHARP]);
+        tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv,
+                          vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP],
+                          pf_dist[EIGHTTAP_SHARP]);
+
+        mbmi->interp_filter = EIGHTTAP_SMOOTH;
+        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH],
+                          &pf_dist[EIGHTTAP_SMOOTH]);
+        tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv,
+                          vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH],
+                          pf_dist[EIGHTTAP_SMOOTH]);
+
+        if (tmp_rdcost2 < tmp_rdcost1) {
+          if (tmp_rdcost2 < tmp_rdcost3)
+            mbmi->interp_filter = EIGHTTAP_SHARP;
+          else
+            mbmi->interp_filter = EIGHTTAP_SMOOTH;
+        } else {
+          if (tmp_rdcost1 < tmp_rdcost3)
+            mbmi->interp_filter = EIGHTTAP;
+          else
+            mbmi->interp_filter = EIGHTTAP_SMOOTH;
+        }
+
+        rate = pf_rate[mbmi->interp_filter];
+        dist = pf_dist[mbmi->interp_filter];
+      } else {
+        mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
+        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+        model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+      }
+
       rate += rate_mv;
       rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]]
                                 [INTER_OFFSET(this_mode)];
@@ -339,12 +400,14 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         *returnrate = rate;
         *returndistortion = dist;
         best_mode = this_mode;
+        best_pred_filter = mbmi->interp_filter;
         best_ref_frame = ref_frame;
       }
     }
   }
 
   mbmi->mode = best_mode;
+  mbmi->interp_filter = best_pred_filter;
   mbmi->ref_frame[0] = best_ref_frame;
   mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
   xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index c36b30b0c..342081644 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -35,9 +35,6 @@
 #define MIN_BPB_FACTOR 0.005
 #define MAX_BPB_FACTOR 50
 
-// Bits Per MB at different Q (Multiplied by 512)
-#define BPER_MB_NORMBITS    9
-
 // Tables relating active max Q to active min Q
 static int kf_low_motion_minq[QINDEX_RANGE];
 static int kf_high_motion_minq[QINDEX_RANGE];
@@ -106,15 +103,10 @@ int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
   return (int)(0.5 + (enumerator * correction_factor / q));
 }
 
-static int estimate_bits_at_q(int frame_kind, int q, int mbs,
+static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
                               double correction_factor) {
-  const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor));
-
-  // Attempt to retain reasonable accuracy without overflow. The cutoff is
-  // chosen such that the maximum product of Bpm and MBs fits 31 bits. The
-  // largest Bpm takes 20 bits.
-  return (mbs > (1 << 11)) ? (bpm >> BPER_MB_NORMBITS) * mbs
-                           : (bpm * mbs) >> BPER_MB_NORMBITS;
+  const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor));
+  return ((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS;
 }
 
 int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
@@ -151,13 +143,12 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
 
 
 // Update the buffer level for higher layers, given the encoded current layer.
-static void update_layer_buffer_level(VP9_COMP *const cpi,
-                                      int encoded_frame_size) {
+static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
   int temporal_layer = 0;
-  int current_temporal_layer = cpi->svc.temporal_layer_id;
+  int current_temporal_layer = svc->temporal_layer_id;
   for (temporal_layer = current_temporal_layer + 1;
-      temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) {
-    LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+      temporal_layer < svc->number_temporal_layers; ++temporal_layer) {
+    LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer];
     RATE_CONTROL *lrc = &lc->rc;
     int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
         encoded_frame_size);
@@ -187,7 +178,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
   rc->buffer_level = rc->bits_off_target;
 
   if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
-    update_layer_buffer_level(cpi, encoded_frame_size);
+    update_layer_buffer_level(&cpi->svc, encoded_frame_size);
   }
 }
 
@@ -284,6 +275,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) {
     return cpi->rc.key_frame_rate_correction_factor;
   } else {
     if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+        !cpi->rc.is_src_frame_alt_ref &&
         !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
       return cpi->rc.gf_rate_correction_factor;
     else
@@ -296,6 +288,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
     cpi->rc.key_frame_rate_correction_factor = factor;
   } else {
     if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+        !cpi->rc.is_src_frame_alt_ref &&
         !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
       cpi->rc.gf_rate_correction_factor = factor;
     else
@@ -304,7 +297,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
 }
 
 void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
-  const int q = cpi->common.base_qindex;
+  const VP9_COMMON *const cm = &cpi->common;
   int correction_factor = 100;
   double rate_correction_factor = get_rate_correction_factor(cpi);
   double adjustment_limit;
@@ -317,8 +310,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
   // Work out how big we would have expected the frame to be at this Q given
   // the current correction factor.
   // Stay in double to avoid int overflow when values are large
-  projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q,
-                                                 cpi->common.MBs,
+  projected_size_based_on_q = estimate_bits_at_q(cm->frame_type,
+                                                 cm->base_qindex, cm->MBs,
                                                  rate_correction_factor);
   // Work out a size correction factor.
   if (projected_size_based_on_q > 0)
@@ -342,20 +335,18 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
 
   if (correction_factor > 102) {
     // We are not already at the worst allowable quality
-    correction_factor =
-        (int)(100 + ((correction_factor - 100) * adjustment_limit));
-    rate_correction_factor =
-        ((rate_correction_factor * correction_factor) / 100);
+    correction_factor = (int)(100 + ((correction_factor - 100) *
+                                  adjustment_limit));
+    rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
 
     // Keep rate_correction_factor within limits
     if (rate_correction_factor > MAX_BPB_FACTOR)
       rate_correction_factor = MAX_BPB_FACTOR;
   } else if (correction_factor < 99) {
     // We are not already at the best allowable quality
-    correction_factor =
-        (int)(100 - ((100 - correction_factor) * adjustment_limit));
-    rate_correction_factor =
-        ((rate_correction_factor * correction_factor) / 100);
+    correction_factor = (int)(100 - ((100 - correction_factor) *
+                                  adjustment_limit));
+    rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
 
     // Keep rate_correction_factor within limits
     if (rate_correction_factor < MIN_BPB_FACTOR)
@@ -376,11 +367,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
 
   // Calculate required scaling factor based on target frame size and size of
   // frame produced using previous Q.
-  if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS))
-    // Case where we would overflow int
-    target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS;
-  else
-    target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
+    target_bits_per_mb =
+        ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
 
   i = active_best_quality;
 
@@ -447,6 +435,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
   // If buffer is below the optimal level, let the active_worst_quality go from
   // ambient Q (at buffer = optimal level) to worst_quality level
   // (at buffer = critical level).
+  const VP9_COMMON *const cm = &cpi->common;
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   const RATE_CONTROL *rc = &cpi->rc;
   // Buffer level below which we push active_worst to worst_quality.
@@ -454,9 +443,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
   int64_t buff_lvl_step = 0;
   int adjustment = 0;
   int active_worst_quality;
-  if (cpi->common.frame_type == KEY_FRAME)
+  if (cm->frame_type == KEY_FRAME)
     return rc->worst_quality;
-  if (cpi->common.current_video_frame > 1)
+  if (cm->current_video_frame > 1)
     active_worst_quality = MIN(rc->worst_quality,
                                rc->avg_frame_qindex[INTER_FRAME] * 5 / 4);
   else
@@ -510,7 +499,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
     if (rc->this_key_frame_forced) {
       int qindex = rc->last_boosted_qindex;
       double last_boosted_q = vp9_convert_qindex_to_q(qindex);
-      int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
+      int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
                                             (last_boosted_q * 0.75));
       active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
     } else if (cm->current_video_frame > 0) {
@@ -532,8 +521,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
       // Convert the adjustment factor to a qindex delta
       // on active_best_quality.
       q_val = vp9_convert_qindex_to_q(active_best_quality);
-      active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
-                                                   q_adj_factor);
+      active_best_quality += vp9_compute_qdelta(rc, q_val,
+                                                q_val * q_adj_factor);
     }
   } else if (!rc->is_src_frame_alt_ref &&
              !cpi->use_svc &&
@@ -589,7 +578,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
                           active_best_quality, active_worst_quality);
     if (q > *top_index) {
       // Special case when we are targeting the max allowed rate
-      if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth)
+      if (rc->this_frame_target >= rc->max_frame_bandwidth)
         *top_index = q;
       else
         q = *top_index;
@@ -622,8 +611,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
     if (rc->this_key_frame_forced) {
       int qindex = rc->last_boosted_qindex;
       double last_boosted_q = vp9_convert_qindex_to_q(qindex);
-      int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
-                                            (last_boosted_q * 0.75));
+      int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
+                                            last_boosted_q * 0.75);
       active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
     } else if (cm->current_video_frame > 0) {
       // not first frame of one pass and kf_boost is set
@@ -644,15 +633,15 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
       // Convert the adjustment factor to a qindex delta
       // on active_best_quality.
       q_val = vp9_convert_qindex_to_q(active_best_quality);
-      active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
-                                                   q_adj_factor);
+      active_best_quality += vp9_compute_qdelta(rc, q_val,
+                                                q_val * q_adj_factor);
     }
 #else
     double current_q;
     // Force the KF quantizer to be 30% of the active_worst_quality.
     current_q = vp9_convert_qindex_to_q(active_worst_quality);
     active_best_quality = active_worst_quality
-        + vp9_compute_qdelta(cpi, current_q, current_q * 0.3);
+        + vp9_compute_qdelta(rc, current_q, current_q * 0.3);
 #endif
   } else if (!rc->is_src_frame_alt_ref &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
@@ -755,7 +744,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
                           active_best_quality, active_worst_quality);
     if (q > *top_index) {
       // Special case when we are targeting the max allowed rate
-      if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth)
+      if (rc->this_frame_target >= rc->max_frame_bandwidth)
         *top_index = q;
       else
         q = *top_index;
@@ -771,7 +760,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
     assert(level >= 0);
     new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
     q = active_worst_quality +
-        vp9_compute_qdelta(cpi, current_q, new_q);
+        vp9_compute_qdelta(rc, current_q, new_q);
 
     *bottom_index = q;
     *top_index    = q;
@@ -804,8 +793,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
     if (rc->this_key_frame_forced) {
       int qindex = rc->last_boosted_qindex;
       double last_boosted_q = vp9_convert_qindex_to_q(qindex);
-      int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
-                                            (last_boosted_q * 0.75));
+      int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
+                                            last_boosted_q * 0.75);
       active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
     } else {
       // Not forced keyframe.
@@ -829,15 +818,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
       // Convert the adjustment factor to a qindex delta
       // on active_best_quality.
       q_val = vp9_convert_qindex_to_q(active_best_quality);
-      active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
-                                                   q_adj_factor);
+      active_best_quality += vp9_compute_qdelta(rc, q_val,
+                                                q_val * q_adj_factor);
     }
 #else
     double current_q;
     // Force the KF quantizer to be 30% of the active_worst_quality.
     current_q = vp9_convert_qindex_to_q(active_worst_quality);
     active_best_quality = active_worst_quality
-        + vp9_compute_qdelta(cpi, current_q, current_q * 0.3);
+        + vp9_compute_qdelta(rc, current_q, current_q * 0.3);
 #endif
   } else if (!rc->is_src_frame_alt_ref &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
@@ -954,7 +943,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
     assert(level >= 0);
     new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
     q = active_worst_quality +
-        vp9_compute_qdelta(cpi, current_q, new_q);
+        vp9_compute_qdelta(rc, current_q, new_q);
 
     *bottom_index = q;
     *top_index    = q;
@@ -970,8 +959,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
 }
 
 int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
-                             int *bottom_index,
-                             int *top_index) {
+                             int *bottom_index, int *top_index) {
   int q;
   if (cpi->pass == 0) {
     if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
@@ -1039,16 +1027,17 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) {
 
 static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
   // this frame refreshes means next frames don't unless specified by user
-  cpi->rc.frames_since_golden = 0;
+  RATE_CONTROL *const rc = &cpi->rc;
+  rc->frames_since_golden = 0;
 
 #if CONFIG_MULTIPLE_ARF
   if (!cpi->multi_arf_enabled)
 #endif
     // Clear the alternate reference update pending flag.
-    cpi->rc.source_alt_ref_pending = 0;
+    rc->source_alt_ref_pending = 0;
 
   // Set the alternate reference frame active flag
-  cpi->rc.source_alt_ref_active = 1;
+  rc->source_alt_ref_active = 1;
 }
 
 static void update_golden_frame_stats(VP9_COMP *cpi) {
@@ -1077,6 +1066,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
 
 void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   VP9_COMMON *const cm = &cpi->common;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
 
   cm->last_frame_type = cm->frame_type;
@@ -1086,7 +1076,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   // Post encode loop adjustment of Q prediction.
   vp9_rc_update_rate_correction_factors(
       cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
-            cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
+            oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
 
   // Keep a record of last Q and ambient average Q.
   if (cm->frame_type == KEY_FRAME) {
@@ -1095,7 +1085,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
         3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
   } else if (!rc->is_src_frame_alt_ref &&
       (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
-      !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) {
+      !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
     rc->last_q[2] = cm->base_qindex;
     rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
         3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
@@ -1145,7 +1135,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
 
   rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
 
-  if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame &&
+  if (oxcf->play_alternate && cpi->refresh_alt_ref_frame &&
       (cm->frame_type != KEY_FRAME))
     // Update the alternate reference frame stats as appropriate.
     update_alt_ref_frame_stats(cpi);
@@ -1238,18 +1228,19 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
 static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   const RATE_CONTROL *rc = &cpi->rc;
+  const SVC *const svc = &cpi->svc;
   const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
   const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
   int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
                              FRAME_OVERHEAD_BITS);
   int target = rc->av_per_frame_bandwidth;
-  if (cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+  if (svc->number_temporal_layers > 1 &&
+      oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
     // Note that for layers, av_per_frame_bandwidth is the cumulative
     // per-frame-bandwidth. For the target size of this frame, use the
     // layer average frame size (i.e., non-cumulative per-frame-bw).
-    int current_temporal_layer = cpi->svc.temporal_layer_id;
-    const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer];
+    int current_temporal_layer = svc->temporal_layer_id;
+    const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer];
     target = lc->avg_frame_size;
     min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
   }
@@ -1332,3 +1323,46 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
   rc->frames_till_gf_update_due = INT_MAX;
   rc->baseline_gf_interval = INT_MAX;
 }
+
+int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget) {
+  int start_index = rc->worst_quality;
+  int target_index = rc->worst_quality;
+  int i;
+
+  // Scan upward from best_quality for the first index whose q is >= qstart.
+  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
+    start_index = i;
+    if (vp9_convert_qindex_to_q(i) >= qstart)
+      break;
+  }
+
+  // Same scan for qtarget; the result is the signed distance between the two.
+  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
+    target_index = i;
+    if (vp9_convert_qindex_to_q(i) >= qtarget)
+      break;
+  }
+
+  return target_index - start_index;
+}
+
+int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
+                               int qindex, double rate_target_ratio) {
+  int target_index = rc->worst_quality;
+  int i;
+
+  // Projected bits per mb at the base index, with correction factor 1.0.
+  const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0);
+
+  // Scale the base value by the requested ratio (truncated to int).
+  const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);
+
+  // First index from best_quality whose bits per mb is <= the target rate.
+  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
+    target_index = i;
+    if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb )
+      break;
+  }
+
+  return target_index - qindex;
+}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 99e4b1639..7693c2b13 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -22,6 +22,9 @@ extern "C" {
 
 #define FRAME_OVERHEAD_BITS 200
 
+// Bits Per MB at different Q (Multiplied by 512)
+#define BPER_MB_NORMBITS    9
+
 typedef struct {
   // Rate targetting variables
   int this_frame_target;
@@ -163,6 +166,15 @@ int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi,
 // This function is called only from the vp9_rc_get_..._params() functions.
 void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target);
 
+// Computes a q delta (in "q index" terms) to get from a starting q value
+// to a target q value
+int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget);
+
+// Computes a q delta (in "q index" terms) to get from a starting q value
+// to a value that should equate to the given rate ratio.
+int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
+                               int qindex, double rate_target_ratio);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 1066f6a17..f4ea7cd55 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2358,7 +2358,7 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
   return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
 }
 
-static INLINE int get_switchable_rate(const MACROBLOCK *x) {
+int vp9_get_switchable_rate(const MACROBLOCK *x) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int ctx = vp9_get_pred_context_switchable_interp(xd);
@@ -2815,7 +2815,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         int j;
         int64_t rs_rd;
         mbmi->interp_filter = i;
-        rs = get_switchable_rate(x);
+        rs = vp9_get_switchable_rate(x);
         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
 
         if (i > 0 && intpel_mv) {
@@ -2885,7 +2885,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   // Set the appropriate filter
   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
       cm->interp_filter : *best_filter;
-  rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0;
+  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(x) : 0;
 
   if (pred_exists) {
     if (best_needs_copy) {
@@ -2915,7 +2915,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (cm->interp_filter == SWITCHABLE)
-    *rate2 += get_switchable_rate(x);
+    *rate2 += vp9_get_switchable_rate(x);
 
   if (!is_comp_pred) {
     if (!x->in_active_map) {
@@ -4072,7 +4072,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
             if (tmp_rd == INT64_MAX)
               continue;
-            rs = get_switchable_rate(x);
+            rs = vp9_get_switchable_rate(x);
             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
             cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
             cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
@@ -4153,7 +4153,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       distortion2 += distortion;
 
       if (cm->interp_filter == SWITCHABLE)
-        rate2 += get_switchable_rate(x);
+        rate2 += vp9_get_switchable_rate(x);
 
       if (!mode_excluded)
         mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 6968fa604..a01dbd4d3 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -40,6 +40,8 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                   unsigned int qstep, int *rate,
                                   int64_t *dist);
 
+int vp9_get_switchable_rate(const MACROBLOCK *x);
+
 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                             const TileInfo *const tile,
                             MV_REFERENCE_FRAME ref_frame,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index b79e15979..d6b6174fa 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -209,6 +209,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
     sf->use_square_partition_only = 1;
     sf->disable_filter_search_var_thresh = 100;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
+    sf->constrain_copy_partition = 1;
     sf->use_uv_intra_rd_estimate = 1;
     sf->skip_encode_sb = 1;
     sf->subpel_iters_per_step = 1;
@@ -264,7 +265,11 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
   }
 
   if (speed >= 6) {
-    sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
+    // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
+    sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
+    sf->search_type_check_frequency = 50;
+    sf->source_var_thresh = 360;
+
     sf->use_nonrd_pick_mode = 1;
     sf->search_method = FAST_DIAMOND;
   }
@@ -310,6 +315,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->min_partition_size = BLOCK_4X4;
   sf->adjust_partitioning_from_last_frame = 0;
   sf->last_partitioning_redo_frequency = 4;
+  sf->constrain_copy_partition = 0;
   sf->disable_split_mask = 0;
   sf->mode_search_skip_flags = 0;
   sf->force_frame_boost = 0;
@@ -336,6 +342,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   // This setting only takes effect when partition_search_type is set
   // to FIXED_PARTITION.
   sf->always_this_block_size = BLOCK_16X16;
+  sf->search_type_check_frequency = 50;
+  sf->source_var_thresh = 100;
 
   // Recode loop tolerence %.
   sf->recode_tolerance = 25;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 922f8803e..72f548a04 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -110,7 +110,10 @@ typedef enum {
 
   // Use an arbitrary partitioning scheme based on source variance within
   // a 64X64 SB
-  VAR_BASED_PARTITION
+  VAR_BASED_PARTITION,
+
+  // Use non-fixed partitions based on source variance
+  SOURCE_VAR_BASED_PARTITION
 } PARTITION_SEARCH_TYPE;
 
 typedef enum {
@@ -245,6 +248,12 @@ typedef struct {
   // use_lastframe_partitioning is set.
   int last_partitioning_redo_frequency;
 
+  // Enables constrained copy partitioning: given an input block size bsize,
+  // the previous partition is copied for partitions smaller than bsize;
+  // otherwise a bsize partition is used. bsize is currently set to 16x16.
+  // Used when motion is detected in the superblock.
+  int constrain_copy_partition;
+
   // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
   // it always, to allow it for only Last frame and Intra, disable it for all
   // inter modes or to enable it always.
@@ -329,6 +338,13 @@ typedef struct {
   // used in inter frames.
   // TODO(aconverse): Fold this into one of the other many mode skips
   BLOCK_SIZE max_intra_bsize;
+
+  // How often we check whether the SOURCE_VAR_BASED_PARTITION or
+  // FIXED_PARTITION search type should be used.
+  int search_type_check_frequency;
+
+  // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
+  int source_var_thresh;
 } SPEED_FEATURES;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c
index 1435191d0..026e6a8fd 100644
--- a/vp9/encoder/vp9_ssim.c
+++ b/vp9/encoder/vp9_ssim.c
@@ -8,8 +8,9 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "./vp9_rtcd.h"
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_ssim.h"
 
 void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
                             int rp, unsigned long *sum_s, unsigned long *sum_r,
diff --git a/vp9/encoder/vp9_ssim.h b/vp9/encoder/vp9_ssim.h
new file mode 100644
index 000000000..a581c2c23
--- /dev/null
+++ b/vp9/encoder/vp9_ssim.h
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_SSIM_H_
+#define VP9_ENCODER_VP9_SSIM_H_
+
+// Included before the extern "C" block so that the header's own declarations
+// are not nested inside this file's linkage specification.
+#include "vpx_scale/yv12config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// NOTE(review): |weight| looks like an output parameter and |lumamask| a
+// flag; confirm against the definition before relying on either.
+double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+                     int lumamask, double *weight);
+
+// NOTE(review): |ssim_y|, |ssim_u| and |ssim_v| are presumably per-plane
+// outputs; confirm against the definition.
+double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+                      double *ssim_y, double *ssim_u, double *ssim_v);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_ENCODER_VP9_SSIM_H_
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index caa0ec0e3..c2b6263f0 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -32,8 +32,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
     LAYER_CONTEXT *const lc = &svc->layer_context[layer];
     RATE_CONTROL *const lrc = &lc->rc;
     lc->current_video_frame_in_layer = 0;
-    lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
-    lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q];
+    lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
+    lrc->ni_av_qi = oxcf->worst_allowed_q;
     lrc->total_actual_bits = 0;
     lrc->total_target_vs_actual = 0;
     lrc->ni_tot_qi = 0;
@@ -47,12 +47,12 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
 
     if (svc->number_temporal_layers > 1) {
       lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
-      lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
+      lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
     } else {
       lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000;
-      lrc->last_q[0] = q_trans[oxcf->best_allowed_q];
-      lrc->last_q[1] = q_trans[oxcf->best_allowed_q];
-      lrc->last_q[2] = q_trans[oxcf->best_allowed_q];
+      lrc->last_q[0] = oxcf->best_allowed_q;
+      lrc->last_q[1] = oxcf->best_allowed_q;
+      lrc->last_q[2] = oxcf->best_allowed_q;
     }
 
     lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level),
@@ -215,3 +215,15 @@ void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
   }
   svc->spatial_layer_id = 0;
 }
+
+// Bumps current_video_frame_in_layer for the active layer context: the one
+// indexed by temporal_layer_id when more than one temporal layer is
+// configured, otherwise the one indexed by spatial_layer_id.
+void vp9_inc_frame_in_layer(SVC *svc) {
+  LAYER_CONTEXT *layer;
+  if (svc->number_temporal_layers > 1)
+    layer = &svc->layer_context[svc->temporal_layer_id];
+  else
+    layer = &svc->layer_context[svc->spatial_layer_id];
+  layer->current_video_frame_in_layer += 1;
+}
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index e859a2fd5..2abed3055 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -70,6 +70,9 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi);
 // Initialize second pass rc for spatial svc.
 void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);
 
+// Increment the number of video frames in the current layer.
+void vp9_inc_frame_in_layer(SVC *svc);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 996f730ef..71867a938 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -417,6 +417,15 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
   return (var - (((int64_t)avg * avg) >> 10));
 }
 
+// 16x16 wrapper around variance(): forwards both buffers and strides with a
+// fixed 16x16 size and passes |sse| and |sum| straight through, so whatever
+// variance() stores through them is what the caller receives.
+void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
+                             const uint8_t *ref_ptr, int ref_stride,
+                             unsigned int *sse, int *sum) {
+  variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
+}
+
 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                  int  source_stride,
                                  const uint8_t *ref_ptr,
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index dfcec783c..152e1f46e 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -38,6 +38,7 @@ struct vp9_extracfg {
   unsigned int                frame_parallel_decoding_mode;
   AQ_MODE                     aq_mode;
   unsigned int                frame_periodic_boost;
+  BIT_DEPTH                   bit_depth;
 };
 
 struct extraconfig_map {
@@ -67,6 +68,7 @@ static const struct extraconfig_map extracfg_map[] = {
       0,                          // frame_parallel_decoding_mode
       NO_AQ,                      // aq_mode
       0,                          // frame_periodic_delta_q
+      BITS_8,                     // bit_depth
     }
   }
 };
@@ -252,6 +254,12 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
         ERROR("rc_twopass_stats_in missing EOS stats packet");
     }
   }
+  if (cfg->g_profile <= (unsigned int)PROFILE_1 &&
+      extra_cfg->bit_depth > BITS_8)
+    ERROR("High bit-depth not supported in profile < 2");
+  if (cfg->g_profile > (unsigned int)PROFILE_1 &&
+      extra_cfg->bit_depth == BITS_8)
+    ERROR("Bit-depth 8 not supported in profile > 1");
 
   return VPX_CODEC_OK;
 }
@@ -277,11 +285,14 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
 }
 
 
-static vpx_codec_err_t set_encoder_config(VP9_CONFIG *oxcf,
-    const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) {
-  oxcf->version = cfg->g_profile;
+static vpx_codec_err_t set_encoder_config(
+    VP9_CONFIG *oxcf,
+    const vpx_codec_enc_cfg_t *cfg,
+    const struct vp9_extracfg *extra_cfg) {
+  oxcf->profile = cfg->g_profile;
   oxcf->width   = cfg->g_w;
   oxcf->height  = cfg->g_h;
+  oxcf->bit_depth = extra_cfg->bit_depth;
   // guess a frame rate if out of whack, use 30
   oxcf->framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num;
   if (oxcf->framerate > 180)
@@ -313,9 +324,9 @@ static vpx_codec_err_t set_encoder_config(VP9_CONFIG *oxcf,
   oxcf->target_bandwidth         = cfg->rc_target_bitrate;
   oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
 
-  oxcf->best_allowed_q          = cfg->rc_min_quantizer;
-  oxcf->worst_allowed_q         = cfg->rc_max_quantizer;
-  oxcf->cq_level                = extra_cfg->cq_level;
+  oxcf->best_allowed_q          = q_trans[cfg->rc_min_quantizer];
+  oxcf->worst_allowed_q         = q_trans[cfg->rc_max_quantizer];
+  oxcf->cq_level                = q_trans[extra_cfg->cq_level];
   oxcf->fixed_q = -1;
 
   oxcf->under_shoot_pct         = cfg->rc_undershoot_pct;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 2163b7bae..5ed7484ab 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -33,7 +33,7 @@ struct vpx_codec_alg_priv {
   vpx_codec_dec_cfg_t     cfg;
   vp9_stream_info_t       si;
   int                     decoder_init;
-  struct VP9Decompressor *pbi;
+  struct VP9Decoder *pbi;
   int                     postproc_cfg_set;
   vp8_postproc_cfg_t      postproc_cfg;
 #if CONFIG_POSTPROC_VISUALIZER
@@ -89,6 +89,8 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) {
     ctx->pbi = NULL;
   }
 
+  vpx_free(ctx);
+
   return VPX_CODEC_OK;
 }
 
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 21a388e51..da6c0f8b6 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -68,6 +68,7 @@ VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c
 VP9_CX_SRCS-yes += encoder/vp9_resize.c
 VP9_CX_SRCS-yes += encoder/vp9_resize.h
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h
 VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
 VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
 VP9_CX_SRCS-yes += encoder/vp9_variance.c