40 files changed, 2822 insertions, 1198 deletions
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 83f480a42..b43a4ec18 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1091,6 +1091,15 @@ EOF
                 # Skip the check by setting AS arbitrarily
                 AS=msvs
                 msvs_arch_dir=x86-msvs
+                vc_version=${tgt_cc##vs}
+                case $vc_version in
+                    7|8|9)
+                         echo "${tgt_cc} does not support avx/avx2, disabling....."
+                         RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx --disable-avx2 "
+                         soft_disable avx
+                         soft_disable avx2
+                    ;;
+                esac
             ;;
         esac
 
diff --git a/examples.mk b/examples.mk
index 88327fed6..2cee298c2 100644
--- a/examples.mk
+++ b/examples.mk
@@ -37,6 +37,7 @@ vpxdec.DESCRIPTION           = Full featured decoder
 UTILS-$(CONFIG_ENCODERS)    += vpxenc.c
 vpxenc.SRCS                 += args.c args.h y4minput.c y4minput.h
 vpxenc.SRCS                 += tools_common.c tools_common.h
+vpxenc.SRCS                 += webmenc.c webmenc.h
 vpxenc.SRCS                 += vpx_ports/mem_ops.h
 vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
 vpxenc.SRCS                 += vpx_ports/vpx_timer.h
@@ -50,6 +51,7 @@ UTILS-$(CONFIG_VP8_ENCODER)    += vp8_scalable_patterns.c
 vp8_scalable_patterns.GUID   = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
 vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
 UTILS-$(CONFIG_VP9_ENCODER)    += vp9_spatial_scalable_encoder.c
+vp9_spatial_scalable_encoder.SRCS += args.c args.h
 vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
 vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
 
diff --git a/libs.mk b/libs.mk
index 4691a1258..40628338f 100644
--- a/libs.mk
+++ b/libs.mk
@@ -122,6 +122,7 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
   CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
   CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
   INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
+  INSTALL-LIBS-yes += include/vpx/svc_context.h
   INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
   CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
   CODEC_DOC_SECTIONS += vp9 vp9_encoder
diff --git a/test/svc_test.cc b/test/svc_test.cc
new file mode 100644
index 000000000..5941caed8
--- /dev/null
+++ b/test/svc_test.cc
@@ -0,0 +1,310 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "vpx/svc_context.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+
+namespace {
+
+using libvpx_test::CodecFactory;
+using libvpx_test::Decoder;
+using libvpx_test::VP9CodecFactory;
+
+class SvcTest : public ::testing::Test {
+ protected:
+  static const uint32_t kWidth = 352;
+  static const uint32_t kHeight = 288;
+
+  SvcTest()
+      : codec_iface_(0),
+        test_file_name_("hantro_collage_w352h288.yuv"),
+        decoder_(0) {}
+
+  virtual ~SvcTest() {}
+
+  virtual void SetUp() {
+    memset(&svc_, 0, sizeof(svc_));
+    svc_.first_frame_full_size = 1;
+    svc_.encoding_mode = INTER_LAYER_PREDICTION_IP;
+    svc_.log_level = SVC_LOG_DEBUG;
+    svc_.log_print = 0;
+    // Start from the VP9 encoder defaults, then override size and timing.
+    codec_iface_ = vpx_codec_vp9_cx();
+    const vpx_codec_err_t res =
+        vpx_codec_enc_config_default(codec_iface_, &codec_enc_, 0);
+    EXPECT_EQ(VPX_CODEC_OK, res);
+
+    codec_enc_.g_w = kWidth;
+    codec_enc_.g_h = kHeight;
+    codec_enc_.g_timebase.num = 1;
+    codec_enc_.g_timebase.den = 60;
+    codec_enc_.kf_min_dist = 100;
+    codec_enc_.kf_max_dist = 100;
+    // Decoder used by the tests that decode the encoder's output.
+    vpx_codec_dec_cfg_t dec_cfg = {0};
+    VP9CodecFactory codec_factory;
+    decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
+  }
+
+  virtual void TearDown() {
+    vpx_svc_release(&svc_);
+    delete decoder_;  // obtained from CreateDecoder() in SetUp(); may be NULL
+  }
+
+  SvcContext svc_;
+  vpx_codec_ctx_t codec_;
+  struct vpx_codec_enc_cfg codec_enc_;
+  vpx_codec_iface_t *codec_iface_;
+  std::string test_file_name_;
+  Decoder *decoder_;
+};
+
+TEST_F(SvcTest, SvcInit) {
+  svc_.spatial_layers = 0;  // use default layers
+  vpx_codec_err_t res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(VPX_SS_DEFAULT_LAYERS, svc_.spatial_layers);
+
+  res = vpx_svc_init(NULL, &codec_, codec_iface_, &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_init(&svc_, NULL, codec_iface_, &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_init(&svc_, &codec_, NULL, &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_init(&svc_, &codec_, codec_iface_, NULL);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  svc_.spatial_layers = 6;  // too many layers
+  res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  svc_.spatial_layers = 2;
+  vpx_svc_set_scale_factors(&svc_, "4/16,16*16");  // invalid scale values
+  res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");  // valid scale values
+  res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+}
+
+TEST_F(SvcTest, SetOptions) {
+  vpx_codec_err_t res = vpx_svc_set_options(NULL, "layers=3");
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_options(&svc_, NULL);  // capture the result being checked
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+  // From here on each option string is judged by vpx_svc_init()'s result.
+  vpx_svc_set_options(&svc_, "layers=3");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(3, svc_.spatial_layers);
+
+  vpx_svc_set_options(&svc_, "not-an-option=1");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  vpx_svc_set_options(&svc_, "encoding-mode=alt-ip");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(ALT_INTER_LAYER_PREDICTION_IP, svc_.encoding_mode);
+
+  vpx_svc_set_options(&svc_, "layers=2 encoding-mode=ip");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(2, svc_.spatial_layers);
+  EXPECT_EQ(INTER_LAYER_PREDICTION_IP, svc_.encoding_mode);
+
+  vpx_svc_set_options(&svc_, "scale-factors=not-scale-factors");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  vpx_svc_set_options(&svc_, "scale-factors=1/3,2/3");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+
+  vpx_svc_set_options(&svc_, "quantizers=not-quantizers");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  vpx_svc_set_options(&svc_, "quantizers=40,45");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+}
+
+TEST_F(SvcTest, SetQuantizers) {
+  vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30");
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_quantizers(&svc_, NULL);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  svc_.first_frame_full_size = 0;
+  svc_.spatial_layers = 2;
+  res = vpx_svc_set_quantizers(&svc_, "40,30");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+
+  res = vpx_svc_set_quantizers(&svc_, "40");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+}
+
+TEST_F(SvcTest, SetScaleFactors) {
+  vpx_codec_err_t res = vpx_svc_set_scale_factors(NULL, "4/16,16/16");
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_scale_factors(&svc_, NULL);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  svc_.first_frame_full_size = 0;
+  svc_.spatial_layers = 2;
+  res = vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+
+  res = vpx_svc_set_scale_factors(&svc_, "4/16");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+}
+
+// test that decoder can handle an SVC frame as the first frame in a sequence
+// this test is disabled since it always fails because of a decoder issue
+// https://code.google.com/p/webm/issues/detail?id=654
+TEST_F(SvcTest, DISABLED_FirstFrameHasLayers) {
+  svc_.first_frame_full_size = 0;
+  svc_.spatial_layers = 2;
+  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
+  vpx_svc_set_quantizers(&svc_, "40,30");
+
+  vpx_codec_err_t res =
+      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+
+  libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+                                     codec_enc_.g_timebase.den,
+                                     codec_enc_.g_timebase.num, 0, 30);
+  video.Begin();
+
+  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+                       video.duration(), VPX_DL_REALTIME);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+
+  const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+
+  // this test fails with a decoder error
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+}
+
+TEST_F(SvcTest, EncodeThreeFrames) {
+  svc_.first_frame_full_size = 1;
+  svc_.spatial_layers = 2;
+  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
+  vpx_svc_set_quantizers(&svc_, "40,30");
+
+  vpx_codec_err_t res =
+      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+
+  libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+                                     codec_enc_.g_timebase.den,
+                                     codec_enc_.g_timebase.num, 0, 30);
+  // FRAME 1
+  video.Begin();
+  // this frame is full size, with only one layer
+  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+                       video.duration(), VPX_DL_REALTIME);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
+
+  vpx_codec_err_t res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+
+  // FRAME 2
+  video.Next();
+  // this is an I-frame
+  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+                       video.duration(), VPX_DL_REALTIME);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
+
+  res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+
+  // FRAME 3
+  video.Next();
+  // this is a P-frame
+  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+                       video.duration(), VPX_DL_REALTIME);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
+
+  res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+}
+
+TEST_F(SvcTest, GetLayerResolution) {
+  svc_.first_frame_full_size = 0;
+  svc_.spatial_layers = 2;
+  vpx_svc_set_scale_factors(&svc_, "4/16,8/16");
+  vpx_svc_set_quantizers(&svc_, "40,30");
+
+  vpx_codec_err_t res =
+      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+
+  // ensure that requested layer is a valid layer
+  uint32_t layer_width, layer_height;
+  res = vpx_svc_get_layer_resolution(&svc_, svc_.spatial_layers,
+                                     &layer_width, &layer_height);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_get_layer_resolution(NULL, 0, &layer_width, &layer_height);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_get_layer_resolution(&svc_, 0, NULL, &layer_height);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_get_layer_resolution(&svc_, 0, &layer_width, NULL);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_get_layer_resolution(&svc_, 0, &layer_width, &layer_height);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(kWidth * 4 / 16, layer_width);
+  EXPECT_EQ(kHeight * 4 / 16, layer_height);
+
+  res = vpx_svc_get_layer_resolution(&svc_, 1, &layer_width, &layer_height);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(kWidth * 8 / 16, layer_width);
+  EXPECT_EQ(kHeight * 8 / 16, layer_height);
+}
+
+}  // namespace
diff --git a/test/test.mk b/test/test.mk
index e07dc7724..f7a5d15a3 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -96,6 +96,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.cc
 
 endif # VP9
 
diff --git a/test/video_source.h b/test/video_source.h
index 26d53282f..3d01d39b2 100644
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -18,16 +18,35 @@
 
 namespace libvpx_test {
 
-static FILE *OpenTestDataFile(const std::string& file_name) {
-  std::string path_to_source = file_name;
-  const char *kDataPath = getenv("LIBVPX_TEST_DATA_PATH");
-
-  if (kDataPath) {
-    path_to_source = kDataPath;
-    path_to_source += "/";
-    path_to_source += file_name;
+// Helper macros to ensure LIBVPX_TEST_DATA_PATH is a quoted string.
+// They are undefined again right below GetDataPath.
+// NOTE: LIBVPX_TEST_DATA_PATH MUST NOT already be a quoted string before
+// stringification, or GetDataPath will fail at runtime.
+#define TO_STRING(S) #S
+#define STRINGIFY(S) TO_STRING(S)
+
+// Returns the test data directory (environment first, then build default).
+static std::string GetDataPath() {
+  const char *const env_path = getenv("LIBVPX_TEST_DATA_PATH");
+  if (env_path != NULL) {
+    return env_path;
   }
+#ifdef LIBVPX_TEST_DATA_PATH
+  // Some environments cannot set environment variables, so the build may
+  // instead define LIBVPX_TEST_DATA_PATH as an unquoted preprocessor symbol
+  // (e.g. from the make files); it is turned into a string literal here.
+  return STRINGIFY(LIBVPX_TEST_DATA_PATH);
+#else
+  return ".";
+#endif
+}
 
+// Undefining stringification macros because they are not used elsewhere
+#undef TO_STRING
+#undef STRINGIFY
+
+static FILE *OpenTestDataFile(const std::string& file_name) {
+  const std::string path_to_source = GetDataPath() + "/" + file_name;
   return fopen(path_to_source.c_str(), "rb");
 }
 
diff --git a/test/vp9_lossless_test.cc b/test/vp9_lossless_test.cc
index 441cc44da..30a3118eb 100644
--- a/test/vp9_lossless_test.cc
+++ b/test/vp9_lossless_test.cc
@@ -35,7 +35,7 @@ class LossLessTest : public ::libvpx_test::EncoderTest,
   }
 
   virtual void BeginPassHook(unsigned int /*pass*/) {
-    psnr_ = 0.0;
+    psnr_ = kMaxPsnr;
     nframes_ = 0;
   }
 
@@ -65,9 +65,9 @@ TEST_P(LossLessTest, TestLossLessEncoding) {
   init_flags_ = VPX_CODEC_USE_PSNR;
 
   // intentionally changed the dimension for better testing coverage
-  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 356, 284,
+  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                      timebase.den, timebase.num, 0, 30);
-
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   const double psnr_lossless = GetMinPsnr();
   EXPECT_GE(psnr_lossless, kMaxPsnr);
 }
diff --git a/tools_common.c b/tools_common.c
index 92de79418..44b2a3fa0 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -7,8 +7,11 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-#include <stdio.h>
 #include "tools_common.h"
+
+#include <stdarg.h>
+#include <stdlib.h>
+
 #if defined(_WIN32) || defined(__OS2__)
 #include <io.h>
 #include <fcntl.h>
@@ -20,6 +23,18 @@
 #endif
 #endif
 
+#define LOG_ERROR(label) do {\
+  const char *l = label;\
+  va_list ap;\
+  va_start(ap, fmt);\
+  if (l)\
+    fprintf(stderr, "%s: ", l);\
+  vfprintf(stderr, fmt, ap);\
+  fprintf(stderr, "\n");\
+  va_end(ap);\
+} while (0)
+
+
 FILE *set_binary_mode(FILE *stream) {
   (void)stream;
 #if defined(_WIN32) || defined(__OS2__)
@@ -27,3 +42,17 @@ FILE *set_binary_mode(FILE *stream) {
 #endif
   return stream;
 }
+
+void die(const char *fmt, ...) {
+  LOG_ERROR(NULL);
+  usage_exit();
+}
+
+void fatal(const char *fmt, ...) {
+  LOG_ERROR("Fatal");
+  exit(EXIT_FAILURE);
+}
+
+void warn(const char *fmt, ...) {
+  LOG_ERROR("Warning");
+}
diff --git a/tools_common.h b/tools_common.h
index 9e56149a5..7dfd5ad21 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -7,10 +7,24 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef TOOLS_COMMON_H
-#define TOOLS_COMMON_H
+#ifndef TOOLS_COMMON_H_
+#define TOOLS_COMMON_H_
+
+#include <stdio.h>
+
+#define VP8_FOURCC (0x30385056)
+#define VP9_FOURCC (0x30395056)
+#define VP8_FOURCC_MASK (0x00385056)
+#define VP9_FOURCC_MASK (0x00395056)
 
 /* Sets a stdio stream into binary mode */
 FILE *set_binary_mode(FILE *stream);
 
-#endif
+void die(const char *fmt, ...);    /* print message, then usage_exit() */
+void fatal(const char *fmt, ...);  /* print "Fatal: <msg>", then exit */
+void warn(const char *fmt, ...);   /* print "Warning: <msg>" and return */
+
+/* The tool including this file must define usage_exit() */
+void usage_exit(void);
+
+#endif  // TOOLS_COMMON_H_
diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm
new file mode 100644
index 000000000..1f03ff60e
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm
@@ -0,0 +1,144 @@
+;
+;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+    EXPORT  |vp9_idct32x32_1_add_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+    ;TODO(hkuang): put the following macros in a separate
+    ;file so other idct functions can also use them.
+    MACRO
+    LD_16x8          $src, $stride
+    vld1.8           {q8}, [$src], $stride
+    vld1.8           {q9}, [$src], $stride
+    vld1.8           {q10}, [$src], $stride
+    vld1.8           {q11}, [$src], $stride
+    vld1.8           {q12}, [$src], $stride
+    vld1.8           {q13}, [$src], $stride
+    vld1.8           {q14}, [$src], $stride
+    vld1.8           {q15}, [$src], $stride
+    MEND
+
+    MACRO
+    ADD_DIFF_16x8    $diff
+    vqadd.u8         q8, q8, $diff
+    vqadd.u8         q9, q9, $diff
+    vqadd.u8         q10, q10, $diff
+    vqadd.u8         q11, q11, $diff
+    vqadd.u8         q12, q12, $diff
+    vqadd.u8         q13, q13, $diff
+    vqadd.u8         q14, q14, $diff
+    vqadd.u8         q15, q15, $diff
+    MEND
+
+    MACRO
+    SUB_DIFF_16x8    $diff
+    vqsub.u8         q8, q8, $diff
+    vqsub.u8         q9, q9, $diff
+    vqsub.u8         q10, q10, $diff
+    vqsub.u8         q11, q11, $diff
+    vqsub.u8         q12, q12, $diff
+    vqsub.u8         q13, q13, $diff
+    vqsub.u8         q14, q14, $diff
+    vqsub.u8         q15, q15, $diff
+    MEND
+
+    MACRO
+    ST_16x8          $dst, $stride
+    vst1.8           {q8}, [$dst], $stride
+    vst1.8           {q9}, [$dst], $stride
+    vst1.8           {q10},[$dst], $stride
+    vst1.8           {q11},[$dst], $stride
+    vst1.8           {q12},[$dst], $stride
+    vst1.8           {q13},[$dst], $stride
+    vst1.8           {q14},[$dst], $stride
+    vst1.8           {q15},[$dst], $stride
+    MEND
+
+;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,
+;                              int dest_stride)
+;
+; r0  int16_t *input
+; r1  uint8_t *dest
+; r2  int dest_stride
+
+|vp9_idct32x32_1_add_neon| PROC
+    push             {lr}
+    pld              [r1]
+    add              r3, r1, #16               ; r3 dest + 16 for second loop
+    ldrsh            r0, [r0]                  ; r0 = input[0]
+
+    ; generate cospi_16_64 = 11585
+    mov              r12, #0x2d00
+    add              r12, #0x41
+
+    ; out = dct_const_round_shift(input[0] * cospi_16_64)
+    mul              r0, r0, r12               ; input[0] * cospi_16_64
+    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
+    asr              r0, r0, #14               ; >> DCT_CONST_BITS
+
+    ; out = dct_const_round_shift(out * cospi_16_64)
+    mul              r0, r0, r12               ; out * cospi_16_64
+    mov              r12, r1                   ; save dest
+    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
+    asr              r0, r0, #14               ; >> DCT_CONST_BITS
+
+    ; a1 = ROUND_POWER_OF_TWO(out, 6)
+    add              r0, r0, #32               ; + (1 <<((6) - 1))
+    asrs             r0, r0, #6                ; >> 6
+    bpl              diff_positive_32_32       ; N only: asrs leaves V stale
+
+diff_negative_32_32
+    neg              r0, r0                    ; r0 = |a1|
+    usat             r0, #8, r0                ; clamp to 0..255
+    vdup.u8          q0, r0
+    mov              r0, #4                    ; 4 passes of 16 rows x 16 cols
+
+diff_negative_32_32_loop
+    sub              r0, #1
+    LD_16x8          r1, r2
+    SUB_DIFF_16x8    q0
+    ST_16x8          r12, r2
+
+    LD_16x8          r1, r2
+    SUB_DIFF_16x8    q0
+    ST_16x8          r12, r2
+    cmp              r0, #2                    ; left 16 columns finished?
+    moveq            r1, r3                    ; then restart at dest + 16
+    moveq            r12, r3
+    cmp              r0, #0
+    bne              diff_negative_32_32_loop
+    pop              {pc}                      ; balances the push {lr} above
+
+diff_positive_32_32
+    usat             r0, #8, r0                ; clamp to 0..255
+    vdup.u8          q0, r0
+    mov              r0, #4                    ; 4 passes of 16 rows x 16 cols
+
+diff_positive_32_32_loop
+    sub              r0, #1
+    LD_16x8          r1, r2
+    ADD_DIFF_16x8    q0
+    ST_16x8          r12, r2
+
+    LD_16x8          r1, r2
+    ADD_DIFF_16x8    q0
+    ST_16x8          r12, r2
+    cmp              r0, #2                    ; left 16 columns finished?
+    moveq            r1, r3                    ; then restart at dest + 16
+    moveq            r12, r3
+    cmp              r0, #0
+    bne              diff_positive_32_32_loop
+    pop              {pc}
+
+    ENDP             ; |vp9_idct32x32_1_add_neon|
+    END
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index acb4724e5..452dd6b89 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -221,8 +221,6 @@ extern "C"
   int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
                            unsigned int height);
 
-  int vp9_switch_layer(VP9_PTR comp, int layer);
-
   void vp9_set_svc(VP9_PTR comp, int use_svc);
 
   int vp9_get_quantizer(VP9_PTR c);
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 57ca5c5da..6018e1775 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -403,8 +403,8 @@ void vp9_set_pred_flag_seg_id(MACROBLOCKD *xd, uint8_t pred_flag) {
 int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
                        BLOCK_SIZE bsize, int mi_row, int mi_col) {
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int bw = 1 << mi_width_log2(bsize);
-  const int bh = 1 << mi_height_log2(bsize);
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
   const int xmis = MIN(cm->mi_cols - mi_col, bw);
   const int ymis = MIN(cm->mi_rows - mi_row, bh);
   int x, y, segment_id = INT_MAX;
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 3f3268f2d..6313f3337 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -158,7 +158,7 @@ prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const
 specialize vp9_d63_predictor_32x32 $ssse3_x86inc
 
 prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_32x32 $ssse3 x86inc
+specialize vp9_h_predictor_32x32 $ssse3_x86inc
 
 prototype void vp9_d117_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
 specialize vp9_d117_predictor_32x32
@@ -299,7 +299,7 @@ prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int de
 specialize vp9_idct32x32_34_add sse2
 
 prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_1_add sse2 dspr2
+specialize vp9_idct32x32_1_add sse2 neon dspr2
 
 prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
 specialize vp9_iht4x4_16_add sse2 neon dspr2
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index ccf5aac17..2a3384488 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -15,6 +15,16 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_idct.h"
 
+// Add in_x to the 4 pixels at dest, store, step dest; uses zero and stride.
+#define RECON_AND_STORE4X4(dest, in_x) {                  \
+  __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));   \
+  d0 = _mm_unpacklo_epi8(d0, zero);                       \
+  d0 = _mm_add_epi16((in_x), d0);                         \
+  d0 = _mm_packus_epi16(d0, d0);                          \
+  *(int *)(dest) = _mm_cvtsi128_si32(d0);                 \
+  (dest) += stride;                                       \
+}
+
 void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i eight = _mm_set1_epi16(8);
@@ -26,21 +36,19 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   __m128i input0, input1, input2, input3;
 
   // Rows
-  input0 = _mm_loadl_epi64((const __m128i *)input);
-  input1 = _mm_loadl_epi64((const __m128i *)(input + 4));
-  input2 = _mm_loadl_epi64((const __m128i *)(input + 8));
-  input3 = _mm_loadl_epi64((const __m128i *)(input + 12));
+  input0 = _mm_load_si128((const __m128i *)input);
+  input2 = _mm_load_si128((const __m128i *)(input + 8));
 
   // Construct i3, i1, i3, i1, i2, i0, i2, i0
   input0 = _mm_shufflelo_epi16(input0, 0xd8);
-  input1 = _mm_shufflelo_epi16(input1, 0xd8);
+  input0 = _mm_shufflehi_epi16(input0, 0xd8);
   input2 = _mm_shufflelo_epi16(input2, 0xd8);
-  input3 = _mm_shufflelo_epi16(input3, 0xd8);
+  input2 = _mm_shufflehi_epi16(input2, 0xd8);
 
+  input1 = _mm_unpackhi_epi32(input0, input0);
   input0 = _mm_unpacklo_epi32(input0, input0);
-  input1 = _mm_unpacklo_epi32(input1, input1);
+  input3 = _mm_unpackhi_epi32(input2, input2);
   input2 = _mm_unpacklo_epi32(input2, input2);
-  input3 = _mm_unpacklo_epi32(input3, input3);
 
   // Stage 1
   input0 = _mm_madd_epi16(input0, cst);
@@ -59,16 +67,14 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
 
   // Stage 2
-  input0 = _mm_packs_epi32(input0, zero);
-  input1 = _mm_packs_epi32(input1, zero);
-  input2 = _mm_packs_epi32(input2, zero);
-  input3 = _mm_packs_epi32(input3, zero);
+  input0 = _mm_packs_epi32(input0, input1);
+  input1 = _mm_packs_epi32(input2, input3);
 
   // Transpose
-  input1 = _mm_unpacklo_epi16(input0, input1);
-  input3 = _mm_unpacklo_epi16(input2, input3);
-  input0 = _mm_unpacklo_epi32(input1, input3);
-  input1 = _mm_unpackhi_epi32(input1, input3);
+  input2 = _mm_unpacklo_epi16(input0, input1);
+  input3 = _mm_unpackhi_epi16(input0, input1);
+  input0 = _mm_unpacklo_epi32(input2, input3);
+  input1 = _mm_unpackhi_epi32(input2, input3);
 
   // Switch column2, column 3, and then, we got:
   // input2: column1, column 0;  input3: column2, column 3.
@@ -78,14 +84,9 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
 
   // Columns
   // Construct i3, i1, i3, i1, i2, i0, i2, i0
-  input0 = _mm_shufflelo_epi16(input2, 0xd8);
-  input1 = _mm_shufflehi_epi16(input2, 0xd8);
-  input2 = _mm_shufflehi_epi16(input3, 0xd8);
-  input3 = _mm_shufflelo_epi16(input3, 0xd8);
-
-  input0 = _mm_unpacklo_epi32(input0, input0);
-  input1 = _mm_unpackhi_epi32(input1, input1);
-  input2 = _mm_unpackhi_epi32(input2, input2);
+  input0 = _mm_unpacklo_epi32(input2, input2);
+  input1 = _mm_unpackhi_epi32(input2, input2);
+  input2 = _mm_unpackhi_epi32(input3, input3);
   input3 = _mm_unpacklo_epi32(input3, input3);
 
   // Stage 1
@@ -105,16 +106,14 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
 
   // Stage 2
-  input0 = _mm_packs_epi32(input0, zero);
-  input1 = _mm_packs_epi32(input1, zero);
-  input2 = _mm_packs_epi32(input2, zero);
-  input3 = _mm_packs_epi32(input3, zero);
+  input0 = _mm_packs_epi32(input0, input2);
+  input1 = _mm_packs_epi32(input1, input3);
 
   // Transpose
-  input1 = _mm_unpacklo_epi16(input0, input1);
-  input3 = _mm_unpacklo_epi16(input2, input3);
-  input0 = _mm_unpacklo_epi32(input1, input3);
-  input1 = _mm_unpackhi_epi32(input1, input3);
+  input2 = _mm_unpacklo_epi16(input0, input1);
+  input3 = _mm_unpackhi_epi16(input0, input1);
+  input0 = _mm_unpacklo_epi32(input2, input3);
+  input1 = _mm_unpackhi_epi32(input2, input3);
 
   // Switch column2, column 3, and then, we got:
   // input2: column1, column 0;  input3: column2, column 3.
@@ -129,23 +128,31 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   input2 = _mm_srai_epi16(input2, 4);
   input3 = _mm_srai_epi16(input3, 4);
 
-#define RECON_AND_STORE4X4(dest, in_x) \
-  {                                                     \
-      __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
-      d0 = _mm_unpacklo_epi8(d0, zero); \
-      d0 = _mm_add_epi16(in_x, d0); \
-      d0 = _mm_packus_epi16(d0, d0); \
-      *(int *)dest = _mm_cvtsi128_si32(d0); \
-      dest += stride; \
+  // Reconstruction and Store
+  {
+     __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
+     __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
+     d0 = _mm_unpacklo_epi32(d0,
+          _mm_cvtsi32_si128(*(const int *) (dest + stride)));
+     d2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(
+                    *(const int *) (dest + stride * 3)), d2);
+     d0 = _mm_unpacklo_epi8(d0, zero);
+     d2 = _mm_unpacklo_epi8(d2, zero);
+     d0 = _mm_add_epi16(d0, input2);
+     d2 = _mm_add_epi16(d2, input3);
+     d0 = _mm_packus_epi16(d0, d2);
+     // store input0
+     *(int *)dest = _mm_cvtsi128_si32(d0);
+     // store input1
+     d0 = _mm_srli_si128(d0, 4);
+     *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
+     // store input2
+     d0 = _mm_srli_si128(d0, 4);
+     *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
+     // store input3
+     d0 = _mm_srli_si128(d0, 4);
+     *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
   }
-
-  input0 = _mm_srli_si128(input2, 8);
-  input1 = _mm_srli_si128(input3, 8);
-
-  RECON_AND_STORE4X4(dest, input2);
-  RECON_AND_STORE4X4(dest, input0);
-  RECON_AND_STORE4X4(dest, input1);
-  RECON_AND_STORE4X4(dest, input3);
 }
 
 void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h
index c86451649..fd8e74ca4 100644
--- a/vp9/decoder/vp9_dboolhuff.h
+++ b/vp9/decoder/vp9_dboolhuff.h
@@ -44,7 +44,7 @@ static int vp9_read(vp9_reader *br, int probability) {
   VP9_BD_VALUE bigsplit;
   int count;
   unsigned int range;
-  unsigned int split = 1 + (((br->range - 1) * probability) >> 8);
+  unsigned int split = ((br->range * probability) + (256 - probability)) >> 8;
 
   if (br->count < 0)
     vp9_reader_fill(br);
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 1ca578621..abdcf955c 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -91,8 +91,8 @@ static TX_SIZE read_tx_size(VP9_COMMON *const cm, MACROBLOCKD *const xd,
 static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize,
                            int mi_row, int mi_col, int segment_id) {
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int bw = 1 << mi_width_log2(bsize);
-  const int bh = 1 << mi_height_log2(bsize);
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
   const int xmis = MIN(cm->mi_cols - mi_col, bw);
   const int ymis = MIN(cm->mi_rows - mi_row, bh);
   int x, y;
@@ -260,6 +260,16 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
   mv->col = ref->col + diff.col;
 }
 
+static COMPPREDMODE_TYPE read_reference_mode(VP9_COMMON *cm,
+                                             const MACROBLOCKD *xd,
+                                             vp9_reader *r) {
+  const int pred_ctx = vp9_get_pred_context_comp_inter_inter(cm, xd);
+  const int is_compound = vp9_read(r, cm->fc.comp_inter_prob[pred_ctx]);
+  if (!cm->frame_parallel_decoding_mode)
+    ++cm->counts.comp_inter[pred_ctx][is_compound];
+  return is_compound;  // SINGLE_PREDICTION_ONLY or COMP_PREDICTION_ONLY
+}
+
 // Read the referncence frame
 static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                             vp9_reader *r,
@@ -271,27 +281,20 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
     ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
     ref_frame[1] = NONE;
   } else {
-    const int comp_ctx = vp9_get_pred_context_comp_inter_inter(cm, xd);
-    int is_comp;
-
-    if (cm->comp_pred_mode == HYBRID_PREDICTION) {
-      is_comp = vp9_read(r, fc->comp_inter_prob[comp_ctx]);
-      if (!cm->frame_parallel_decoding_mode)
-        ++counts->comp_inter[comp_ctx][is_comp];
-    } else {
-      is_comp = cm->comp_pred_mode == COMP_PREDICTION_ONLY;
-    }
+    const COMPPREDMODE_TYPE mode = (cm->comp_pred_mode == HYBRID_PREDICTION)
+                                      ? read_reference_mode(cm, xd, r)
+                                      : cm->comp_pred_mode;
 
     // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
-    if (is_comp) {
-      const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
-      const int ref_ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
-      const int b = vp9_read(r, fc->comp_ref_prob[ref_ctx]);
+    if (mode == COMP_PREDICTION_ONLY) {
+      const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+      const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
+      const int bit = vp9_read(r, fc->comp_ref_prob[ctx]);
       if (!cm->frame_parallel_decoding_mode)
-        ++counts->comp_ref[ref_ctx][b];
-      ref_frame[fix_ref_idx] = cm->comp_fixed_ref;
-      ref_frame[!fix_ref_idx] = cm->comp_var_ref[b];
-    } else {
+        ++counts->comp_ref[ctx][bit];
+      ref_frame[idx] = cm->comp_fixed_ref;
+      ref_frame[!idx] = cm->comp_var_ref[bit];
+    } else if (mode == SINGLE_PREDICTION_ONLY) {
       const int ctx0 = vp9_get_pred_context_single_ref_p1(xd);
       const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]);
       if (!cm->frame_parallel_decoding_mode)
@@ -299,14 +302,16 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
       if (bit0) {
         const int ctx1 = vp9_get_pred_context_single_ref_p2(xd);
         const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]);
-        ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
         if (!cm->frame_parallel_decoding_mode)
           ++counts->single_ref[ctx1][1][bit1];
+        ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
       } else {
         ref_frame[0] = LAST_FRAME;
       }
 
       ref_frame[1] = NONE;
+    } else {
+      assert(!"Invalid prediction mode.");
     }
   }
 }
@@ -550,8 +555,8 @@ void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                         int mi_row, int mi_col, vp9_reader *r) {
   MODE_INFO *const mi = xd->mi_8x8[0];
   const BLOCK_SIZE bsize = mi->mbmi.sb_type;
-  const int bw = 1 << mi_width_log2(bsize);
-  const int bh = 1 << mi_height_log2(bsize);
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
   const int y_mis = MIN(bh, cm->mi_rows - mi_row);
   const int x_mis = MIN(bw, cm->mi_cols - mi_col);
   int x, y, z;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 1fd9e979a..7e3bbaa16 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -153,47 +153,38 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
       vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
 }
 
-static void update_mv(vp9_reader *r, vp9_prob *p) {
-  if (vp9_read(r, NMV_UPDATE_PROB))
-    *p = (vp9_read_literal(r, 7) << 1) | 1;
+static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) {
+  // Each of the n entries is gated by an update flag; new values are odd.
+  for (; n > 0; --n, ++p)
+    if (vp9_read(r, NMV_UPDATE_PROB))
+      *p = (vp9_read_literal(r, 7) << 1) | 1;
 }
 
-static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) {
-  int i, j, k;
+static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) {
+  int i, j;
 
-  for (j = 0; j < MV_JOINTS - 1; ++j)
-    update_mv(r, &mvc->joints[j]);
+  update_mv_probs(ctx->joints, MV_JOINTS - 1, r);
 
   for (i = 0; i < 2; ++i) {
-    nmv_component *const comp = &mvc->comps[i];
-
-    update_mv(r, &comp->sign);
-
-    for (j = 0; j < MV_CLASSES - 1; ++j)
-      update_mv(r, &comp->classes[j]);
-
-    for (j = 0; j < CLASS0_SIZE - 1; ++j)
-      update_mv(r, &comp->class0[j]);
-
-    for (j = 0; j < MV_OFFSET_BITS; ++j)
-      update_mv(r, &comp->bits[j]);
+    nmv_component *const comp_ctx = &ctx->comps[i];
+    update_mv_probs(&comp_ctx->sign, 1, r);
+    update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r);
+    update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r);
+    update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r);
   }
 
   for (i = 0; i < 2; ++i) {
-    nmv_component *const comp = &mvc->comps[i];
-
+    nmv_component *const comp_ctx = &ctx->comps[i];
     for (j = 0; j < CLASS0_SIZE; ++j)
-      for (k = 0; k < 3; ++k)
-        update_mv(r, &comp->class0_fp[j][k]);
-
-    for (j = 0; j < 3; ++j)
-      update_mv(r, &comp->fp[j]);
+      update_mv_probs(comp_ctx->class0_fp[j], 3, r);
+    update_mv_probs(comp_ctx->fp, 3, r);
   }
 
   if (allow_hp) {
     for (i = 0; i < 2; ++i) {
-      update_mv(r, &mvc->comps[i].class0_hp);
-      update_mv(r, &mvc->comps[i].hp);
+      nmv_component *const comp_ctx = &ctx->comps[i];
+      update_mv_probs(&comp_ctx->class0_hp, 1, r);
+      update_mv_probs(&comp_ctx->hp, 1, r);
     }
   }
 }
@@ -209,20 +200,22 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
 // Allocate storage for each tile column.
 // TODO(jzern): when max_threads <= 1 the same storage could be used for each
 // tile.
-static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) {
+static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) {
   VP9_COMMON *const cm = &pbi->common;
   const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
-  int i, tile_col;
+  int i, tile_row, tile_col;
 
   CHECK_MEM_ERROR(cm, pbi->mi_streams,
-                  vpx_realloc(pbi->mi_streams, tile_cols *
+                  vpx_realloc(pbi->mi_streams, tile_rows * tile_cols *
                               sizeof(*pbi->mi_streams)));
-  for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-    TileInfo tile;
-
-    vp9_tile_init(&tile, cm, 0, tile_col);
-    pbi->mi_streams[tile_col] =
-        &cm->mi[cm->mi_rows * tile.mi_col_start];
+  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+      TileInfo tile;
+      vp9_tile_init(&tile, cm, tile_row, tile_col);
+      pbi->mi_streams[tile_row * tile_cols + tile_col] =
+          &cm->mi[tile.mi_row_start * cm->mode_info_stride
+                  + tile.mi_col_start];
+    }
   }
 
   // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
@@ -248,7 +241,7 @@ static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) {
 static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
                                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int16_t* const qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
+  int16_t* const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const int stride = pd->dst.stride;
   const int eob = pd->eobs[block];
   if (eob > 0) {
@@ -261,35 +254,35 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
       case TX_4X4:
         tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block);
         if (tx_type == DCT_DCT)
-          xd->itxm_add(qcoeff, dst, stride, eob);
+          xd->itxm_add(dqcoeff, dst, stride, eob);
         else
-          vp9_iht4x4_16_add(qcoeff, dst, stride, tx_type);
+          vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type);
         break;
       case TX_8X8:
         tx_type = get_tx_type_8x8(pd->plane_type, xd);
-        vp9_iht8x8_add(tx_type, qcoeff, dst, stride, eob);
+        vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
         break;
       case TX_16X16:
         tx_type = get_tx_type_16x16(pd->plane_type, xd);
-        vp9_iht16x16_add(tx_type, qcoeff, dst, stride, eob);
+        vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
         break;
       case TX_32X32:
         tx_type = DCT_DCT;
-        vp9_idct32x32_add(qcoeff, dst, stride, eob);
+        vp9_idct32x32_add(dqcoeff, dst, stride, eob);
         break;
       default:
         assert(!"Invalid transform size");
     }
 
     if (eob == 1) {
-      vpx_memset(qcoeff, 0, 2 * sizeof(qcoeff[0]));
+      vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0]));
     } else {
       if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
-        vpx_memset(qcoeff, 0, 4 * (4 << tx_size) * sizeof(qcoeff[0]));
+        vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
       else if (tx_size == TX_32X32 && eob <= 34)
-        vpx_memset(qcoeff, 0, 256 * sizeof(qcoeff[0]));
+        vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
       else
-        vpx_memset(qcoeff, 0, (16 << (tx_size << 1)) * sizeof(qcoeff[0]));
+        vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
     }
   }
 }
@@ -360,16 +353,15 @@ static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   const int bh = num_8x8_blocks_high_lookup[bsize];
   const int bw = num_8x8_blocks_wide_lookup[bsize];
   const int offset = mi_row * cm->mode_info_stride + mi_col;
-
-  xd->mode_info_stride = cm->mode_info_stride;
+  const int tile_offset = tile->mi_row_start * cm->mode_info_stride +
+                          tile->mi_col_start;
 
   xd->mi_8x8 = cm->mi_grid_visible + offset;
   xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset;
 
   // we are using the mode info context stream here
-  xd->mi_8x8[0] = xd->mi_stream;
+  xd->mi_8x8[0] = xd->mi_stream + offset - tile_offset;
   xd->mi_8x8[0]->mbmi.sb_type = bsize;
-  ++xd->mi_stream;
 
   // Special case: if prev_mi is NULL, the previous mode info context
   // cannot be used.
@@ -768,9 +760,10 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
 }
 
 static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd,
-                               int tile_col) {
+                               int tile_row, int tile_col) {
   int i;
-  xd->mi_stream = pbi->mi_streams[tile_col];
+  const int tile_cols = 1 << pbi->common.log2_tile_cols;
+  xd->mi_stream = pbi->mi_streams[tile_row * tile_cols + tile_col];
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     xd->above_context[i] = pbi->above_context[i];
@@ -874,77 +867,68 @@ static size_t get_tile(const uint8_t *const data_end,
   return size;
 }
 
-static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
-  vp9_reader residual_bc;
+typedef struct TileBuffer {
+  const uint8_t *data;
+  size_t size;
+} TileBuffer;
 
+static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
-
-  const uint8_t *const data_end = pbi->source + pbi->source_sz;
-  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+  const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
+  TileBuffer tile_buffers[4][1 << 6];
   int tile_row, tile_col;
+  const uint8_t *const data_end = pbi->source + pbi->source_sz;
+  const uint8_t *end = NULL;
+  vp9_reader r;
+
+  assert(tile_rows <= 4);
+  assert(tile_cols <= (1 << 6));
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
   vpx_memset(pbi->above_context[0], 0,
-             sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
-             2 * aligned_mi_cols);
+             sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols);
 
   vpx_memset(pbi->above_seg_context, 0,
-             sizeof(*pbi->above_seg_context) * aligned_mi_cols);
-
-  if (pbi->oxcf.inv_tile_order) {
-    const uint8_t *data_ptr2[4][1 << 6];
-    vp9_reader bc_bak = {0};
-
-    // pre-initialize the offsets, we're going to decode in inverse order
-    data_ptr2[0][0] = data;
-    for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-      for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-        const int last_tile =
-            tile_row == tile_rows - 1 && tile_col == tile_cols - 1;
-        const size_t size = get_tile(data_end, last_tile, &cm->error, &data);
-        data_ptr2[tile_row][tile_col] = data;
-        data += size;
-      }
+             sizeof(*pbi->above_seg_context) * aligned_cols);
+
+  // Load tile data into tile_buffers
+  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+      const int last_tile = tile_row == tile_rows - 1 &&
+                            tile_col == tile_cols - 1;
+      const size_t size = get_tile(data_end, last_tile, &cm->error, &data);
+      TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
+      buf->data = data;
+      buf->size = size;
+      data += size;
     }
+  }
 
-    for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-      for (tile_col = tile_cols - 1; tile_col >= 0; tile_col--) {
-        TileInfo tile;
-
-        vp9_tile_init(&tile, cm, tile_row, tile_col);
-        setup_token_decoder(data_ptr2[tile_row][tile_col], data_end,
-                            data_end - data_ptr2[tile_row][tile_col],
-                            &cm->error, &residual_bc);
-        setup_tile_context(pbi, xd, tile_col);
-        decode_tile(pbi, &tile, &residual_bc);
-        if (tile_row == tile_rows - 1 && tile_col == tile_cols - 1)
-          bc_bak = residual_bc;
-      }
-    }
-    residual_bc = bc_bak;
-  } else {
-    for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-      for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-        const int last_tile =
-            tile_row == tile_rows - 1 && tile_col == tile_cols - 1;
-        const size_t size = get_tile(data_end, last_tile, &cm->error, &data);
-        TileInfo tile;
-
-        vp9_tile_init(&tile, cm, tile_row, tile_col);
-
-        setup_token_decoder(data, data_end, size, &cm->error, &residual_bc);
-        setup_tile_context(pbi, xd, tile_col);
-        decode_tile(pbi, &tile, &residual_bc);
-        data += size;
-      }
+  // Decode tiles using data from tile_buffers
+  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+      const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1
+                                               : tile_col;
+      const int last_tile = tile_row == tile_rows - 1 &&
+                                 col == tile_cols - 1;
+      const TileBuffer *const buf = &tile_buffers[tile_row][col];
+      TileInfo tile;
+
+      vp9_tile_init(&tile, cm, tile_row, col);
+      setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r);
+      setup_tile_context(pbi, xd, tile_row, col);
+      decode_tile(pbi, &tile, &r);
+
+      if (last_tile)
+        end = vp9_reader_find_end(&r);
     }
   }
 
-  return vp9_reader_find_end(&residual_bc);
+  return end;
 }
 
 static int tile_worker_hook(void *arg1, void *arg2) {
@@ -1023,7 +1007,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
 
       setup_token_decoder(data, data_end, size, &cm->error,
                           &tile_data->bit_reader);
-      setup_tile_context(pbi, &tile_data->xd, tile_col);
+      setup_tile_context(pbi, &tile_data->xd, 0, tile_col);
 
       worker->had_error = 0;
       if (i == num_workers - 1 || tile_col == tile_cols - 1) {
@@ -1227,7 +1211,7 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
       for (i = 0; i < PARTITION_TYPES - 1; ++i)
         vp9_diff_update_prob(&r, &fc->partition_prob[j][i]);
 
-    read_mv_probs(&r, nmvc, cm->allow_high_precision_mv);
+    read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
   }
 
   return vp9_reader_has_error(&r);
@@ -1323,7 +1307,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
     }
   }
 
-  alloc_tile_storage(pbi, tile_cols);
+  alloc_tile_storage(pbi, tile_rows, tile_cols);
 
   xd->mi_8x8 = cm->mi_grid_visible;
   xd->mode_info_stride = cm->mode_info_stride;
@@ -1335,7 +1319,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
   cm->fc = cm->frame_contexts[cm->frame_context_idx];
   vp9_zero(cm->counts);
   for (i = 0; i < MAX_MB_PLANE; ++i)
-    vp9_zero(xd->plane[i].qcoeff);
+    vp9_zero(xd->plane[i].dqcoeff);
 
   xd->corrupted = 0;
   new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 70d0d74ef..b8d670b96 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -61,20 +61,22 @@ static const vp9_prob cat6_prob[15] = {
   254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
 };
 
+static const int token_to_counttoken[MAX_ENTROPY_TOKENS] = {
+  ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN, TWO_TOKEN,
+  TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, TWO_TOKEN,
+  TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, DCT_EOB_MODEL_TOKEN
+};
+
 #define INCREMENT_COUNT(token)                           \
   do {                                                   \
-    if (!cm->frame_parallel_decoding_mode) {             \
-      ++coef_counts[type][ref][band][pt]                 \
-                   [token >= TWO_TOKEN ?                 \
-                    (token == DCT_EOB_TOKEN ?            \
-                     DCT_EOB_MODEL_TOKEN : TWO_TOKEN) :  \
-                    token];                              \
-    }                                                    \
-  } while (0)
+    if (!cm->frame_parallel_decoding_mode) {             \
+      ++coef_counts[band][pt][token_to_counttoken[token]]; \
+    }                                                    \
+  } while (0)  /* no trailing ';': each use site supplies its own */
 
 #define WRITE_COEF_CONTINUE(val, token)                  \
   {                                                      \
-    qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
+    dqcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
                             dq[c > 0] / (1 + (tx_size == TX_32X32)); \
     INCREMENT_COUNT(token);                              \
     token_cache[scan[c]] = vp9_pt_energy_class[token];   \
@@ -82,15 +84,14 @@ static const vp9_prob cat6_prob[15] = {
     continue;                                            \
   }
 
-#define ADJUST_COEF(prob, bits_count)  \
-  do {                                 \
-    if (vp9_read(r, prob))             \
-      val += 1 << bits_count;          \
+#define ADJUST_COEF(prob, bits_count)                   \
+  do {                                                  \
+    val += (vp9_read(r, prob) << bits_count);           \
   } while (0);
 
 static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
                         vp9_reader *r, int block_idx,
-                        PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
+                        PLANE_TYPE type, int seg_eob, int16_t *dqcoeff_ptr,
                         TX_SIZE tx_size, const int16_t *dq, int pt,
                         uint8_t *token_cache) {
   const FRAME_CONTEXT *const fc = &cm->fc;
@@ -102,7 +103,10 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
   vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
   uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } };
   const vp9_prob *prob;
-  vp9_coeff_count_model *coef_counts = counts->coef[tx_size];
+  unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES + 1] =
+      counts->coef[tx_size][type][ref];
+  unsigned int (*eob_branch_count)[PREV_COEF_CONTEXTS] =
+      counts->eob_branch[tx_size][type][ref];
   const int16_t *scan, *nb;
   const uint8_t *const band_translate = get_band_translate(tx_size);
   get_scan(xd, tx_size, type, block_idx, &scan, &nb);
@@ -117,7 +121,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
     band = get_coef_band(band_translate, c);
     prob = coef_probs[band][pt];
     if (!cm->frame_parallel_decoding_mode)
-      ++counts->eob_branch[tx_size][type][ref][band][pt];
+      ++eob_branch_count[band][pt];
     if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
       break;
 
@@ -205,7 +209,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
 
   if (c < seg_eob) {
     if (!cm->frame_parallel_decoding_mode)
-      ++coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN];
+      ++coef_counts[band][pt][DCT_EOB_MODEL_TOKEN];
   }
 
   return c;
@@ -224,7 +228,7 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
                                     pd->left_context + loff);
 
   eob = decode_coefs(cm, xd, r, block,
-                     pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block),
+                     pd->plane_type, seg_eob, BLOCK_OFFSET(pd->dqcoeff, block),
                      tx_size, pd->dequant, pt, token_cache);
 
   set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 87bd36c2b..07a67a585 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1217,7 +1217,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
       TileInfo tile;
 
-      vp9_tile_init(&tile, cm, 0, tile_col);
+      vp9_tile_init(&tile, cm, tile_row, tile_col);
       tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
 
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 583c6c8d0..8033a4d15 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -136,7 +136,7 @@ struct macroblock {
 
   // note that token_costs is the cost when eob node is skipped
   vp9_coeff_cost token_costs[TX_SIZES];
-  uint8_t token_cache[1024];
+  DECLARE_ALIGNED(16, uint8_t, token_cache[1024]);
 
   int optimize;
 
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 86332bcf9..702fc70bb 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1490,7 +1490,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       return;
     }
   }
-  assert(mi_height_log2(bsize) == mi_width_log2(bsize));
+  assert(num_8x8_blocks_wide_lookup[bsize] ==
+             num_8x8_blocks_high_lookup[bsize]);
 
   if (bsize == BLOCK_16X16) {
     set_offsets(cpi, tile, mi_row, mi_col, bsize);
@@ -1764,7 +1765,7 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
 }
 
 static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
-                          int mi_row, TOKENEXTRA **tp, int *totalrate) {
+                          int mi_row, TOKENEXTRA **tp) {
   VP9_COMMON * const cm = &cpi->common;
   int mi_col;
 
@@ -1909,7 +1910,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   MACROBLOCK * const x = &cpi->mb;
   VP9_COMMON * const cm = &cpi->common;
   MACROBLOCKD * const xd = &x->e_mbd;
-  int totalrate;
 
 //  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
 //           cpi->common.current_video_frame, cpi->common.show_frame,
@@ -1925,8 +1925,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   }
 #endif
 
-  totalrate = 0;
-
   vp9_zero(cm->counts.switchable_interp);
   vp9_zero(cpi->tx_stepdown_count);
 
@@ -1988,7 +1986,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
           vp9_tile_init(&tile, cm, tile_row, tile_col);
           for (mi_row = tile.mi_row_start;
                mi_row < tile.mi_row_end; mi_row += 8)
-            encode_sb_row(cpi, &tile, mi_row, &tp, &totalrate);
+            encode_sb_row(cpi, &tile, mi_row, &tp);
 
           cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
           assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@@ -2014,10 +2012,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
     cpi->sf.skip_encode_frame = 0;
   }
 
-  // 256 rate units to the bit,
-  // projected_frame_size in units of BYTES
-  cpi->projected_frame_size = totalrate >> 8;
-
 #if 0
   // Keep record of the total distortion this time around for future use
   cpi->last_frame_distortion = cpi->frame_distortion;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index e52e8ec1e..75ed8eab7 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -430,11 +430,11 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
   // TODO(jingning): per transformed block zero forcing only enabled for
   // luma component. will integrate chroma components as well.
   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
-    int x, y;
+    int i, j;
     pd->eobs[block] = 0;
-    txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
-    ctx->ta[plane][x] = 0;
-    ctx->tl[plane][y] = 0;
+    txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+    ctx->ta[plane][i] = 0;
+    ctx->tl[plane][j] = 0;
     return;
   }
 
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c3dbc861d..6a3555d68 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -584,9 +584,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
       xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
       set_mi_row_col(xd, &tile,
                      mb_row << 1,
-                     1 << mi_height_log2(xd->mi_8x8[0]->mbmi.sb_type),
+                     num_8x8_blocks_high_lookup[xd->mi_8x8[0]->mbmi.sb_type],
                      mb_col << 1,
-                     1 << mi_width_log2(xd->mi_8x8[0]->mbmi.sb_type),
+                     num_8x8_blocks_wide_lookup[xd->mi_8x8[0]->mbmi.sb_type],
                      cm->mi_rows, cm->mi_cols);
 
       if (cpi->sf.variance_adaptive_quantization) {
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 1d3170a55..f922f900a 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -4242,37 +4242,9 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
   return 0;
 }
 
-int vp9_switch_layer(VP9_PTR comp, int layer) {
-  VP9_COMP *cpi = (VP9_COMP *)comp;
-
-  if (cpi->use_svc) {
-    cpi->current_layer = layer;
-
-    // Use buffer i for layer i LST
-    cpi->lst_fb_idx = layer;
-
-    // Use buffer i-1 for layer i Alt (Inter-layer prediction)
-    if (layer != 0) cpi->alt_fb_idx = layer - 1;
-
-    // Use the rest for Golden
-    if (layer < 2 * cpi->number_spatial_layers - NUM_REF_FRAMES)
-      cpi->gld_fb_idx = cpi->lst_fb_idx;
-    else
-      cpi->gld_fb_idx = 2 * cpi->number_spatial_layers - 1 - layer;
-
-    printf("Switching to layer %d:\n", layer);
-    printf("Using references: LST/GLD/ALT [%d|%d|%d]\n", cpi->lst_fb_idx,
-           cpi->gld_fb_idx, cpi->alt_fb_idx);
-  } else {
-    printf("Switching layer not supported. Enable SVC first \n");
-  }
-  return 0;
-}
-
 void vp9_set_svc(VP9_PTR comp, int use_svc) {
   VP9_COMP *cpi = (VP9_COMP *)comp;
   cpi->use_svc = use_svc;
-  if (cpi->use_svc) printf("Enabled SVC encoder \n");
   return;
 }
 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 993919e5b..e49789e0c 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -609,7 +609,7 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
 
   // TODO(jingning): temporarily enabled only for luma component
   rd = MIN(rd1, rd2);
-  if (plane == 0)
+  if (!xd->lossless && plane == 0)
     x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];
 
   args->this_rate += args->rate;
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 579f7a6e9..7d4676e97 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -81,6 +81,7 @@ struct tokenize_b_args {
   MACROBLOCKD *xd;
   TOKENEXTRA **tp;
   TX_SIZE tx_size;
+  uint8_t *token_cache;
 };
 
 static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -99,6 +100,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
   VP9_COMP *cpi = args->cpi;
   MACROBLOCKD *xd = args->xd;
   TOKENEXTRA **tp = args->tp;
+  uint8_t *token_cache = args->token_cache;
   struct macroblockd_plane *pd = &xd->plane[plane];
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   int pt; /* near block/prev token context index */
@@ -113,7 +115,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
   vp9_coeff_count *const counts = cpi->coef_counts[tx_size];
   vp9_coeff_probs_model *const coef_probs = cpi->common.fc.coef_probs[tx_size];
   const int ref = is_inter_block(mbmi);
-  uint8_t token_cache[1024];
   const uint8_t *const band_translate = get_band_translate(tx_size);
   const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
   int aoff, loff;
@@ -197,7 +198,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
   const int mb_skip_context = vp9_get_pred_context_mbskip(xd);
   const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                               SEG_LVL_SKIP);
-  struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size};
+  struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache};
 
   mbmi->skip_coeff = vp9_sb_is_skippable(xd, bsize);
   if (mbmi->skip_coeff) {
diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_vaq.c
index 3179ae301..1f9cb8709 100644
--- a/vp9/encoder/vp9_vaq.c
+++ b/vp9/encoder/vp9_vaq.c
@@ -118,8 +118,8 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
       ((-xd->mb_to_bottom_edge) >> 3) : 0;
 
   if (right_overflow || bottom_overflow) {
-    int bw = (1 << (mi_width_log2(bs)  + 3)) - right_overflow;
-    int bh = (1 << (mi_height_log2(bs) + 3)) - bottom_overflow;
+    const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow;
+    const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow;
     int avg;
     variance(x->plane[0].src.buf, x->plane[0].src.stride,
              vp9_64_zeros, 0, bw, bh, &sse, &avg);
diff --git a/vp9/encoder/x86/vp9_subpel_variance.asm b/vp9/encoder/x86/vp9_subpel_variance.asm
index 533456b77..1a9e4e8b6 100644
--- a/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -118,6 +118,14 @@ SECTION .text
   RET
 %endmacro
 
+%macro INC_SRC_BY_SRC_STRIDE  0
+%if ARCH_X86=1 && CONFIG_PIC=1
+  add                srcq, src_stridemp
+%else
+  add                srcq, src_strideq
+%endif
+%endmacro
+
 %macro SUBPEL_VARIANCE 1-2 0 ; W
 %if cpuflag(ssse3)
 %define bilin_filter_m bilin_filter_m_ssse3
@@ -129,41 +137,85 @@ SECTION .text
 ; FIXME(rbultje) only bilinear filters use >8 registers, and ssse3 only uses
 ; 11, not 13, if the registers are ordered correctly. May make a minor speed
 ; difference on Win64
-%ifdef PIC
-%if %2 == 1 ; avg
-cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
-                                              x_offset, y_offset, \
-                                              dst, dst_stride, \
-                                              sec, sec_stride, height, sse
-%define sec_str sec_strideq
-%else
-cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, y_offset, \
-                                          dst, dst_stride, height, sse
-%endif
-%define h heightd
-%define bilin_filter sseq
-%else
-%if %2 == 1 ; avg
-cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
-                                    7 + 2 * ARCH_X86_64, 13, src, src_stride, \
-                                                         x_offset, y_offset, \
-                                                         dst, dst_stride, \
-                                                         sec, sec_stride, \
-                                                         height, sse
-%if ARCH_X86_64
-%define h heightd
-%define sec_str sec_strideq
-%else
-%define h dword heightm
-%define sec_str sec_stridemp
-%endif
+
+%ifdef PIC    ; 64bit PIC
+  %if %2 == 1 ; avg
+    cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
+                                      x_offset, y_offset, \
+                                      dst, dst_stride, \
+                                      sec, sec_stride, height, sse
+    %define sec_str sec_strideq
+  %else
+    cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
+                                  y_offset, dst, dst_stride, height, sse
+  %endif
+  %define h heightd
+  %define bilin_filter sseq
 %else
-cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
-                                          dst, dst_stride, height, sse
-%define h heightd
-%endif
-%define bilin_filter bilin_filter_m
+  %if ARCH_X86=1 && CONFIG_PIC=1
+    %if %2 == 1 ; avg
+      cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
+                                  x_offset, y_offset, \
+                                  dst, dst_stride, \
+                                  sec, sec_stride, \
+                                  height, sse, g_bilin_filter, g_pw_8
+      %define h dword heightm
+      %define sec_str sec_stridemp
+
+      ; Store bilin_filter and pw_8 locations on the stack
+      GET_GOT eax
+      add esp, 4                ; restore esp
+
+      lea ecx, [GLOBAL(bilin_filter_m)]
+      mov g_bilin_filterm, ecx
+
+      lea ecx, [GLOBAL(pw_8)]
+      mov g_pw_8m, ecx
+
+      LOAD_IF_USED 0, 1         ; load eax, ecx back
+    %else
+      cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
+                                y_offset, dst, dst_stride, height, sse, \
+                                g_bilin_filter, g_pw_8
+      %define h heightd
+
+      ; Store bilin_filter and pw_8 locations on the stack
+      GET_GOT eax
+      add esp, 4                ; restore esp
+
+      lea ecx, [GLOBAL(bilin_filter_m)]
+      mov g_bilin_filterm, ecx
+
+      lea ecx, [GLOBAL(pw_8)]
+      mov g_pw_8m, ecx
+
+      LOAD_IF_USED 0, 1         ; load eax, ecx back
+    %endif
+  %else
+    %if %2 == 1 ; avg
+      cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
+                        7 + 2 * ARCH_X86_64, 13, src, src_stride, \
+                                             x_offset, y_offset, \
+                                             dst, dst_stride, \
+                                             sec, sec_stride, \
+                                             height, sse
+      %if ARCH_X86_64
+      %define h heightd
+      %define sec_str sec_strideq
+      %else
+      %define h dword heightm
+      %define sec_str sec_stridemp
+      %endif
+    %else
+      cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
+                              y_offset, dst, dst_stride, height, sse
+      %define h heightd
+    %endif
+
+    %define bilin_filter bilin_filter_m
+  %endif
 %endif
+
   ASSERT               %1 <= 16         ; m6 overflows if w > 16
   pxor                 m6, m6           ; sum
   pxor                 m7, m7           ; sse
@@ -329,11 +381,22 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %define filter_y_b m9
 %define filter_rnd m10
 %else ; x86-32 or mmx
+%if ARCH_X86=1 && CONFIG_PIC=1
+; x_offset == 0, reuse x_offset reg
+%define tempq x_offsetq
+  add y_offsetq, g_bilin_filterm
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+  mov tempq, g_pw_8m
+%define filter_rnd [tempq]
+%else
   add           y_offsetq, bilin_filter
 %define filter_y_a [y_offsetq]
 %define filter_y_b [y_offsetq+16]
 %define filter_rnd [pw_8]
 %endif
+%endif
+
 .x_zero_y_other_loop:
 %if %1 == 16
   movu                 m0, [srcq]
@@ -615,12 +678,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %define filter_y_a m8
 %define filter_y_b m9
 %define filter_rnd m10
+%else  ;x86_32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; x_offset == 0.5. We can reuse x_offset reg
+%define tempq x_offsetq
+  add y_offsetq, g_bilin_filterm
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+  mov tempq, g_pw_8m
+%define filter_rnd [tempq]
 %else
   add           y_offsetq, bilin_filter
 %define filter_y_a [y_offsetq]
 %define filter_y_b [y_offsetq+16]
 %define filter_rnd [pw_8]
 %endif
+%endif
+
 %if %1 == 16
   movu                 m0, [srcq]
   movu                 m3, [srcq+1]
@@ -752,12 +826,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %define filter_x_a m8
 %define filter_x_b m9
 %define filter_rnd m10
+%else    ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+;y_offset == 0. We can reuse y_offset reg.
+%define tempq y_offsetq
+  add x_offsetq, g_bilin_filterm
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+  mov tempq, g_pw_8m
+%define filter_rnd [tempq]
 %else
   add           x_offsetq, bilin_filter
 %define filter_x_a [x_offsetq]
 %define filter_x_b [x_offsetq+16]
 %define filter_rnd [pw_8]
 %endif
+%endif
+
 .x_other_y_zero_loop:
 %if %1 == 16
   movu                 m0, [srcq]
@@ -873,12 +958,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %define filter_x_a m8
 %define filter_x_b m9
 %define filter_rnd m10
+%else    ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; y_offset == 0.5. We can reuse y_offset reg.
+%define tempq y_offsetq
+  add x_offsetq, g_bilin_filterm
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+  mov tempq, g_pw_8m
+%define filter_rnd [tempq]
 %else
   add           x_offsetq, bilin_filter
 %define filter_x_a [x_offsetq]
 %define filter_x_b [x_offsetq+16]
 %define filter_rnd [pw_8]
 %endif
+%endif
+
 %if %1 == 16
   movu                 m0, [srcq]
   movu                 m1, [srcq+1]
@@ -1057,6 +1153,21 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %define filter_y_a m10
 %define filter_y_b m11
 %define filter_rnd m12
+%else   ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; In this case, there is NO unused register, so the src_stride register is used
+; as a temporary. src_stride must then be loaded from the stack when needed.
+%define tempq src_strideq
+  mov tempq, g_bilin_filterm
+  add           x_offsetq, tempq
+  add           y_offsetq, tempq
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+
+  mov tempq, g_pw_8m
+%define filter_rnd [tempq]
 %else
   add           x_offsetq, bilin_filter
   add           y_offsetq, bilin_filter
@@ -1066,6 +1177,8 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %define filter_y_b [y_offsetq+16]
 %define filter_rnd [pw_8]
 %endif
+%endif
+
   ; x_offset == bilin interpolation && y_offset == bilin interpolation
 %if %1 == 16
   movu                 m0, [srcq]
@@ -1093,7 +1206,9 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %endif
   psraw                m0, 4
   psraw                m2, 4
-  add                srcq, src_strideq
+
+  INC_SRC_BY_SRC_STRIDE
+
   packuswb             m0, m2
 .x_other_y_other_loop:
 %if cpuflag(ssse3)
@@ -1163,7 +1278,7 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
   SUM_SSE              m0, m1, m2, m3, m6, m7
   mova                 m0, m4
 
-  add                srcq, src_strideq
+  INC_SRC_BY_SRC_STRIDE
   add                dstq, dst_strideq
 %else ; %1 < 16
   movh                 m0, [srcq]
@@ -1184,12 +1299,17 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %if cpuflag(ssse3)
   packuswb             m0, m0
 %endif
-  add                srcq, src_strideq
+
+  INC_SRC_BY_SRC_STRIDE
+
 .x_other_y_other_loop:
   movh                 m2, [srcq]
   movh                 m1, [srcq+1]
-  movh                 m4, [srcq+src_strideq]
-  movh                 m3, [srcq+src_strideq+1]
+
+  INC_SRC_BY_SRC_STRIDE
+  movh                 m4, [srcq]
+  movh                 m3, [srcq+1]
+
 %if cpuflag(ssse3)
   punpcklbw            m2, m1
   punpcklbw            m4, m3
@@ -1253,7 +1373,7 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
   SUM_SSE              m0, m1, m2, m3, m6, m7
   mova                 m0, m4
 
-  lea                srcq, [srcq+src_strideq*2]
+  INC_SRC_BY_SRC_STRIDE
   lea                dstq, [dstq+dst_strideq*2]
 %endif
 %if %2 == 1 ; avg
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 0badb0855..db36506a0 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -123,6 +123,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_1_add_neon$(AS
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_1_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_1_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht4x4_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 4d3967059..194203967 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -442,8 +442,6 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
       MAP(VP8E_SET_ARNR_TYPE,               xcfg.arnr_type);
       MAP(VP8E_SET_TUNING,                  xcfg.tuning);
       MAP(VP8E_SET_CQ_LEVEL,                xcfg.cq_level);
-      MAP(VP9E_SET_MAX_Q,                   ctx->cfg.rc_max_quantizer);
-      MAP(VP9E_SET_MIN_Q,                   ctx->cfg.rc_min_quantizer);
       MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT,   xcfg.rc_max_intra_bitrate_pct);
       MAP(VP9E_SET_LOSSLESS,                xcfg.lossless);
       MAP(VP9E_SET_FRAME_PARALLEL_DECODING, xcfg.frame_parallel_decoding_mode);
@@ -1009,66 +1007,40 @@ static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx,
   }
 }
 
-static vpx_codec_err_t vp9e_set_width(vpx_codec_alg_priv_t *ctx, int ctr_id,
-                                      va_list args) {
-  unsigned int *data = va_arg(args, unsigned int *);
-  if (data) {
-    int res;
-    res = vp9_set_size_literal(ctx->cpi, *data, 0);
-    if (!res) {
-      return VPX_CODEC_OK;
-    } else {
-      return VPX_CODEC_INVALID_PARAM;
-    }
-  } else {
-    return VPX_CODEC_INVALID_PARAM;
-  }
+static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id,
+                                    va_list args) {
+  int data = va_arg(args, int);
+  vp9_set_svc(ctx->cpi, data);
+  return VPX_CODEC_OK;
 }
 
-static vpx_codec_err_t vp9e_set_height(vpx_codec_alg_priv_t *ctx,
-                                       int ctr_id,
-                                       va_list args) {
-  unsigned int *data =  va_arg(args, unsigned int *);
-
-  if (data) {
-    int res;
-    res = vp9_set_size_literal(ctx->cpi, 0, *data);
+static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
+                                               int ctr_id, va_list args) {
+  vpx_svc_parameters_t *data = va_arg(args, vpx_svc_parameters_t *);
+  VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
+  vpx_svc_parameters_t params;
 
-    if (!res) {
-      return VPX_CODEC_OK;
-    } else {
-      return VPX_CODEC_INVALID_PARAM;
-    }
-  } else {
+  if (data == NULL) {
     return VPX_CODEC_INVALID_PARAM;
   }
-}
-
-static vpx_codec_err_t vp9e_set_layer(vpx_codec_alg_priv_t *ctx,
-                                      int ctr_id,
-                                      va_list args) {
-  unsigned int *data =  va_arg(args, unsigned int *);
 
-  if (data) {
-    int res;
-    res = 0;
+  params = *(vpx_svc_parameters_t *)data;
 
-    res = vp9_switch_layer(ctx->cpi, *data);
+  cpi->current_layer = params.layer;
+  cpi->lst_fb_idx = params.lst_fb_idx;
+  cpi->gld_fb_idx = params.gld_fb_idx;
+  cpi->alt_fb_idx = params.alt_fb_idx;
 
-    if (!res) {
-      return VPX_CODEC_OK;
-    } else {
-      return VPX_CODEC_INVALID_PARAM;
-    }
-  } else {
+  if (vp9_set_size_literal(ctx->cpi, params.width, params.height) != 0) {
     return VPX_CODEC_INVALID_PARAM;
   }
-}
 
-static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id,
-                                    va_list args) {
-  int data = va_arg(args, int);
-  vp9_set_svc(ctx->cpi, data);
+  ctx->cfg.rc_max_quantizer = params.max_quantizer;
+  ctx->cfg.rc_min_quantizer = params.min_quantizer;
+
+  set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
+  vp9_change_config(ctx->cpi, &ctx->oxcf);
+
   return VPX_CODEC_OK;
 }
 
@@ -1096,16 +1068,12 @@ static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = {
   {VP8E_SET_ARNR_TYPE,                set_param},
   {VP8E_SET_TUNING,                   set_param},
   {VP8E_SET_CQ_LEVEL,                 set_param},
-  {VP9E_SET_MAX_Q,                    set_param},
-  {VP9E_SET_MIN_Q,                    set_param},
   {VP8E_SET_MAX_INTRA_BITRATE_PCT,    set_param},
   {VP9E_SET_LOSSLESS,                 set_param},
   {VP9E_SET_FRAME_PARALLEL_DECODING,  set_param},
   {VP9_GET_REFERENCE,                 get_reference},
-  {VP9E_SET_WIDTH,                    vp9e_set_width},
-  {VP9E_SET_HEIGHT,                   vp9e_set_height},
-  {VP9E_SET_LAYER,                    vp9e_set_layer},
   {VP9E_SET_SVC,                      vp9e_set_svc},
+  {VP9E_SET_SVC_PARAMETERS,           vp9e_set_svc_parameters},
   { -1, NULL},
 };
 
diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c
index 8bb582ffa..9acfa29bc 100644
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -13,61 +13,101 @@
  * VP9 encoding scheme based on spatial scalability for video applications
  * that benefit from a scalable bitstream.
  */
-#include <stdio.h>
-#include <stdlib.h>
+
 #include <stdarg.h>
-#include <time.h>
+#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
-#include <libgen.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx/vpx_encoder.h"
+#include <time.h>
+#include "./args.h"
+#include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
-#define interface (vpx_codec_vp9_cx())
-#define fourcc 0x30395056
-#define IVF_FILE_HDR_SZ (32)
-#define IVF_FRAME_HDR_SZ (12)
-#define NUM_BUFFERS 8
-
-char *input_filename;
-char *output_filename;
-unsigned int number_frames_to_code = 60 * 60;
-unsigned int number_frames_to_skip = 0;
-unsigned int number_spatial_layers = 5;
-unsigned int key_period = 100;
-
-typedef enum ENCODING_MODE {
-  INTER_LAYER_PREDICTION_I,
-  INTER_LAYER_PREDICTION_IP,
-  USE_GOLDEN_FRAME
-} ENCODING_MODE;
-
-static void mem_put_le16(char *mem, unsigned int val) {
+#include "vpx/vpx_encoder.h"
+
+#define VP90_FOURCC 0x30395056
+
+static const struct arg_enum_list encoding_mode_enum[] = {
+  {"i", INTER_LAYER_PREDICTION_I},
+  {"alt-ip", ALT_INTER_LAYER_PREDICTION_IP},
+  {"ip", INTER_LAYER_PREDICTION_IP},
+  {"gf", USE_GOLDEN_FRAME},
+  {NULL, 0}
+};
+
+static const arg_def_t encoding_mode_arg = ARG_DEF_ENUM(
+    "m", "encoding-mode", 1, "Encoding mode algorithm", encoding_mode_enum);
+static const arg_def_t skip_frames_arg =
+    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
+static const arg_def_t frames_arg =
+    ARG_DEF("f", "frames", 1, "number of frames to encode");
+static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
+static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
+static const arg_def_t timebase_arg =
+    ARG_DEF("t", "timebase", 1, "timebase (num/den)");
+static const arg_def_t bitrate_arg = ARG_DEF(
+    "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
+static const arg_def_t layers_arg =
+    ARG_DEF("l", "layers", 1, "number of SVC layers");
+static const arg_def_t kf_dist_arg =
+    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
+static const arg_def_t scale_factors_arg =
+    ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
+static const arg_def_t quantizers_arg =
+    ARG_DEF("q", "quantizers", 1, "quantizers (lowest to highest layer)");
+static const arg_def_t dummy_frame_arg =
+    ARG_DEF("z", "dummy-frame", 1, "make first frame blank and full size");
+
+static const arg_def_t *svc_args[] = {
+  &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
+  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
+  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  &dummy_frame_arg,
+  NULL
+};
+
+static const SVC_ENCODING_MODE default_encoding_mode =
+    INTER_LAYER_PREDICTION_IP;
+static const uint32_t default_frames_to_skip = 0;
+static const uint32_t default_frames_to_code = 60 * 60;
+static const uint32_t default_width = 1920;
+static const uint32_t default_height = 1080;
+static const uint32_t default_timebase_num = 1;
+static const uint32_t default_timebase_den = 60;
+static const uint32_t default_bitrate = 1000;
+static const uint32_t default_spatial_layers = 5;
+static const uint32_t default_kf_dist = 100;
+static const int default_use_dummy_frame = 1;
+
+typedef struct {
+  char *input_filename;
+  char *output_filename;
+  uint32_t frames_to_code;
+  uint32_t frames_to_skip;
+} AppInput;
+
+static void mem_put_le16(char *mem, uint32_t val) {
   mem[0] = val;
   mem[1] = val >> 8;
 }
 
-static void mem_put_le32(char *mem, unsigned int val) {
+static void mem_put_le32(char *mem, uint32_t val) {
   mem[0] = val;
   mem[1] = val >> 8;
   mem[2] = val >> 16;
   mem[3] = val >> 24;
 }
 
-static void usage(char *program_name) {
-  printf(
-      "Usage: %s [-f frames] [-s skip_frames] [-w width] [-h height] \n\t"
-      "[-n rate_num] [-d rate_den] [-b bitrate] [-l layers] "
-      "<input_filename> <output_filename>\n",
-      basename(program_name));
+static void usage(const char *exec_name) {
+  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
+          exec_name);
+  fprintf(stderr, "Options:\n");
+  arg_show_usage(stderr, svc_args);
   exit(EXIT_FAILURE);
 }
 
-static void die(const char *fmt, ...) {
+void die(const char *fmt, ...) {
   va_list ap;
 
   va_start(ap, fmt);
-  vprintf(fmt, ap);
-  if (fmt[strlen(fmt) - 1] != '\n') printf("\n");
+  vfprintf(stderr, fmt, ap);
+  if (fmt[strlen(fmt) - 1] != '\n') fprintf(stderr, "\n");
   exit(EXIT_FAILURE);
 }
@@ -81,407 +121,261 @@ static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
 }
 
 static int read_frame(FILE *f, vpx_image_t *img) {
-  size_t nbytes, to_read;
+  size_t nbytes;
   int res = 1;
+  int plane;
 
-  to_read = img->w * img->h * 3 / 2;
-  nbytes = fread(img->planes[0], 1, to_read, f);
-  if (nbytes != to_read) {
-    res = 0;
-    if (nbytes > 0)
-      printf("Warning: Read partial frame. Check your width & height!\n");
+  for (plane = 0; plane < 3; ++plane) {
+    uint8_t *ptr;
+    const int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
+    const int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
+    int r;
+
+    switch (plane) {
+      case 1:
+        ptr = img->planes[VPX_PLANE_U];
+        break;
+      case 2:
+        ptr = img->planes[VPX_PLANE_V];
+        break;
+      default:
+        ptr = img->planes[plane];
+    }
+    for (r = 0; r < h; ++r) {
+      const size_t to_read = (size_t)w;
+
+      nbytes = fread(ptr, 1, to_read, f);
+      if (nbytes != to_read) {
+        res = 0;
+        if (nbytes > 0)
+          printf("Warning: Read partial frame. Check your width & height!\n");
+        break;
+      }
+      ptr += img->stride[plane];
+    }
+    if (!res) break;
   }
   return res;
 }
 
-static int read_dummy_frame(vpx_image_t *img) {
-  size_t to_read;
-
-  to_read = img->w * img->h * 3 / 2;
-  memset(img->planes[0], 129, to_read);
+static int create_dummy_frame(vpx_image_t *img) {
+  const size_t buf_size = img->w * img->h * 3 / 2;
+  memset(img->planes[0], 129, buf_size);
   return 1;
 }
 
-static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg,
+static void write_ivf_file_header(FILE *outfile,
+                                  uint32_t width, uint32_t height,
+                                  int timebase_num, int timebase_den,
                                   int frame_cnt) {
   char header[32];
 
-  if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return;
   header[0] = 'D';
   header[1] = 'K';
   header[2] = 'I';
   header[3] = 'F';
-  mem_put_le16(header + 4, 0);                    /* version */
-  mem_put_le16(header + 6, 32);                   /* headersize */
-  mem_put_le32(header + 8, fourcc);               /* headersize */
-  mem_put_le16(header + 12, cfg->g_w);            /* width */
-  mem_put_le16(header + 14, cfg->g_h);            /* height */
-  mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
-  mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
-  mem_put_le32(header + 24, frame_cnt);           /* length */
-  mem_put_le32(header + 28, 0);                   /* unused */
+  mem_put_le16(header + 4, 0);             /* version */
+  mem_put_le16(header + 6, 32);            /* headersize */
+  mem_put_le32(header + 8, VP90_FOURCC);   /* fourcc */
+  mem_put_le16(header + 12, width);        /* width */
+  mem_put_le16(header + 14, height);       /* height */
+  mem_put_le32(header + 16, timebase_den); /* rate */
+  mem_put_le32(header + 20, timebase_num); /* scale */
+  mem_put_le32(header + 24, frame_cnt);    /* length */
+  mem_put_le32(header + 28, 0);            /* unused */
 
   (void)fwrite(header, 1, 32, outfile);
 }
 
-static void write_ivf_frame_header(FILE *outfile,
-                                   const vpx_codec_cx_pkt_t *pkt) {
+static void write_ivf_frame_header(FILE *outfile, vpx_codec_pts_t pts,
+                                   size_t sz) {
   char header[12];
-  vpx_codec_pts_t pts;
-
-  if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return;
-
-  pts = pkt->data.frame.pts;
-  mem_put_le32(header, pkt->data.frame.sz);
+  mem_put_le32(header, (uint32_t)sz);
   mem_put_le32(header + 4, pts & 0xFFFFFFFF);
   mem_put_le32(header + 8, pts >> 32);
 
   (void)fwrite(header, 1, 12, outfile);
 }
 
-static void check_parameters() {
-  if (number_spatial_layers > 5) die("Cannot support more than 5 layers");
-}
-
-static void parse_command_line(int argc, char **argv,
-                               vpx_codec_enc_cfg_t *cfg) {
-  unsigned int width = 1920;
-  unsigned int height = 1080;
-  unsigned int timebase_num = 1;
-  unsigned int timebase_den = 60;
-  unsigned int bitrate = 1000;
-  int c;
+static void parse_command_line(int argc, const char **argv_,
+                               AppInput *app_input, SvcContext *svc_ctx,
+                               vpx_codec_enc_cfg_t *enc_cfg) {
+  struct arg arg;
+  char **argv, **argi, **argj;
   vpx_codec_err_t res;
 
-  opterr = 0;
-  while ((c = getopt(argc, argv, "f:w:h:n:d:b:s:l:p:")) != -1) switch (c) {
-      case 'f':
-        number_frames_to_code = atoi(optarg);
-        break;
-      case 'w':
-        width = atoi(optarg);
-        break;
-      case 'h':
-        height = atoi(optarg);
-        break;
-      case 'n':
-        timebase_num = atoi(optarg);
-        break;
-      case 'd':
-        timebase_den = atoi(optarg);
-        break;
-      case 'b':
-        bitrate = atoi(optarg);
-        break;
-      case 's':
-        number_frames_to_skip = atoi(optarg);
-        break;
-      case 'l':
-        number_spatial_layers = atoi(optarg);
-        break;
-      case 'p':
-        key_period = atoi(optarg);
-        break;
-      case '?':
-        usage(argv[0]);
-    }
-
-  // Parse required parameters
-  if (argc - optind != 2) {
-    usage(argv[0]);
-  }
+  // initialize SvcContext with parameters that will be passed to vpx_svc_init
+  svc_ctx->log_level = SVC_LOG_DEBUG;
+  svc_ctx->spatial_layers = default_spatial_layers;
+  svc_ctx->encoding_mode = default_encoding_mode;
+  // when using a dummy frame, that frame is only encoded to be full size
+  svc_ctx->first_frame_full_size = default_use_dummy_frame;
 
-  input_filename = argv[optind];
-  output_filename = argv[optind + 1];
-
-  if (width < 16 || width % 2 || height < 16 || height % 2)
-    die("Invalid resolution: %d x %d", width, height);
-
-  /* Populate encoder configuration */
-  res = vpx_codec_enc_config_default(interface, cfg, 0);
+  // start with default encoder configuration
+  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
   if (res) {
     die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
   }
-  printf(
-      "Codec %s\nframes: %d, skip: %d, layers: %d\n"
-      "width %d, height: %d, \n"
-      "num: %d, den: %d, bitrate: %d, \n"
-      "key period: %d \n",
-      vpx_codec_iface_name(interface), number_frames_to_code,
-      number_frames_to_skip, number_spatial_layers, width, height, timebase_num,
-      timebase_den, bitrate, key_period);
-
-  // Do minimal check at the application level. Encoder parameters will be
-  // checked internally
-  check_parameters();
-
-  cfg->rc_target_bitrate = bitrate;
-  cfg->g_w = width;
-  cfg->g_h = height;
-  cfg->g_timebase.num = timebase_num;
-  cfg->g_timebase.den = timebase_den;
-  cfg->ss_number_layers = number_spatial_layers;
-}
-
-static void set_default_configuration(vpx_codec_enc_cfg_t *cfg) {
-  /* Real time parameters */
-  cfg->rc_dropframe_thresh = 0;
-  cfg->rc_end_usage = VPX_CBR;
-  cfg->rc_resize_allowed = 0;
-  cfg->rc_min_quantizer = 33;
-  cfg->rc_max_quantizer = 33;
-  cfg->rc_undershoot_pct = 100;
-  cfg->rc_overshoot_pct = 15;
-  cfg->rc_buf_initial_sz = 500;
-  cfg->rc_buf_optimal_sz = 600;
-  cfg->rc_buf_sz = 1000;
-
-  /* Enable error resilient mode */
-  cfg->g_error_resilient = 1;
-  cfg->g_lag_in_frames = 0;
-
-  /* Disable automatic keyframe placement */
-  cfg->kf_mode = VPX_KF_DISABLED;
-  cfg->kf_min_dist = cfg->kf_max_dist = 3000;
-}
-
-static void initialize_codec(vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *cfg) {
-  int max_intra_size_pct;
-
-  /* Initialize codec */
-  if (vpx_codec_enc_init(codec, interface, cfg, VPX_CODEC_USE_PSNR))
-    die_codec(codec, "Failed to initialize encoder");
-
-  vpx_codec_control(codec, VP9E_SET_SVC, 1);
-  /* Cap CPU & first I-frame size */
-  vpx_codec_control(codec, VP8E_SET_CPUUSED, 1);
-  vpx_codec_control(codec, VP8E_SET_STATIC_THRESHOLD, 1);
-  vpx_codec_control(codec, VP8E_SET_NOISE_SENSITIVITY, 1);
-  vpx_codec_control(codec, VP8E_SET_TOKEN_PARTITIONS, 1);
-
-  max_intra_size_pct =
-      (int)(((double)cfg->rc_buf_optimal_sz * 0.5) *
-            ((double)cfg->g_timebase.den / cfg->g_timebase.num) / 10.0);
-  /* printf ("max_intra_size_pct=%d\n", max_intra_size_pct); */
+  // update enc_cfg with app default values
+  enc_cfg->g_w = default_width;
+  enc_cfg->g_h = default_height;
+  enc_cfg->g_timebase.num = default_timebase_num;
+  enc_cfg->g_timebase.den = default_timebase_den;
+  enc_cfg->rc_target_bitrate = default_bitrate;
+  enc_cfg->kf_min_dist = default_kf_dist;
+  enc_cfg->kf_max_dist = default_kf_dist;
+
+  // initialize AppInput with default values
+  app_input->frames_to_code = default_frames_to_code;
+  app_input->frames_to_skip = default_frames_to_skip;
+
+  // process command line options
+  argv = argv_dup(argc - 1, argv_ + 1);
+  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
+    arg.argv_step = 1;
+
+    if (arg_match(&arg, &encoding_mode_arg, argi)) {
+      svc_ctx->encoding_mode = arg_parse_enum_or_int(&arg);
+    } else if (arg_match(&arg, &frames_arg, argi)) {
+      app_input->frames_to_code = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &width_arg, argi)) {
+      enc_cfg->g_w = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &height_arg, argi)) {
+      // Height is range-checked together with the width once all options
+      // have been parsed.
+      enc_cfg->g_h = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &timebase_arg, argi)) {
+      enc_cfg->g_timebase = arg_parse_rational(&arg);
+    } else if (arg_match(&arg, &bitrate_arg, argi)) {
+      enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &skip_frames_arg, argi)) {
+      app_input->frames_to_skip = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &layers_arg, argi)) {
+      svc_ctx->spatial_layers = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
+      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
+      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
+    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
+      vpx_svc_set_scale_factors(svc_ctx, arg.val);
+    } else if (arg_match(&arg, &quantizers_arg, argi)) {
+      vpx_svc_set_quantizers(svc_ctx, arg.val);
+    } else if (arg_match(&arg, &dummy_frame_arg, argi)) {
+      svc_ctx->first_frame_full_size = arg_parse_int(&arg);
+    } else {
+      ++argj;
+    }
+  }
 
-  vpx_codec_control(codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct);
-}
+  // Check for unrecognized options
+  for (argi = argv; *argi; ++argi)
+    if (argi[0][0] == '-' && strlen(argi[0]) > 1)
+      die("Error: Unrecognized option %s\n", *argi);
 
-static int calculate_layer(int frame_cnt, int number_spatial_layers) {
-  if (frame_cnt == 0)
-    return 0;
-  else
-    return (frame_cnt + number_spatial_layers - 1) % number_spatial_layers;
-}
+  if (argv[0] == NULL || argv[1] == 0) {
+    usage(argv_[0]);
+  }
+  app_input->input_filename = argv[0];
+  app_input->output_filename = argv[1];
+  free(argv);
 
-static void switch_to_layer(int layer, unsigned int initial_width,
-                            unsigned int initial_height,
-                            vpx_codec_ctx_t *codec) {
-  // Set layer size
-  int scaling_factor_num[MAX_LAYERS] = {2, 1, 4, 2, 1};
-  int scaling_factor_den[MAX_LAYERS] = {9, 3, 9, 3, 1};
-
-  int quantizer[MAX_LAYERS] = {60, 53, 39, 33, 27};
-
-  unsigned int current_width;
-  unsigned int current_height;
-
-  current_width = initial_width *
-                  scaling_factor_num[layer + 5 - number_spatial_layers] /
-                  scaling_factor_den[layer + 5 - number_spatial_layers];
-  current_height = initial_height *
-                   scaling_factor_num[layer + 5 - number_spatial_layers] /
-                   scaling_factor_den[layer + 5 - number_spatial_layers];
-
-  current_width += current_width % 2;
-  current_height += current_height % 2;
-
-  vpx_codec_control(codec, VP9E_SET_WIDTH, &current_width);
-  vpx_codec_control(codec, VP9E_SET_HEIGHT, &current_height);
-
-  // Set layer context
-  vpx_codec_control(codec, VP9E_SET_LAYER, &layer);
-  vpx_codec_control(codec, VP9E_SET_MAX_Q,
-                    quantizer[layer + 5 - number_spatial_layers]);
-  vpx_codec_control(codec, VP9E_SET_MIN_Q,
-                    quantizer[layer + 5 - number_spatial_layers]);
-}
+  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
+      enc_cfg->g_h % 2)
+    die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
 
-static int get_flag(int is_I_frame_in_layer, int layer, ENCODING_MODE mode) {
-  // First layer
-  switch (mode) {
-    case INTER_LAYER_PREDICTION_I:
-      if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF;
-      if (layer == 0)
-        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-               VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      else if (is_I_frame_in_layer)
-        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-               VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST;
-      else
-        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-               VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      break;
-
-    case INTER_LAYER_PREDICTION_IP:
-      if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF;
-      if (layer == 0)
-        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-               VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      else if (is_I_frame_in_layer)
-        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-               VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST;
-      else
-        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
-      break;
-
-    case USE_GOLDEN_FRAME:
-      if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF;
-      if (2 * number_spatial_layers - NUM_BUFFERS <= layer) {
-        if (layer == 0)
-          return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                 VP8_EFLAG_NO_REF_ARF;
-        else if (is_I_frame_in_layer)
-          return VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF |
-                 VP8_EFLAG_NO_REF_LAST;
-        else
-          return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      } else {
-        if (layer == 0)
-          return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-        else if (is_I_frame_in_layer)
-          return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST;
-        else
-          return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      }
-      break;
-    default:
-      return VPX_EFLAG_FORCE_KF;
-  }
+  printf(
+      "Codec %s\nframes: %d, skip: %d\n"
+      "mode: %d, layers: %d\n"
+      "width %d, height: %d,\n"
+      "num: %d, den: %d, bitrate: %d,\n"
+      "gop size: %d, use_dummy_frame: %d\n",
+      vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
+      app_input->frames_to_skip, svc_ctx->encoding_mode,
+      svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
+      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
+      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist,
+      svc_ctx->first_frame_full_size);
 }
 
-int main(int argc, char **argv) {
-  FILE *infile, *outfile[MAX_LAYERS];
+int main(int argc, const char **argv) {
+  AppInput app_input = {0};
+  FILE *infile, *outfile;
   vpx_codec_ctx_t codec;
-  vpx_codec_enc_cfg_t cfg;
-  int frame_cnt = 0;
+  vpx_codec_enc_cfg_t enc_cfg;
+  SvcContext svc_ctx;
+  uint32_t i;
+  uint32_t frame_cnt = 0;
   vpx_image_t raw;
-  int frame_avail = 1;
-  int got_data = 0;
-  int i;
-  int frames_in_layer[MAX_LAYERS] = {0};
-  clock_t before;
-  clock_t after;
+  vpx_codec_err_t res;
   int pts = 0;            /* PTS starts at 0 */
   int frame_duration = 1; /* 1 timebase tick per frame */
 
-  parse_command_line(argc, argv, &cfg);
+  memset(&svc_ctx, 0, sizeof(svc_ctx));
+  svc_ctx.log_print = 1;  // library messages are printed, not buffered
+  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
 
   // Allocate image buffer
-  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, cfg.g_w, cfg.g_h, 32))
-    die("Failed to allocate image", cfg.g_w, cfg.g_h);
-
-  set_default_configuration(&cfg);
-
-  /* Open input file */
-  if (!(infile = fopen(input_filename, "rb")))
-    die("Failed to open %s for reading", argv[1]);
-
-  /* Open output file  */
-  for (i = 0; i < number_spatial_layers; i++) {
-    char file_name[512];
-    snprintf(file_name, sizeof(file_name), "%s_%d.ivf", output_filename, i);
-    if (!(outfile[i] = fopen(file_name, "wb")))
-      die("Failed to open %s for writing", file_name);
-    write_ivf_file_header(outfile[i], &cfg, 0);
-  }
+  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32))
+    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
+
+  if (!(infile = fopen(app_input.input_filename, "rb")))
+    die("Failed to open %s for reading\n", app_input.input_filename);
+
+  if (!(outfile = fopen(app_input.output_filename, "wb")))
+    die("Failed to open %s for writing\n", app_input.output_filename);
+
+  // Initialize codec
+  if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
+      VPX_CODEC_OK)
+    die("Failed to initialize encoder\n");
 
-  initialize_codec(&codec, &cfg);
+  write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h,
+                        enc_cfg.g_timebase.num, enc_cfg.g_timebase.den, 0);
 
   // skip initial frames
-  for (i = 0; i < number_frames_to_skip; i++) {
+  for (i = 0; i < app_input.frames_to_skip; ++i) {
     read_frame(infile, &raw);
   }
 
-  before = clock();
-  // Encoding frames
-  while ((frame_avail || got_data) &&
-         frame_cnt <= number_frames_to_code * number_spatial_layers) {
-    int flags = 0;
-    vpx_codec_iter_t iter = NULL;
-    const vpx_codec_cx_pkt_t *pkt;
-
-    int layer = calculate_layer(frame_cnt, number_spatial_layers);
-    int is_I_frame_in_layer =
-        (((frame_cnt - 1) / number_spatial_layers % key_period) == 0);
-    int is_dummy = (frame_cnt == 0);
-
-    if (is_dummy) {  // Dummy frame
-      flags = VPX_EFLAG_FORCE_KF;
-      frame_avail = read_dummy_frame(&raw);
-
-    } else {  // Regular frame
-      // Read a new frame only at the base layer
-      if (layer == 0) frame_avail = read_frame(infile, &raw);
-      switch_to_layer(layer, cfg.g_w, cfg.g_h, &codec);
-      flags = get_flag(is_I_frame_in_layer, layer, INTER_LAYER_PREDICTION_I);
+  // Encode frames_to_code frames, preceded by the dummy frame when enabled
+  while (frame_cnt <
+         app_input.frames_to_code + (svc_ctx.first_frame_full_size ? 1 : 0)) {
+    if (frame_cnt == 0 && svc_ctx.first_frame_full_size) {
+      create_dummy_frame(&raw);
+    } else {
+      if (!read_frame(infile, &raw)) break;
     }
-
-    // Actual Encoding
-    if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags,
-                         VPX_DL_REALTIME))
+    res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
+                         VPX_DL_REALTIME);
+    printf("%s", vpx_svc_get_message(&svc_ctx));
+    if (res != VPX_CODEC_OK) {
       die_codec(&codec, "Failed to encode frame");
-
-    got_data = 0;
-    // Process data / Get PSNR statistics
-    while ((pkt = vpx_codec_get_cx_data(&codec, &iter))) {
-      got_data = 1;
-      switch (pkt->kind) {
-        case VPX_CODEC_CX_FRAME_PKT:
-          for (i = layer; i < number_spatial_layers; i++) {
-            write_ivf_frame_header(outfile[i], pkt);
-            (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-                         outfile[i]);
-            frames_in_layer[i]++;
-          }
-          break;
-        case VPX_CODEC_PSNR_PKT:
-          if (frame_cnt != 0)
-            printf(
-                "Processed Frame %d, layer %d, PSNR(Total/Y/U/V): "
-                "%2.3f  %2.3f  %2.3f  %2.3f \n",
-                (frame_cnt - 1) / number_spatial_layers + 1, layer,
-                pkt->data.psnr.psnr[0], pkt->data.psnr.psnr[1],
-                pkt->data.psnr.psnr[2], pkt->data.psnr.psnr[3]);
-          break;
-        default:
-          break;
-      }
     }
-    frame_cnt++;
-    // TODO(ivan): Modify ts later if(!layer)
+    if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
+      write_ivf_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx));
+      (void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1,
+                   vpx_svc_get_frame_size(&svc_ctx), outfile);
+    }
+    ++frame_cnt;
     pts += frame_duration;
   }
-  // end while
 
-  after = clock();
-  printf("Processed %d frames in different resolutions in %ld ms.\n",
-         frame_cnt - 1, (int)(after - before) / (CLOCKS_PER_SEC / 1000));
+  printf("Processed %d frames\n", frame_cnt - svc_ctx.first_frame_full_size);
 
   fclose(infile);
-
   if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
 
-  /* Try to rewrite the output file headers with the actual frame count */
-  for (i = 0; i < number_spatial_layers; i++) {
-    if (!fseek(outfile[i], 0, SEEK_SET)) {
-      write_ivf_file_header(outfile[i], &cfg, frames_in_layer[i]);
-    }
-    fclose(outfile[i]);
+  // rewrite the output file headers with the actual frame count
+  if (!fseek(outfile, 0, SEEK_SET)) {
+    write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h,
+                          enc_cfg.g_timebase.num, enc_cfg.g_timebase.den,
+                          frame_cnt);
   }
+  fclose(outfile);
+  vpx_img_free(&raw);
+
+  // display average size, psnr
+  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
+  vpx_svc_release(&svc_ctx);
 
   return EXIT_SUCCESS;
 }
diff --git a/vpx/exports_enc b/vpx/exports_enc
index 3d5674926..1d9340c67 100644
--- a/vpx/exports_enc
+++ b/vpx/exports_enc
@@ -6,3 +6,17 @@ text vpx_codec_get_cx_data
 text vpx_codec_get_global_headers
 text vpx_codec_get_preview_frame
 text vpx_codec_set_cx_data_buf
+text vpx_svc_dump_statistics
+text vpx_svc_encode
+text vpx_svc_free
+text vpx_svc_get_buffer
+text vpx_svc_get_encode_frame_count
+text vpx_svc_get_frame_size
+text vpx_svc_get_message
+text vpx_svc_init
+text vpx_svc_is_keyframe
+text vpx_svc_release
+text vpx_svc_set_keyframe
+text vpx_svc_set_options
+text vpx_svc_set_quantizers
+text vpx_svc_set_scale_factors
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
new file mode 100644
index 000000000..57d21dce5
--- /dev/null
+++ b/vpx/src/svc_encodeframe.c
@@ -0,0 +1,981 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/**
+ * @file
+ * VP9 SVC encoding support via libvpx
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define VPX_DISABLE_CTRL_TYPECHECKS 1
+#define VPX_CODEC_DISABLE_COMPAT 1
+#include "vpx/svc_context.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+
+#if defined(__MINGW32__) && !defined(MINGW_HAS_SECURE_API)
+#define strtok_r strtok_s
+// proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h
+_CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context);
+#endif
+
+#ifdef _MSC_VER
+#define strdup _strdup
+#define strtok_r strtok_s
+#endif
+
+#define SVC_REFERENCE_FRAMES 8  // threshold term in the USE_GOLDEN_FRAME test
+#define SUPERFRAME_SLOTS (8)    // entries in Superframe.sizes
+#define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2)
+#define OPTION_BUFFER_SIZE 256  // bytes per option string kept in SvcInternal
+// fallbacks used when the quantizer / scale factor string is NULL or empty
+static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27";
+static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16";
+
+typedef struct SvcInternal {
+  char options[OPTION_BUFFER_SIZE];        // set by vpx_svc_set_options
+  char quantizers[OPTION_BUFFER_SIZE];     // set by vpx_svc_set_quantizers
+  char scale_factors[OPTION_BUFFER_SIZE];  // set by vpx_svc_set_scale_factors
+
+  // parsed values; layer i of n is stored at index i + VPX_SS_MAX_LAYERS - n
+  int scaling_factor_num[VPX_SS_MAX_LAYERS];
+  int scaling_factor_den[VPX_SS_MAX_LAYERS];
+  int quantizer[VPX_SS_MAX_LAYERS];
+
+  // accumulated statistics
+  double psnr_in_layer[VPX_SS_MAX_LAYERS];
+  uint32_t bytes_in_layer[VPX_SS_MAX_LAYERS];
+
+  // codec encoding values, copied from the encoder config by vpx_svc_init
+  int width;    // width of highest layer
+  int height;   // height of highest layer
+  int kf_dist;  // distance between keyframes
+
+  // state variables
+  int encode_frame_count;  // 0 while the dummy frame may still be pending
+  int frame_within_gop;    // 0 marks a keyframe position
+  vpx_enc_frame_flags_t enc_frame_flags;  // set by calculate_enc_frame_flags
+  int layers;              // layer count latched by vpx_svc_init
+  int layer;               // layer the flags / parameters are computed for
+  int is_keyframe;
+
+  size_t frame_size;
+  size_t buffer_size;
+  void *buffer;
+
+  char message_buffer[2048];   // text appended by svc_log
+  vpx_codec_ctx_t *codec_ctx;  // set by vpx_svc_init; receives err_detail
+} SvcInternal;
+
+// Superframe is used to generate an index of individual frames (i.e., layers)
+struct Superframe {
+  int count;                         // frames listed; an index needs 1..7
+  uint32_t sizes[SUPERFRAME_SLOTS];  // size of each frame
+  uint32_t magnitude;                // selects the bytes used per size entry
+  uint8_t buffer[SUPERFRAME_BUFFER_SIZE];  // filled by sf_create_index
+  size_t index_size;                 // bytes of buffer used by the index
+};
+
+// One encoded frame layer, kept as a node of a singly linked list
+struct LayerData {
+  void *buf;    // compressed data buffer (heap copy made by ld_create)
+  size_t size;  // length of compressed data
+  struct LayerData *next;  // following node; ld_list_add ends a list with NULL
+};
+
+// Create a LayerData node holding a private copy of the `size` bytes at `buf`.
+// Returns NULL if either allocation fails; release the node with ld_free().
+static struct LayerData *ld_create(void *buf, size_t size) {
+  struct LayerData *const layer_data = malloc(sizeof(*layer_data));
+  if (layer_data == NULL) return NULL;
+  layer_data->buf = malloc(size);
+  if (layer_data->buf == NULL) {
+    free(layer_data);
+    return NULL;
+  }
+  memcpy(layer_data->buf, buf, size);
+  layer_data->size = size;
+  layer_data->next = NULL;  // a fresh node must not carry a garbage link
+  return layer_data;
+}
+
+// Release a LayerData node together with the payload copy it owns.
+// Passing NULL is a no-op.
+static void ld_free(struct LayerData *layer_data) {
+  if (layer_data == NULL) return;
+  if (layer_data->buf != NULL) {
+    free(layer_data->buf);
+    layer_data->buf = NULL;
+  }
+  free(layer_data);
+}
+
+// Append layer_data at the tail of *list; it becomes the new last node.
+static void ld_list_add(struct LayerData **list, struct LayerData *layer_data) {
+  struct LayerData **tail;
+  // follow the next-pointers until the one that is still NULL
+  for (tail = list; *tail != NULL; tail = &(*tail)->next) continue;
+  *tail = layer_data;
+  layer_data->next = NULL;
+}
+
+// Total number of payload bytes held by the list (0 when the list is empty).
+static size_t ld_list_get_buffer_size(struct LayerData *list) {
+  size_t total = 0;
+  const struct LayerData *node = list;
+  while (node != NULL) {
+    total += node->size;
+    node = node->next;
+  }
+  return total;
+}
+
+// Concatenate the payloads of all nodes, in list order, into buffer, which
+// must have room for ld_list_get_buffer_size(list) bytes.
+static void ld_list_copy_to_buffer(struct LayerData *list, uint8_t *buffer) {
+  struct LayerData *p;
+
+  for (p = list; p != NULL; p = p->next) {
+    memcpy(buffer, p->buf, p->size);
+    buffer += p->size;
+  }
+}
+
+// Free every node of the list together with the buffer each node owns.
+static void ld_list_free(struct LayerData *list) {
+  struct LayerData *node;
+  struct LayerData *following;
+
+  for (node = list; node != NULL; node = following) {
+    following = node->next;  // read the link before the node is released
+    ld_free(node);
+  }
+}
+
+// Build the superframe index in sf->buffer and record its length in
+// sf->index_size: marker, (mag + 1) low-byte-first bytes per size, marker.
+static void sf_create_index(struct Superframe *sf) {
+  uint8_t marker = 0xc0;
+  int i;
+  uint32_t mag, mask;
+  uint8_t *bufp;
+
+  if (sf->count == 0 || sf->count >= 8) return;
+
+  // Add the number of frames to the marker byte
+  marker |= sf->count - 1;
+
+  // Choose the magnitude. It stops at 3 (four bytes per size): the marker
+  // has two bits for it and sf->buffer reserves four bytes per slot.
+  for (mag = 0, mask = 0xff; mag < 3; ++mag) {
+    if (sf->magnitude < mask) break;
+    mask = (mask << 8) | 0xff;
+  }
+  marker |= mag << 3;
+
+  // Write the index
+  sf->index_size = 2 + (mag + 1) * sf->count;
+  bufp = sf->buffer;
+  *bufp++ = marker;
+  for (i = 0; i < sf->count; ++i) {
+    uint32_t this_sz = sf->sizes[i];  // unsigned, so the shifts are exact
+    uint32_t j;
+    for (j = 0; j <= mag; ++j) {
+      *bufp++ = this_sz & 0xff;
+      this_sz >>= 8;
+    }
+  }
+  *bufp++ = marker;
+}
+
+// Return the SvcInternal attached to svc_ctx, allocating a zero-filled one on
+// first use. Yields NULL when svc_ctx is NULL or the allocation fails.
+static SvcInternal *get_svc_internal(SvcContext *svc_ctx) {
+  SvcInternal *fresh;
+  if (svc_ctx == NULL) return NULL;
+  if (svc_ctx->internal != NULL) return svc_ctx->internal;
+  fresh = malloc(sizeof(*fresh));
+  if (fresh != NULL) memset(fresh, 0, sizeof(*fresh));
+  svc_ctx->internal = fresh;
+  return svc_ctx->internal;
+}
+
+// read-only lookup of the internal state; never allocates
+static const SvcInternal *get_const_svc_internal(const SvcContext *svc_ctx) {
+  return svc_ctx == NULL ? NULL : svc_ctx->internal;
+}
+
+// empty the accumulated log text; svc_ctx->internal must already exist
+static void svc_log_reset(SvcContext *svc_ctx) {
+  ((SvcInternal *)svc_ctx->internal)->message_buffer[0] = '\0';
+}
+
+// Format a message and, if `level` passes svc_ctx->log_level, either print it
+// (log_print set) or append it to the internal message buffer. Error-level
+// messages also publish the message buffer as the codec's err_detail.
+// Returns vsnprintf's result, or 0 when the message was filtered out.
+static int svc_log(SvcContext *svc_ctx, int level, const char *fmt, ...) {
+  char text[512];
+  int written;
+  va_list args;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+
+  if (level > svc_ctx->log_level) return 0;
+
+  va_start(args, fmt);
+  written = vsnprintf(text, sizeof(text), fmt, args);
+  va_end(args);
+
+  if (!svc_ctx->log_print) {
+    // append, always leaving room for the terminating NUL
+    const size_t used = strlen(si->message_buffer);
+    strncat(si->message_buffer, text, sizeof(si->message_buffer) - used - 1);
+  } else {
+    printf("%s", text);
+  }
+  if (level == SVC_LOG_ERROR) si->codec_ctx->err_detail = si->message_buffer;
+  return written;
+}
+
+// Map the "encoding-mode" option text onto svc_ctx->encoding_mode.
+static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx,
+                                                const char *value_str) {
+  static const char *const names[] = {"i", "alt-ip", "ip", "gf"};
+  static const int modes[] = {INTER_LAYER_PREDICTION_I,
+                              ALT_INTER_LAYER_PREDICTION_IP,
+                              INTER_LAYER_PREDICTION_IP, USE_GOLDEN_FRAME};
+  size_t k;
+  for (k = 0; k < sizeof(names) / sizeof(names[0]); ++k) {
+    if (strcmp(value_str, names[k]) != 0) continue;
+    svc_ctx->encoding_mode = modes[k];
+    return VPX_CODEC_OK;
+  }
+  svc_log(svc_ctx, SVC_LOG_ERROR, "invalid encoding mode: %s", value_str);
+  return VPX_CODEC_INVALID_PARAM;
+}
+
+// Parse comma separated per-layer quantizers (1..100, default list when NULL
+// or empty) into si->quantizer, right-aligned within VPX_SS_MAX_LAYERS.
+// Returns INVALID_PARAM for a bad or missing value, MEM_ERROR if strdup fails.
+static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx,
+                                              const char *quantizer_values) {
+  char *input_string;
+  char *token;
+  const char *delim = ",";
+  char *save_ptr;
+  int found = 0;
+  int i, q;
+  int res = VPX_CODEC_OK;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+
+  if (quantizer_values == NULL || strlen(quantizer_values) == 0)
+    quantizer_values = DEFAULT_QUANTIZER_VALUES;
+  input_string = strdup(quantizer_values);  // strtok_r edits its input
+  if (input_string == NULL) return VPX_CODEC_MEM_ERROR;
+  token = strtok_r(input_string, delim, &save_ptr);
+  for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+    q = 0;
+    if (token != NULL) {
+      q = atoi(token);
+      if (q <= 0 || q > 100) {
+        svc_log(svc_ctx, SVC_LOG_ERROR,
+                "svc-quantizer-values: invalid value %s\n", token);
+        res = VPX_CODEC_INVALID_PARAM;
+        break;
+      }
+      token = strtok_r(NULL, delim, &save_ptr);
+      found = i + 1;
+    }
+    si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q;
+  }
+  if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) {
+    svc_log(svc_ctx, SVC_LOG_ERROR,
+            "svc: quantizers: %d values required, but only %d specified\n",
+            svc_ctx->spatial_layers, found);
+    res = VPX_CODEC_INVALID_PARAM;
+  }
+  free(input_string);
+  return res;
+}
+
+static void log_invalid_scale_factor(SvcContext *svc_ctx, const char *value) {
+  static const char fmt[] = "svc scale-factors: invalid value %s\n";
+  svc_log(svc_ctx, SVC_LOG_ERROR, fmt, value);
+}
+
+// Parse a comma separated list of <num>/<den> scale factors, one per spatial
+// layer, into si->scaling_factor_num / si->scaling_factor_den. A NULL or
+// empty string selects DEFAULT_SCALE_FACTORS. Entries are stored
+// right-aligned, at index i + VPX_SS_MAX_LAYERS - spatial_layers.
+// Returns VPX_CODEC_INVALID_PARAM when an entry is malformed or non-positive
+// or too few entries are given, VPX_CODEC_MEM_ERROR when strdup fails.
+static vpx_codec_err_t parse_scale_factors(SvcContext *svc_ctx,
+                                           const char *scale_factors) {
+  char *input_string;
+  char *token;
+  const char *delim = ",";
+  char *save_ptr;
+  int found = 0;
+  int i;
+  int64_t num, den;
+  int res = VPX_CODEC_OK;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+
+  if (scale_factors == NULL || strlen(scale_factors) == 0)
+    scale_factors = DEFAULT_SCALE_FACTORS;
+  input_string = strdup(scale_factors);  // strtok_r edits its input
+  if (input_string == NULL) return VPX_CODEC_MEM_ERROR;
+
+  token = strtok_r(input_string, delim, &save_ptr);
+  for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+    num = den = 0;
+    if (token != NULL) {
+      char *end;
+      num = strtol(token, &end, 10);
+      // the denominator is only read once "<num>/" has parsed cleanly
+      if (num > 0 && *end == '/') den = strtol(end + 1, &end, 10);
+      if (num <= 0 || den <= 0) {
+        // report the whole entry rather than the unparsed remainder
+        log_invalid_scale_factor(svc_ctx, token);
+        res = VPX_CODEC_INVALID_PARAM;
+        break;
+      }
+      token = strtok_r(NULL, delim, &save_ptr);
+      found = i + 1;
+    }
+    si->scaling_factor_num[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] =
+        (int)num;
+    si->scaling_factor_den[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] =
+        (int)den;
+  }
+  if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) {
+    svc_log(svc_ctx, SVC_LOG_ERROR,
+            "svc: scale-factors: %d values required, but only %d specified\n",
+            svc_ctx->spatial_layers, found);
+    res = VPX_CODEC_INVALID_PARAM;
+  }
+  free(input_string);
+  return res;
+}
+
+/**
+ * Parse SVC encoding options
+ * Format: space separated <name>=<value> pairs, e.g.
+ *         encoding-mode=<svc_mode> layers=<layer_count>
+ *         scale-factors=<n1>/<d1>,<n2>/<d2>,... quantizers=<q1>,<q2>,...
+ * svc_mode = [i|ip|alt-ip|gf], layer_count = 1..VPX_SS_MAX_LAYERS
+ */
+static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
+  char *input_string;
+  char *option_name;
+  char *option_value;
+  char *input_ptr;
+  int res = VPX_CODEC_OK;
+
+  if (options == NULL) return VPX_CODEC_OK;
+  input_string = strdup(options);  // strtok_r needs a writable copy
+  if (input_string == NULL) return VPX_CODEC_MEM_ERROR;
+
+  option_name = strtok_r(input_string, "=", &input_ptr);
+  while (option_name != NULL) {
+    option_value = strtok_r(NULL, " ", &input_ptr);
+    if (option_value == NULL) {
+      svc_log(svc_ctx, SVC_LOG_ERROR, "option missing value: %s\n",
+              option_name);
+      res = VPX_CODEC_INVALID_PARAM;
+    } else if (strcmp("encoding-mode", option_name) == 0) {
+      res = set_option_encoding_mode(svc_ctx, option_value);
+    } else if (strcmp("layers", option_name) == 0) {  // range-checked
+      const int layers = atoi(option_value);
+      if (layers >= 1 && layers <= VPX_SS_MAX_LAYERS) {
+        svc_ctx->spatial_layers = layers;
+      } else {
+        svc_log(svc_ctx, SVC_LOG_ERROR, "invalid layers: %s\n", option_value);
+        res = VPX_CODEC_INVALID_PARAM;
+      }
+    } else if (strcmp("scale-factors", option_name) == 0) {
+      res = parse_scale_factors(svc_ctx, option_value);
+    } else if (strcmp("quantizers", option_name) == 0) {
+      res = parse_quantizer_values(svc_ctx, option_value);
+    } else {
+      svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
+      res = VPX_CODEC_INVALID_PARAM;
+    }
+    if (res != VPX_CODEC_OK) break;
+    option_name = strtok_r(NULL, "=", &input_ptr);
+  }
+  free(input_string);
+  return res;
+}
+
+// Store the aggregate option string; it is parsed later by vpx_svc_init().
+// Input longer than the internal buffer is silently truncated.
+vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) {
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (!svc_ctx || !options || !si) return VPX_CODEC_INVALID_PARAM;
+  strncpy(si->options, options, sizeof(si->options) - 1);
+  si->options[sizeof(si->options) - 1] = '\0';
+  return VPX_CODEC_OK;
+}
+
+// Store the per-layer quantizer list; it is parsed later by vpx_svc_init().
+// Input longer than the internal buffer is silently truncated.
+vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx,
+                                       const char *quantizers) {
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (!svc_ctx || !quantizers || !si) return VPX_CODEC_INVALID_PARAM;
+  strncpy(si->quantizers, quantizers, sizeof(si->quantizers) - 1);
+  si->quantizers[sizeof(si->quantizers) - 1] = '\0';
+  return VPX_CODEC_OK;
+}
+
+// Store the per-layer scale factor list; it is parsed by vpx_svc_init().
+// Input longer than the internal buffer is silently truncated.
+vpx_codec_err_t vpx_svc_set_scale_factors(SvcContext *svc_ctx,
+                                          const char *scale_factors) {
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (!svc_ctx || !scale_factors || !si) return VPX_CODEC_INVALID_PARAM;
+  strncpy(si->scale_factors, scale_factors, sizeof(si->scale_factors) - 1);
+  si->scale_factors[sizeof(si->scale_factors) - 1] = '\0';
+  return VPX_CODEC_OK;
+}
+
+// Validate the SVC settings, parse the stored option strings, override the
+// encoder configuration for SVC and initialize the codec. Returns the first
+// error encountered, VPX_CODEC_OK otherwise.
+vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+                             vpx_codec_iface_t *iface,
+                             vpx_codec_enc_cfg_t *enc_cfg) {
+  int max_intra_size_pct;
+  vpx_codec_err_t res;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL || enc_cfg == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+  if (si == NULL) return VPX_CODEC_MEM_ERROR;
+
+  si->codec_ctx = codec_ctx;
+  si->width = enc_cfg->g_w;
+  si->height = enc_cfg->g_h;
+
+  if (enc_cfg->kf_max_dist < 2) {
+    svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n",
+            enc_cfg->kf_max_dist);
+    return VPX_CODEC_INVALID_PARAM;
+  }
+  si->kf_dist = enc_cfg->kf_max_dist;
+
+  if (svc_ctx->spatial_layers == 0)
+    svc_ctx->spatial_layers = VPX_SS_DEFAULT_LAYERS;
+  if (svc_ctx->spatial_layers < 1 ||
+      svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) {
+    svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers: invalid value: %d\n",
+            svc_ctx->spatial_layers);
+    return VPX_CODEC_INVALID_PARAM;
+  }
+
+  res = parse_quantizer_values(svc_ctx, si->quantizers);
+  if (res != VPX_CODEC_OK) return res;
+  res = parse_scale_factors(svc_ctx, si->scale_factors);
+  if (res != VPX_CODEC_OK) return res;
+
+  // parse aggregate command line options
+  res = parse_options(svc_ctx, si->options);
+  if (res != VPX_CODEC_OK) return res;
+
+  // A "layers=" option may have replaced the count checked above: check it
+  // again, then latch it (SvcInternal's copy allows a single layer 1st frame)
+  if (svc_ctx->spatial_layers < 1 ||
+      svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS)
+    return VPX_CODEC_INVALID_PARAM;
+  si->layers = svc_ctx->spatial_layers;
+
+  // modify encoder configuration
+  enc_cfg->ss_number_layers = si->layers;
+  enc_cfg->kf_mode = VPX_KF_DISABLED;
+  enc_cfg->g_pass = VPX_RC_ONE_PASS;
+  // Lag in frames not currently supported
+  enc_cfg->g_lag_in_frames = 0;
+  // TODO(ivanmaltz): determine if these values need to be set explicitly for
+  // svc, or if the normal default/override mechanism can be used
+  enc_cfg->rc_dropframe_thresh = 0;
+  enc_cfg->rc_end_usage = VPX_CBR;
+  enc_cfg->rc_resize_allowed = 0;
+  enc_cfg->rc_min_quantizer = 33;
+  enc_cfg->rc_max_quantizer = 33;
+  enc_cfg->rc_undershoot_pct = 100;
+  enc_cfg->rc_overshoot_pct = 15;
+  enc_cfg->rc_buf_initial_sz = 500;
+  enc_cfg->rc_buf_optimal_sz = 600;
+  enc_cfg->rc_buf_sz = 1000;
+  enc_cfg->g_error_resilient = 1;
+
+  // Initialize codec
+  res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR);
+  if (res != VPX_CODEC_OK) {
+    svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n");
+    return res;
+  }
+  vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1);
+  vpx_codec_control(codec_ctx, VP8E_SET_CPUUSED, 1);
+  vpx_codec_control(codec_ctx, VP8E_SET_STATIC_THRESHOLD, 1);
+  vpx_codec_control(codec_ctx, VP8E_SET_NOISE_SENSITIVITY, 1);
+  vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1);
+
+  max_intra_size_pct =
+      (int)(((double)enc_cfg->rc_buf_optimal_sz * 0.5) *
+            ((double)enc_cfg->g_timebase.den / enc_cfg->g_timebase.num) / 10.0);
+  vpx_codec_control(codec_ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+                    max_intra_size_pct);
+  return VPX_CODEC_OK;
+}
+
+// SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h
+// by map_vp8_flags(); an absent flag becomes the matching NO_REF / NO_UPD flag
+// encoder should reference the last frame
+#define USE_LAST (1 << 0)
+
+// encoder should reference the alt ref frame
+#define USE_ARF (1 << 1)
+
+// encoder should reference the golden frame
+#define USE_GF (1 << 2)
+
+// encoder should copy current frame to the last frame buffer
+#define UPDATE_LAST (1 << 3)
+
+// encoder should copy current frame to the alt ref frame buffer
+#define UPDATE_ARF (1 << 4)
+
+// encoder should copy current frame to the golden frame
+#define UPDATE_GF (1 << 5)
+
+// Convert SVC algorithm flags (USE_* / UPDATE_*) into VP8_EFLAG_* values.
+static int map_vp8_flags(int svc_flags) {
+  const int use_last = (svc_flags & USE_LAST) != 0;
+  const int use_arf = (svc_flags & USE_ARF) != 0;
+  const int use_gf = (svc_flags & USE_GF) != 0;
+  const int update_last = (svc_flags & UPDATE_LAST) != 0;
+  const int update_arf = (svc_flags & UPDATE_ARF) != 0;
+  const int update_gf = (svc_flags & UPDATE_GF) != 0;
+  int result = 0;
+
+  // a reference that is not requested must be switched off explicitly
+  result |= use_last ? 0 : VP8_EFLAG_NO_REF_LAST;
+  result |= use_arf ? 0 : VP8_EFLAG_NO_REF_ARF;
+  result |= use_gf ? 0 : VP8_EFLAG_NO_REF_GF;
+
+  // last is updated automatically, so only the opt-out needs a flag
+  result |= update_last ? 0 : VP8_EFLAG_NO_UPD_LAST;
+
+  // alt ref and golden are either forced or explicitly left untouched
+  result |= update_arf ? VP8_EFLAG_FORCE_ARF : VP8_EFLAG_NO_UPD_ARF;
+  result |= update_gf ? VP8_EFLAG_FORCE_GF : VP8_EFLAG_NO_UPD_GF;
+
+  return result;
+}
+
+/** True when the frame being set up is the first, full resolution dummy:
+ *  first_frame_full_size is 1 and encode_frame_count is still 0. */
+static int vpx_svc_dummy_frame(SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (svc_ctx->first_frame_full_size != 1) return 0;
+  return si->encode_frame_count == 0;
+}
+
+// Select the frame flags for the current layer from the encoding mode, the
+// keyframe position and the layer index; stores them in si->enc_frame_flags.
+static void calculate_enc_frame_flags(SvcContext *svc_ctx) {
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  const int is_keyframe = (si->frame_within_gop == 0);
+  const int is_base_layer = (si->layer == 0);
+  int svc_flags = 0;
+  int mode_is_known = 1;
+
+  // keyframe layer zero is identical for all modes
+  if ((is_keyframe && is_base_layer) || vpx_svc_dummy_frame(svc_ctx)) {
+    si->enc_frame_flags = VPX_EFLAG_FORCE_KF;
+    return;
+  }
+
+  switch (svc_ctx->encoding_mode) {
+    case ALT_INTER_LAYER_PREDICTION_IP:
+      if (is_base_layer) {
+        svc_flags = USE_LAST | UPDATE_LAST;
+      } else if (!is_keyframe) {
+        svc_flags = USE_LAST | USE_ARF | UPDATE_LAST;
+      } else if (si->layer == si->layers - 1) {
+        svc_flags = USE_ARF | UPDATE_LAST;
+      } else {
+        svc_flags = USE_ARF | UPDATE_LAST | UPDATE_GF;
+      }
+      break;
+    case INTER_LAYER_PREDICTION_I:
+      if (!is_base_layer && is_keyframe) {
+        svc_flags = USE_ARF | UPDATE_LAST;
+      } else {
+        svc_flags = USE_LAST | UPDATE_LAST;
+      }
+      break;
+    case INTER_LAYER_PREDICTION_IP:
+      if (is_base_layer) {
+        svc_flags = USE_LAST | UPDATE_LAST;
+      } else if (is_keyframe) {
+        svc_flags = USE_ARF | UPDATE_LAST;
+      } else {
+        svc_flags = USE_LAST | USE_ARF | UPDATE_LAST;
+      }
+      break;
+    case USE_GOLDEN_FRAME:
+      if (2 * si->layers - SVC_REFERENCE_FRAMES > si->layer) {
+        // same pattern as INTER_LAYER_PREDICTION_I
+        if (!is_base_layer && is_keyframe) {
+          svc_flags = USE_ARF | UPDATE_LAST;
+        } else {
+          svc_flags = USE_LAST | UPDATE_LAST;
+        }
+      } else if (is_base_layer) {
+        svc_flags = USE_LAST | USE_GF | UPDATE_LAST;
+      } else if (is_keyframe) {
+        svc_flags = USE_ARF | UPDATE_LAST | UPDATE_GF;
+      } else {
+        svc_flags = USE_LAST | USE_ARF | USE_GF | UPDATE_LAST;
+      }
+      break;
+    default:
+      svc_log(svc_ctx, SVC_LOG_ERROR, "unexpected encoding mode: %d\n",
+              svc_ctx->encoding_mode);
+      mode_is_known = 0;
+      break;
+  }
+  // an unknown mode falls back to forcing a keyframe
+  si->enc_frame_flags =
+      mode_is_known ? map_vp8_flags(svc_flags) : VPX_EFLAG_FORCE_KF;
+}
+
+vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx,
+                                             int layer,
+                                             unsigned int *width,
+                                             unsigned int *height) {
+  int w, h, index, num, den;
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  // outputs are written only on success; bad arguments are rejected first
+  if (svc_ctx == NULL || si == NULL || width == NULL || height == NULL) {
+    return VPX_CODEC_INVALID_PARAM;
+  }
+  if (layer < 0 || layer >= si->layers) return VPX_CODEC_INVALID_PARAM;
+  // the tables are right-aligned: the top layer uses slot VPX_SS_MAX_LAYERS - 1
+  index = layer + VPX_SS_MAX_LAYERS - si->layers;
+  num = si->scaling_factor_num[index];
+  den = si->scaling_factor_den[index];
+  if (num == 0 || den == 0) return VPX_CODEC_INVALID_PARAM;
+  // scale si->width / si->height by num/den using integer arithmetic
+  w = si->width * num / den;
+  h = si->height * num / den;
+
+  // round odd dimensions up to even to make chrome player happy
+  w += w % 2;
+  h += h % 2;
+
+  *width = w;
+  *height = h;
+
+  return VPX_CODEC_OK;
+}
+
+static void set_svc_parameters(SvcContext *svc_ctx,
+                               vpx_codec_ctx_t *codec_ctx) {
+  int layer, layer_index;
+  vpx_svc_parameters_t svc_params;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  // build the per-layer parameters for si->layer and send them to the encoder
+  memset(&svc_params, 0, sizeof(svc_params));
+  svc_params.layer = si->layer;
+  svc_params.flags = si->enc_frame_flags;
+  // |layer| picks resolution and quantizer and may differ from si->layer
+  layer = si->layer;
+  if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
+      si->frame_within_gop == 0) {
+    // layers 1 & 3 don't exist in this mode, use the higher one
+    if (layer == 0 || layer == 2) {
+      layer += 1;
+    }
+  }
+  if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer,
+                                                   &svc_params.width,
+                                                   &svc_params.height)) {
+    svc_log(svc_ctx, SVC_LOG_ERROR, "vpx_svc_get_layer_resolution failed\n");
+  }  // NOTE(review): failure is only logged; width/height then stay 0
+  layer_index = layer + VPX_SS_MAX_LAYERS - si->layers;
+  svc_params.min_quantizer = si->quantizer[layer_index];
+  svc_params.max_quantizer = si->quantizer[layer_index];
+  svc_params.distance_from_i_frame = si->frame_within_gop;
+
+  // Use buffer i for layer i LST
+  svc_params.lst_fb_idx = si->layer;
+
+  // Use buffer i-1 for layer i Alt (Inter-layer prediction)
+  if (si->layer != 0) {
+    const int use_higher_layer =
+        svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
+        si->frame_within_gop == 0;
+    svc_params.alt_fb_idx = use_higher_layer ? si->layer - 2 : si->layer - 1;
+  }
+  // Golden buffer: i+1 in alt-ip mode, else LST's or (2 * layers - 1 - i)
+  if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP) {
+    svc_params.gld_fb_idx = si->layer + 1;
+  } else {
+    if (si->layer < 2 * si->layers - SVC_REFERENCE_FRAMES)
+      svc_params.gld_fb_idx = svc_params.lst_fb_idx;
+    else
+      svc_params.gld_fb_idx = 2 * si->layers - 1 - si->layer;
+  }
+
+  svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n",
+          si->encode_frame_count, si->layer, svc_params.width,
+          svc_params.height, svc_params.min_quantizer);
+
+  if (svc_params.flags == VPX_EFLAG_FORCE_KF) {
+    svc_log(svc_ctx, SVC_LOG_DEBUG, "flags == VPX_EFLAG_FORCE_KF\n");
+  } else {
+    svc_log(
+        svc_ctx, SVC_LOG_DEBUG, "Using:    LST/GLD/ALT [%2d|%2d|%2d]\n",
+        svc_params.flags & VP8_EFLAG_NO_REF_LAST ? -1 : svc_params.lst_fb_idx,
+        svc_params.flags & VP8_EFLAG_NO_REF_GF ? -1 : svc_params.gld_fb_idx,
+        svc_params.flags & VP8_EFLAG_NO_REF_ARF ? -1 : svc_params.alt_fb_idx);
+    svc_log(
+        svc_ctx, SVC_LOG_DEBUG, "Updating: LST/GLD/ALT [%2d|%2d|%2d]\n",
+        svc_params.flags & VP8_EFLAG_NO_UPD_LAST ? -1 : svc_params.lst_fb_idx,
+        svc_params.flags & VP8_EFLAG_NO_UPD_GF ? -1 : svc_params.gld_fb_idx,
+        svc_params.flags & VP8_EFLAG_NO_UPD_ARF ? -1 : svc_params.alt_fb_idx);
+  }
+  // NOTE(review): the vpx_codec_control result is not checked
+  vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &svc_params);
+}
+
+/**
+ * Encode a frame into multiple layers
+ * Create a superframe containing the individual layers; fetch the result with
+ * vpx_svc_get_buffer / vpx_svc_get_frame_size. Returns VPX_CODEC_OK, else the
+ * failing code (invalid parameter, encoder error, out of memory, no data).
+ */
+vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+                               struct vpx_image *rawimg, vpx_codec_pts_t pts,
+                               int64_t duration, int deadline) {
+  vpx_codec_err_t res;
+  vpx_codec_iter_t iter;
+  const vpx_codec_cx_pkt_t *cx_pkt;
+  struct LayerData *cx_layer_list = NULL;
+  struct LayerData *layer_data;
+  struct Superframe superframe;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || codec_ctx == NULL || rawimg == NULL || si == NULL) {
+    return VPX_CODEC_INVALID_PARAM;
+  }
+
+  memset(&superframe, 0, sizeof(superframe));
+  svc_log_reset(svc_ctx);
+  si->layers = vpx_svc_dummy_frame(svc_ctx) ? 1 : svc_ctx->spatial_layers;
+  if (si->frame_within_gop >= si->kf_dist ||
+      si->encode_frame_count == 0 ||
+      (si->encode_frame_count == 1 && svc_ctx->first_frame_full_size == 1)) {
+    si->frame_within_gop = 0;
+  }
+  si->is_keyframe = (si->frame_within_gop == 0);
+  si->frame_size = 0;
+
+  svc_log(svc_ctx, SVC_LOG_DEBUG,
+          "vpx_svc_encode  layers: %d, frame_count: %d, frame_within_gop: %d\n",
+          si->layers, si->encode_frame_count, si->frame_within_gop);
+  // encode each layer
+  for (si->layer = 0; si->layer < si->layers; ++si->layer) {
+    if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
+        si->is_keyframe && (si->layer == 1 || si->layer == 3)) {
+      svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer);
+      continue;
+    }
+    calculate_enc_frame_flags(svc_ctx);
+    if (vpx_svc_dummy_frame(svc_ctx)) {
+      // do not set svc parameters, use normal encode
+      svc_log(svc_ctx, SVC_LOG_DEBUG, "encoding full size first frame\n");
+    } else {
+      set_svc_parameters(svc_ctx, codec_ctx);
+    }
+    res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration,
+                           si->enc_frame_flags, deadline);
+    if (res != VPX_CODEC_OK) {
+      if (cx_layer_list != NULL) ld_list_free(cx_layer_list);  // earlier layers
+      return res;
+    }
+    // save compressed data
+    iter = NULL;
+    while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) {
+      switch (cx_pkt->kind) {
+        case VPX_CODEC_CX_FRAME_PKT: {
+          const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz);
+          if (!vpx_svc_dummy_frame(svc_ctx)) {
+            si->bytes_in_layer[si->layer] += frame_pkt_size;
+            svc_log(svc_ctx, SVC_LOG_DEBUG,
+                    "SVC frame: %d, layer: %d, size: %u\n",
+                    si->encode_frame_count, si->layer, frame_pkt_size);
+          }
+          layer_data =
+              ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size);
+          if (layer_data == NULL) {
+            svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating LayerData\n");
+            if (cx_layer_list != NULL) ld_list_free(cx_layer_list);
+            return VPX_CODEC_MEM_ERROR;  // must not look like success
+          }
+          ld_list_add(&cx_layer_list, layer_data);
+          // save layer size in superframe index
+          superframe.sizes[superframe.count++] = frame_pkt_size;
+          superframe.magnitude |= frame_pkt_size;
+          break;
+        }
+        case VPX_CODEC_PSNR_PKT: {
+          if (!vpx_svc_dummy_frame(svc_ctx)) {
+            svc_log(svc_ctx, SVC_LOG_DEBUG,
+                    "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
+                    "%2.3f  %2.3f  %2.3f  %2.3f \n",
+                    si->encode_frame_count, si->layer,
+                    cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
+                    cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
+            si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0];
+          }
+          break;
+        }
+        default: break;
+      }
+    }
+  }
+  // add superframe index to layer data list
+  if (!vpx_svc_dummy_frame(svc_ctx)) {
+    sf_create_index(&superframe);
+    layer_data = ld_create(superframe.buffer, superframe.index_size);
+    if (layer_data == NULL) {
+      if (cx_layer_list != NULL) ld_list_free(cx_layer_list);
+      return VPX_CODEC_MEM_ERROR;
+    }
+    ld_list_add(&cx_layer_list, layer_data);
+  }
+  // get accumulated size of layer data
+  si->frame_size = ld_list_get_buffer_size(cx_layer_list);
+  if (si->frame_size == 0) return VPX_CODEC_ERROR;
+
+  // all layers encoded, create single buffer with concatenated layers
+  if (si->frame_size > si->buffer_size) {
+    free(si->buffer);
+    si->buffer = malloc(si->frame_size);
+    if (si->buffer == NULL) {
+      si->buffer_size = 0;  // old buffer was freed: force a new malloc later
+      ld_list_free(cx_layer_list);
+      return VPX_CODEC_MEM_ERROR;
+    }
+    si->buffer_size = si->frame_size;
+  }
+  // copy layer data into packet
+  ld_list_copy_to_buffer(cx_layer_list, si->buffer);
+  ld_list_free(cx_layer_list);
+  svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, pts: %d\n",
+          si->encode_frame_count, si->is_keyframe, (int)si->frame_size,
+          (int)pts);
+  ++si->frame_within_gop;
+  ++si->encode_frame_count;
+  return VPX_CODEC_OK;
+}
+
+const char *vpx_svc_get_message(const SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return NULL;  // no state to read from
+  return si->message_buffer;  // same text vpx_svc_dump_statistics returns
+}
+
+void *vpx_svc_get_buffer(const SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return NULL;  // no state to read from
+  return si->buffer;  // owned by svc_ctx; vpx_svc_encode may reallocate it
+}
+
+size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return 0;  // no state: report no data
+  return si->frame_size;  // size recorded by the last vpx_svc_encode call
+}
+
+int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return 0;  // no state: nothing encoded
+  return si->encode_frame_count;  // zeroed by vpx_svc_dump_statistics
+}
+
+int vpx_svc_is_keyframe(const SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return 0;  // no state: not a keyframe
+  return si->is_keyframe;  // set by vpx_svc_encode when frame_within_gop == 0
+}
+
+void vpx_svc_set_keyframe(SvcContext *svc_ctx) {
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return;  // nothing to update
+  si->frame_within_gop = 0;  // vpx_svc_encode treats position 0 as a keyframe
+}
+
+// Log per-layer average PSNR and byte counts plus the byte total, then reset
+// the accumulators. Returns vpx_svc_get_message(), NULL without a context.
+const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
+  int number_of_frames, number_of_keyframes, encode_frame_count;
+  int i;
+  uint32_t bytes_total = 0;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return NULL;
+  svc_log_reset(svc_ctx);
+  encode_frame_count = si->encode_frame_count;
+  if (svc_ctx->first_frame_full_size) encode_frame_count--;  // dummy frame
+  if (encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx);
+  svc_log(svc_ctx, SVC_LOG_INFO, "\n");
+  // keyframes = ceil(frames / kf_dist); kf_dist <= 0 makes every frame one
+  number_of_keyframes = encode_frame_count;
+  if (si->kf_dist > 0)
+    number_of_keyframes = (encode_frame_count - 1) / si->kf_dist + 1;
+  for (i = 0; i < si->layers; ++i) {
+    number_of_frames = encode_frame_count;
+    // these layers are not encoded on keyframes in this mode
+    if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
+        (i == 1 || i == 3)) {
+      number_of_frames -= number_of_keyframes;
+    }
+    svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d PSNR=[%2.3f], Bytes=[%u]\n", i,
+            number_of_frames > 0
+                ? (double)si->psnr_in_layer[i] / number_of_frames : 0.0,
+            si->bytes_in_layer[i]);
+    bytes_total += si->bytes_in_layer[i];
+    si->psnr_in_layer[i] = 0;
+    si->bytes_in_layer[i] = 0;
+  }
+  // only display statistics once
+  si->encode_frame_count = 0;
+  svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total);
+  return vpx_svc_get_message(svc_ctx);
+}
+
+void vpx_svc_release(SvcContext *svc_ctx) {
+  SvcInternal *si;
+  if (svc_ctx == NULL) return;
+  // do not use get_svc_internal as it will unnecessarily allocate an
+  // SvcInternal if it was not already allocated
+  si = (SvcInternal *)svc_ctx->internal;
+  if (si != NULL) {
+    free(si->buffer);  // output buffer allocated by vpx_svc_encode
+    free(si);
+    svc_ctx->internal = NULL;  // a repeated release then finds nothing to free
+  }
+}
diff --git a/vpx/svc_context.h b/vpx/svc_context.h
new file mode 100644
index 000000000..f4933f8d6
--- /dev/null
+++ b/vpx/svc_context.h
@@ -0,0 +1,142 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/**
+ * SvcContext - input parameters and state to encode a multi-layered
+ * spatial SVC frame
+ */
+
+#ifndef VPX_SVC_CONTEXT_H_
+#define VPX_SVC_CONTEXT_H_
+
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum SVC_ENCODING_MODE {
+  INTER_LAYER_PREDICTION_I,       // inter-layer prediction on keyframes only
+  ALT_INTER_LAYER_PREDICTION_IP,  // as IP, but keyframes skip layers 1 and 3
+  INTER_LAYER_PREDICTION_IP,      // inter-layer prediction on every frame
+  USE_GOLDEN_FRAME                // adds golden references for upper layers
+} SVC_ENCODING_MODE;
+
+typedef enum SVC_LOG_LEVEL {
+  SVC_LOG_ERROR,  // failures, e.g. allocation errors or a bad encoding mode
+  SVC_LOG_INFO,   // summary output such as vpx_svc_dump_statistics
+  SVC_LOG_DEBUG   // per-frame and per-layer trace messages
+} SVC_LOG_LEVEL;
+
+typedef struct {
+  // public interface to svc_command options
+  int spatial_layers;               // number of layers
+  int first_frame_full_size;        // set to one to force first frame full size
+  SVC_ENCODING_MODE encoding_mode;  // svc encoding strategy
+  SVC_LOG_LEVEL log_level;  // amount of information to display
+  int log_print;  // when set, printf log messages instead of returning the
+                  // message with vpx_svc_get_message
+
+  // private storage for vpx_svc_encode; freed by vpx_svc_release
+  void *internal;
+} SvcContext;
+
+/**
+ * Set SVC options
+ * options are supplied as a single string separated by spaces
+ * Format: encoding-mode=<i|ip|alt-ip|gf>
+ *         layers=<layer_count>
+ *         scaling-factors=<n1>/<d1>,<n2>/<d2>,...
+ *         quantizers=<q1>,<q2>,...
+ */
+vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options);
+
+/**
+ * Set SVC quantizer values
+ * values comma separated, ordered from lowest resolution to highest
+ * e.g., "60,53,39,33,27"
+ */
+vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx,
+                                       const char *quantizer_values);
+
+/**
+ * Set SVC scale factors
+ * values comma separated, ordered from lowest resolution to highest
+ * e.g.,  "4/16,5/16,7/16,11/16,16/16"
+ */
+vpx_codec_err_t vpx_svc_set_scale_factors(SvcContext *svc_ctx,
+                                          const char *scale_factors);
+
+/**
+ * initialize SVC encoding
+ */
+vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+                             vpx_codec_iface_t *iface,
+                             vpx_codec_enc_cfg_t *cfg);
+/**
+ * encode a frame of video with multiple layers
+ */
+vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+                               struct vpx_image *rawimg, vpx_codec_pts_t pts,
+                               int64_t duration, int deadline);
+
+/**
+ * finished with svc encoding, release allocated resources
+ */
+void vpx_svc_release(SvcContext *svc_ctx);
+
+/**
+ * dump accumulated statistics and reset accumulated values
+ */
+const char *vpx_svc_dump_statistics(SvcContext *svc_ctx);
+
+/**
+ *  get status message from previous encode
+ */
+const char *vpx_svc_get_message(const SvcContext *svc_ctx);
+
+/**
+ * return size of encoded data to be returned by vpx_svc_get_buffer
+ */
+size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx);
+
+/**
+ * return buffer with encoded data
+ */
+void *vpx_svc_get_buffer(const SvcContext *svc_ctx);
+
+/**
+ * return spatial resolution of the specified layer
+ */
+vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx,
+                                             int layer,
+                                             unsigned int *width,
+                                             unsigned int *height);
+/**
+ * return number of frames that have been encoded
+ */
+int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx);
+
+/**
+ * return 1 if last encoded frame was a keyframe
+ */
+int vpx_svc_is_keyframe(const SvcContext *svc_ctx);
+
+/**
+ * force the next frame to be a keyframe
+ */
+void vpx_svc_set_keyframe(SvcContext *svc_ctx);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  /* VPX_SVC_CONTEXT_H_ */
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 9f68c38d2..433cc0d8a 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -194,13 +194,8 @@ enum vp8e_enc_control_id {
   VP9E_SET_TILE_ROWS,
   VP9E_SET_FRAME_PARALLEL_DECODING,
 
-  VP9E_SET_WIDTH              = 99,
-  VP9E_SET_HEIGHT,
-  VP9E_SET_LAYER,
   VP9E_SET_SVC,
-
-  VP9E_SET_MAX_Q,
-  VP9E_SET_MIN_Q
+  VP9E_SET_SVC_PARAMETERS
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -283,6 +278,23 @@ typedef enum {
   VP8_TUNE_SSIM
 } vp8e_tuning;
 
+/*!\brief  vp9 svc parameters
+ *
+ * This defines parameters for svc encoding of one spatial layer. It is
+ * passed to the encoder with the VP9E_SET_SVC_PARAMETERS control.
+ */
+typedef struct vpx_svc_parameters {
+  unsigned int width;         /**< width of current spatial layer */
+  unsigned int height;        /**< height of current spatial layer */
+  int layer;                  /**< current layer number - 0 = base */
+  int flags;                  /**< encode frame flags */
+  int max_quantizer;          /**< max quantizer for current layer */
+  int min_quantizer;          /**< min quantizer for current layer */
+  int distance_from_i_frame;  /**< frame number within current gop */
+  int lst_fb_idx;             /**< last frame frame buffer index */
+  int gld_fb_idx;             /**< golden frame frame buffer index */
+  int alt_fb_idx;             /**< alt reference frame frame buffer index */
+} vpx_svc_parameters_t;
 
 /*!\brief VP8 encoder control function parameter type
  *
@@ -303,11 +315,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP,            vpx_roi_map_t *)
 VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP,          vpx_active_map_t *)
 VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE,          vpx_scaling_mode_t *)
 
-VPX_CTRL_USE_TYPE(VP9E_SET_LAYER,              int *)
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC,                int)
-
-VPX_CTRL_USE_TYPE(VP9E_SET_WIDTH,              unsigned int *)
-VPX_CTRL_USE_TYPE(VP9E_SET_HEIGHT,             unsigned int *)
+VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS,     vpx_svc_parameters_t *)
 
 VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED,            int)
 VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF,   unsigned int)
@@ -334,8 +343,6 @@ VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int)
 
 VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int)
 
-VPX_CTRL_USE_TYPE(VP9E_SET_MAX_Q,      unsigned int)
-VPX_CTRL_USE_TYPE(VP9E_SET_MIN_Q,      unsigned int)
 /*! @} - end defgroup vp8_encoder */
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 3d5510f66..549c24908 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -15,6 +15,8 @@ API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h
 API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h
 API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h
 API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h
+API_SRCS-$(CONFIG_VP9_ENCODER) += src/svc_encodeframe.c
+API_SRCS-$(CONFIG_VP9_ENCODER) += svc_context.h
 
 API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h
 API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h
diff --git a/vpxdec.c b/vpxdec.c
index 1860474cb..8e575e16f 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -50,8 +50,6 @@
 
 static const char *exec_name;
 
-#define VP8_FOURCC (0x00385056)
-#define VP9_FOURCC (0x00395056)
 static const struct {
   char const *name;
   const vpx_codec_iface_t *(*iface)(void);
@@ -59,10 +57,10 @@ static const struct {
   unsigned int             fourcc_mask;
 } ifaces[] = {
 #if CONFIG_VP8_DECODER
-  {"vp8",  vpx_codec_vp8_dx,   VP8_FOURCC, 0x00FFFFFF},
+  {"vp8",  vpx_codec_vp8_dx,   VP8_FOURCC_MASK, 0x00FFFFFF},
 #endif
 #if CONFIG_VP9_DECODER
-  {"vp9",  vpx_codec_vp9_dx,   VP9_FOURCC, 0x00FFFFFF},
+  {"vp9",  vpx_codec_vp9_dx,   VP9_FOURCC_MASK, 0x00FFFFFF},
 #endif
 };
 
@@ -143,7 +141,7 @@ static const arg_def_t *vp8_pp_args[] = {
 };
 #endif
 
-static void usage_exit() {
+void usage_exit() {
   int i;
 
   fprintf(stderr, "Usage: %s <options> filename\n\n"
@@ -178,14 +176,6 @@ static void usage_exit() {
   exit(EXIT_FAILURE);
 }
 
-void die(const char *fmt, ...) {
-  va_list ap;
-  va_start(ap, fmt);
-  vfprintf(stderr, fmt, ap);
-  fprintf(stderr, "\n");
-  usage_exit();
-}
-
 static unsigned int mem_get_le16(const void *vmem) {
   unsigned int  val;
   const unsigned char *mem = (const unsigned char *)vmem;
@@ -575,9 +565,9 @@ file_is_webm(struct input_ctx *input,
 
   codec_id = nestegg_track_codec_id(input->nestegg_ctx, i);
   if (codec_id == NESTEGG_CODEC_VP8) {
-    *fourcc = VP8_FOURCC;
+    *fourcc = VP8_FOURCC_MASK;
   } else if (codec_id == NESTEGG_CODEC_VP9) {
-    *fourcc = VP9_FOURCC;
+    *fourcc = VP9_FOURCC_MASK;
   } else {
     fprintf(stderr, "Not VPx video, quitting.\n");
     exit(1);
diff --git a/vpxenc.c b/vpxenc.c
index aa99c6b7c..877a0090b 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -34,6 +34,8 @@
 #include <unistd.h>
 #endif
 
+#include "third_party/libyuv/include/libyuv/scale.h"
+
 #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
 #include "vpx/vp8cx.h"
 #endif
@@ -44,10 +46,9 @@
 #include "vpx_ports/mem_ops.h"
 #include "vpx_ports/vpx_timer.h"
 #include "tools_common.h"
+#include "webmenc.h"
 #include "y4minput.h"
-#include "third_party/libmkv/EbmlWriter.h"
-#include "third_party/libmkv/EbmlIDs.h"
-#include "third_party/libyuv/include/libyuv/scale.h"
+
 
 /* Need special handling of these functions on Windows */
 #if defined(_MSC_VER)
@@ -89,8 +90,6 @@ static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
 
 static const char *exec_name;
 
-#define VP8_FOURCC (0x30385056)
-#define VP9_FOURCC (0x30395056)
 static const struct codec_item {
   char const              *name;
   const vpx_codec_iface_t *(*iface)(void);
@@ -109,37 +108,6 @@ static const struct codec_item {
 #endif
 };
 
-static void usage_exit();
-
-#define LOG_ERROR(label) do \
-  {\
-    const char *l=label;\
-    va_list ap;\
-    va_start(ap, fmt);\
-    if(l)\
-      fprintf(stderr, "%s: ", l);\
-    vfprintf(stderr, fmt, ap);\
-    fprintf(stderr, "\n");\
-    va_end(ap);\
-  } while(0)
-
-void die(const char *fmt, ...) {
-  LOG_ERROR(NULL);
-  usage_exit();
-}
-
-
-void fatal(const char *fmt, ...) {
-  LOG_ERROR("Fatal");
-  exit(EXIT_FAILURE);
-}
-
-
-void warn(const char *fmt, ...) {
-  LOG_ERROR("Warning");
-}
-
-
 static void warn_or_exit_on_errorv(vpx_codec_ctx_t *ctx, int fatal,
                                    const char *s, va_list ap) {
   if (ctx->err) {
@@ -293,15 +261,6 @@ vpx_fixed_buf_t stats_get(stats_io_t *stats) {
   return stats->buf;
 }
 
-/* Stereo 3D packed frame format */
-typedef enum stereo_format {
-  STEREO_FORMAT_MONO       = 0,
-  STEREO_FORMAT_LEFT_RIGHT = 1,
-  STEREO_FORMAT_BOTTOM_TOP = 2,
-  STEREO_FORMAT_TOP_BOTTOM = 3,
-  STEREO_FORMAT_RIGHT_LEFT = 11
-} stereo_format_t;
-
 enum video_file_type {
   FILE_TYPE_RAW,
   FILE_TYPE_IVF,
@@ -496,376 +455,6 @@ static void write_ivf_frame_size(FILE *outfile, size_t size) {
 }
 
 
-typedef off_t EbmlLoc;
-
-
-struct cue_entry {
-  unsigned int time;
-  uint64_t     loc;
-};
-
-
-struct EbmlGlobal {
-  int debug;
-
-  FILE    *stream;
-  int64_t last_pts_ms;
-  vpx_rational_t  framerate;
-
-  /* These pointers are to the start of an element */
-  off_t    position_reference;
-  off_t    seek_info_pos;
-  off_t    segment_info_pos;
-  off_t    track_pos;
-  off_t    cue_pos;
-  off_t    cluster_pos;
-
-  /* This pointer is to a specific element to be serialized */
-  off_t    track_id_pos;
-
-  /* These pointers are to the size field of the element */
-  EbmlLoc  startSegment;
-  EbmlLoc  startCluster;
-
-  uint32_t cluster_timecode;
-  int      cluster_open;
-
-  struct cue_entry *cue_list;
-  unsigned int      cues;
-
-};
-
-
-void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) {
-  (void) fwrite(buffer_in, 1, len, glob->stream);
-}
-
-#define WRITE_BUFFER(s) \
-  for(i = len-1; i>=0; i--)\
-  { \
-    x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \
-    Ebml_Write(glob, &x, 1); \
-  }
-void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) {
-  char x;
-  int i;
-
-  /* buffer_size:
-   * 1 - int8_t;
-   * 2 - int16_t;
-   * 3 - int32_t;
-   * 4 - int64_t;
-   */
-  switch (buffer_size) {
-    case 1:
-      WRITE_BUFFER(int8_t)
-      break;
-    case 2:
-      WRITE_BUFFER(int16_t)
-      break;
-    case 4:
-      WRITE_BUFFER(int32_t)
-      break;
-    case 8:
-      WRITE_BUFFER(int64_t)
-      break;
-    default:
-      break;
-  }
-}
-#undef WRITE_BUFFER
-
-/* Need a fixed size serializer for the track ID. libmkv provides a 64 bit
- * one, but not a 32 bit one.
- */
-static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) {
-  unsigned char sizeSerialized = 4 | 0x80;
-  Ebml_WriteID(glob, class_id);
-  Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
-  Ebml_Serialize(glob, &ui, sizeof(ui), 4);
-}
-
-
-static void
-Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
-                     unsigned long class_id) {
-  /* todo this is always taking 8 bytes, this may need later optimization */
-  /* this is a key that says length unknown */
-  uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF);
-
-  Ebml_WriteID(glob, class_id);
-  *ebmlLoc = ftello(glob->stream);
-  Ebml_Serialize(glob, &unknownLen, sizeof(unknownLen), 8);
-}
-
-static void
-Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) {
-  off_t pos;
-  uint64_t size;
-
-  /* Save the current stream pointer */
-  pos = ftello(glob->stream);
-
-  /* Calculate the size of this element */
-  size = pos - *ebmlLoc - 8;
-  size |= LITERALU64(0x01000000, 0x00000000);
-
-  /* Seek back to the beginning of the element and write the new size */
-  fseeko(glob->stream, *ebmlLoc, SEEK_SET);
-  Ebml_Serialize(glob, &size, sizeof(size), 8);
-
-  /* Reset the stream pointer */
-  fseeko(glob->stream, pos, SEEK_SET);
-}
-
-
-static void
-write_webm_seek_element(EbmlGlobal *ebml, unsigned long id, off_t pos) {
-  uint64_t offset = pos - ebml->position_reference;
-  EbmlLoc start;
-  Ebml_StartSubElement(ebml, &start, Seek);
-  Ebml_SerializeBinary(ebml, SeekID, id);
-  Ebml_SerializeUnsigned64(ebml, SeekPosition, offset);
-  Ebml_EndSubElement(ebml, &start);
-}
-
-
-static void
-write_webm_seek_info(EbmlGlobal *ebml) {
-
-  off_t pos;
-
-  /* Save the current stream pointer */
-  pos = ftello(ebml->stream);
-
-  if (ebml->seek_info_pos)
-    fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET);
-  else
-    ebml->seek_info_pos = pos;
-
-  {
-    EbmlLoc start;
-
-    Ebml_StartSubElement(ebml, &start, SeekHead);
-    write_webm_seek_element(ebml, Tracks, ebml->track_pos);
-    write_webm_seek_element(ebml, Cues,   ebml->cue_pos);
-    write_webm_seek_element(ebml, Info,   ebml->segment_info_pos);
-    Ebml_EndSubElement(ebml, &start);
-  }
-  {
-    /* segment info */
-    EbmlLoc startInfo;
-    uint64_t frame_time;
-    char version_string[64];
-
-    /* Assemble version string */
-    if (ebml->debug)
-      strcpy(version_string, "vpxenc");
-    else {
-      strcpy(version_string, "vpxenc ");
-      strncat(version_string,
-              vpx_codec_version_str(),
-              sizeof(version_string) - 1 - strlen(version_string));
-    }
-
-    frame_time = (uint64_t)1000 * ebml->framerate.den
-                 / ebml->framerate.num;
-    ebml->segment_info_pos = ftello(ebml->stream);
-    Ebml_StartSubElement(ebml, &startInfo, Info);
-    Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
-    Ebml_SerializeFloat(ebml, Segment_Duration,
-                        (double)(ebml->last_pts_ms + frame_time));
-    Ebml_SerializeString(ebml, 0x4D80, version_string);
-    Ebml_SerializeString(ebml, 0x5741, version_string);
-    Ebml_EndSubElement(ebml, &startInfo);
-  }
-}
-
-
-static void
-write_webm_file_header(EbmlGlobal                *glob,
-                       const vpx_codec_enc_cfg_t *cfg,
-                       const struct vpx_rational *fps,
-                       stereo_format_t            stereo_fmt,
-                       unsigned int               fourcc) {
-  {
-    EbmlLoc start;
-    Ebml_StartSubElement(glob, &start, EBML);
-    Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
-    Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1);
-    Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4);
-    Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8);
-    Ebml_SerializeString(glob, DocType, "webm");
-    Ebml_SerializeUnsigned(glob, DocTypeVersion, 2);
-    Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2);
-    Ebml_EndSubElement(glob, &start);
-  }
-  {
-    Ebml_StartSubElement(glob, &glob->startSegment, Segment);
-    glob->position_reference = ftello(glob->stream);
-    glob->framerate = *fps;
-    write_webm_seek_info(glob);
-
-    {
-      EbmlLoc trackStart;
-      glob->track_pos = ftello(glob->stream);
-      Ebml_StartSubElement(glob, &trackStart, Tracks);
-      {
-        unsigned int trackNumber = 1;
-        uint64_t     trackID = 0;
-
-        EbmlLoc start;
-        Ebml_StartSubElement(glob, &start, TrackEntry);
-        Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
-        glob->track_id_pos = ftello(glob->stream);
-        Ebml_SerializeUnsigned32(glob, TrackUID, trackID);
-        Ebml_SerializeUnsigned(glob, TrackType, 1);
-        Ebml_SerializeString(glob, CodecID,
-                             fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9");
-        {
-          unsigned int pixelWidth = cfg->g_w;
-          unsigned int pixelHeight = cfg->g_h;
-
-          EbmlLoc videoStart;
-          Ebml_StartSubElement(glob, &videoStart, Video);
-          Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
-          Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
-          Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
-          Ebml_EndSubElement(glob, &videoStart);
-        }
-        Ebml_EndSubElement(glob, &start); /* Track Entry */
-      }
-      Ebml_EndSubElement(glob, &trackStart);
-    }
-    /* segment element is open */
-  }
-}
-
-
-static void
-write_webm_block(EbmlGlobal                *glob,
-                 const vpx_codec_enc_cfg_t *cfg,
-                 const vpx_codec_cx_pkt_t  *pkt) {
-  unsigned long  block_length;
-  unsigned char  track_number;
-  unsigned short block_timecode = 0;
-  unsigned char  flags;
-  int64_t        pts_ms;
-  int            start_cluster = 0, is_keyframe;
-
-  /* Calculate the PTS of this frame in milliseconds */
-  pts_ms = pkt->data.frame.pts * 1000
-           * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
-  if (pts_ms <= glob->last_pts_ms)
-    pts_ms = glob->last_pts_ms + 1;
-  glob->last_pts_ms = pts_ms;
-
-  /* Calculate the relative time of this block */
-  if (pts_ms - glob->cluster_timecode > SHRT_MAX)
-    start_cluster = 1;
-  else
-    block_timecode = (unsigned short)pts_ms - glob->cluster_timecode;
-
-  is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
-  if (start_cluster || is_keyframe) {
-    if (glob->cluster_open)
-      Ebml_EndSubElement(glob, &glob->startCluster);
-
-    /* Open the new cluster */
-    block_timecode = 0;
-    glob->cluster_open = 1;
-    glob->cluster_timecode = (uint32_t)pts_ms;
-    glob->cluster_pos = ftello(glob->stream);
-    Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */
-    Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
-
-    /* Save a cue point if this is a keyframe. */
-    if (is_keyframe) {
-      struct cue_entry *cue, *new_cue_list;
-
-      new_cue_list = realloc(glob->cue_list,
-                             (glob->cues + 1) * sizeof(struct cue_entry));
-      if (new_cue_list)
-        glob->cue_list = new_cue_list;
-      else
-        fatal("Failed to realloc cue list.");
-
-      cue = &glob->cue_list[glob->cues];
-      cue->time = glob->cluster_timecode;
-      cue->loc = glob->cluster_pos;
-      glob->cues++;
-    }
-  }
-
-  /* Write the Simple Block */
-  Ebml_WriteID(glob, SimpleBlock);
-
-  block_length = (unsigned long)pkt->data.frame.sz + 4;
-  block_length |= 0x10000000;
-  Ebml_Serialize(glob, &block_length, sizeof(block_length), 4);
-
-  track_number = 1;
-  track_number |= 0x80;
-  Ebml_Write(glob, &track_number, 1);
-
-  Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2);
-
-  flags = 0;
-  if (is_keyframe)
-    flags |= 0x80;
-  if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
-    flags |= 0x08;
-  Ebml_Write(glob, &flags, 1);
-
-  Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz);
-}
-
-
-static void
-write_webm_file_footer(EbmlGlobal *glob, long hash) {
-
-  if (glob->cluster_open)
-    Ebml_EndSubElement(glob, &glob->startCluster);
-
-  {
-    EbmlLoc start;
-    unsigned int i;
-
-    glob->cue_pos = ftello(glob->stream);
-    Ebml_StartSubElement(glob, &start, Cues);
-    for (i = 0; i < glob->cues; i++) {
-      struct cue_entry *cue = &glob->cue_list[i];
-      EbmlLoc start;
-
-      Ebml_StartSubElement(glob, &start, CuePoint);
-      {
-        EbmlLoc start;
-
-        Ebml_SerializeUnsigned(glob, CueTime, cue->time);
-
-        Ebml_StartSubElement(glob, &start, CueTrackPositions);
-        Ebml_SerializeUnsigned(glob, CueTrack, 1);
-        Ebml_SerializeUnsigned64(glob, CueClusterPosition,
-                                 cue->loc - glob->position_reference);
-        Ebml_EndSubElement(glob, &start);
-      }
-      Ebml_EndSubElement(glob, &start);
-    }
-    Ebml_EndSubElement(glob, &start);
-  }
-
-  Ebml_EndSubElement(glob, &glob->startSegment);
-
-  /* Patch up the seek info block */
-  write_webm_seek_info(glob);
-
-  /* Patch up the track id */
-  fseeko(glob->stream, glob->track_id_pos, SEEK_SET);
-  Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash);
-
-  fseeko(glob->stream, 0, SEEK_END);
-}
-
 
 /* Murmur hash derived from public domain reference implementation at
  *   http:// sites.google.com/site/murmurhash/
@@ -1172,7 +761,7 @@ static const int vp9_arg_ctrl_map[] = {
 
 static const arg_def_t *no_args[] = { NULL };
 
-static void usage_exit() {
+void usage_exit() {
   int i;
 
   fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
@@ -1647,7 +1236,7 @@ struct stream_state {
   struct stream_config      config;
   FILE                     *file;
   struct rate_hist          rate_hist;
-  EbmlGlobal                ebml;
+  struct EbmlGlobal         ebml;
   uint32_t                  hash;
   uint64_t                  psnr_sse_total;
   uint64_t                  psnr_samples_total;
@@ -1820,17 +1409,7 @@ void open_input_file(struct input_state *input) {
     } else
       fatal("Unsupported Y4M stream.");
   } else if (input->detect.buf_read == 4 && file_is_ivf(input, &fourcc)) {
-    input->file_type = FILE_TYPE_IVF;
-    switch (fourcc) {
-      case 0x32315659:
-        input->use_i420 = 0;
-        break;
-      case 0x30323449:
-        input->use_i420 = 1;
-        break;
-      default:
-        fatal("Unsupported fourcc (%08x) in IVF", fourcc);
-    }
+    fatal("IVF is not supported as input.");
   } else {
     input->file_type = FILE_TYPE_RAW;
   }
@@ -1844,7 +1423,7 @@ static void close_input_file(struct input_state *input) {
 }
 
 static struct stream_state *new_stream(struct global_config *global,
-                                       struct stream_state  *prev) {
+                                       struct stream_state *prev) {
   struct stream_state *stream;
 
   stream = calloc(1, sizeof(*stream));
diff --git a/webmenc.c b/webmenc.c
new file mode 100644
index 000000000..a584e9db9
--- /dev/null
+++ b/webmenc.c
@@ -0,0 +1,345 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "webmenc.h"
+
+#include <limits.h>
+#include <string.h>
+
+#include "third_party/libmkv/EbmlWriter.h"
+#include "third_party/libmkv/EbmlIDs.h"
+
+#if defined(_MSC_VER)
+/* MSVS spells the 64-bit seek/tell functions _f{seek,tell}i64. */
+#define fseeko _fseeki64
+#define ftello _ftelli64
+#elif defined(_WIN32)
+/* MinGW: off_t is long; use the 64-bit variants. NOTE(review): webmenc.h is
+ * already included above, so its off_t fields keep the old type - confirm. */
+#define fseeko fseeko64
+#define ftello ftello64
+#define off_t off64_t
+#endif
+
+#define LITERALU64(hi, lo) ((((uint64_t)(hi)) << 32) | (lo))
+
+/* Writes len bytes of buffer_in to glob->stream; errors are not reported. */
+void Ebml_Write(struct EbmlGlobal *glob, const void *buffer_in,
+                unsigned long len) {
+  (void) fwrite(buffer_in, sizeof(char), len, glob->stream);
+}
+
+#define WRITE_BUFFER(s) \
+for (i = len - 1; i >= 0; i--) { \
+  x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \
+  Ebml_Write(glob, &x, 1); \
+}
+
+void Ebml_Serialize(struct EbmlGlobal *glob,
+                    const void *buffer_in,
+                    int buffer_size,
+                    unsigned long len) {
+  char x;
+  int i;
+
+  /* Writes the low len bytes of the value at buffer_in, most significant
+   * byte first. buffer_size selects the type the value is read as:
+   * 1 - int8_t; 2 - int16_t; 4 - int32_t; 8 - int64_t.
+   * Any other buffer_size writes nothing. The WRITE_BUFFER macro expands to
+   * the byte loop and uses the locals x and i declared above.
+   */
+  switch (buffer_size) {
+    case 1:
+      WRITE_BUFFER(int8_t)
+      break;
+    case 2:
+      WRITE_BUFFER(int16_t)
+      break;
+    case 4:
+      WRITE_BUFFER(int32_t)
+      break;
+    case 8:
+      WRITE_BUFFER(int64_t)
+      break;
+    default:
+      break;
+  }
+}
+#undef WRITE_BUFFER
+
+/* The track ID needs a fixed size 32 bit serializer, which libmkv (unlike
+ * the 64 bit case) does not provide: ID, one size byte, four value bytes.
+ */
+static void Ebml_SerializeUnsigned32(struct EbmlGlobal *glob,
+                                     unsigned int class_id,
+                                     uint64_t ui) {
+  const unsigned char payload_size = 0x80 | 4;
+  Ebml_WriteID(glob, class_id);
+  Ebml_Serialize(glob, &payload_size, sizeof(payload_size), 1);
+  Ebml_Serialize(glob, &ui, sizeof(ui), 4);
+}
+
+static void Ebml_StartSubElement(struct EbmlGlobal *glob, EbmlLoc *ebmlLoc,
+                                 unsigned int class_id) {
+  /* 8-byte placeholder size; Ebml_EndSubElement() overwrites it later. */
+  const uint64_t placeholder = LITERALU64(0x01FFFFFF, 0xFFFFFFFF);
+  Ebml_WriteID(glob, class_id);
+  *ebmlLoc = ftello(glob->stream);
+  Ebml_Serialize(glob, &placeholder, sizeof(placeholder), 8);
+}
+
+/* Back-patches the real size of the element whose size field is at *ebmlLoc. */
+static void Ebml_EndSubElement(struct EbmlGlobal *glob, EbmlLoc *ebmlLoc) {
+  /* Remember where the element ends so writing can resume there. */
+  const off_t end_pos = ftello(glob->stream);
+
+  /* Payload length: everything written after the 8-byte size field, with
+   * the leading 0x01 byte set as it is in the placeholder size.
+   */
+  uint64_t payload_size = end_pos - *ebmlLoc - 8;
+  payload_size |= LITERALU64(0x01000000, 0x00000000);
+
+  /* Overwrite the placeholder written by Ebml_StartSubElement(). */
+  fseeko(glob->stream, *ebmlLoc, SEEK_SET);
+  Ebml_Serialize(glob, &payload_size, sizeof(payload_size), 8);
+
+  /* Go back to the end of the element. */
+  fseeko(glob->stream, end_pos, SEEK_SET);
+}
+
+/* Writes one Seek entry: the element id and its offset from the reference. */
+void write_webm_seek_element(struct EbmlGlobal *ebml, unsigned int id,
+                             off_t pos) {
+  EbmlLoc seek_loc;
+  const uint64_t relative_pos = pos - ebml->position_reference;
+  Ebml_StartSubElement(ebml, &seek_loc, Seek);
+  Ebml_SerializeBinary(ebml, SeekID, id);
+  Ebml_SerializeUnsigned64(ebml, SeekPosition, relative_pos);
+  Ebml_EndSubElement(ebml, &seek_loc);
+}
+
+/* Writes the SeekHead and Segment Info. The first call records the position;
+ * later calls seek back to it and overwrite both elements in place. */
+void write_webm_seek_info(struct EbmlGlobal *ebml) {
+  off_t pos;
+  EbmlLoc start;
+  EbmlLoc startInfo;
+  uint64_t frame_time = 0;
+  char version_string[64];
+
+  /* Save the current stream pointer. */
+  pos = ftello(ebml->stream);
+  if (ebml->seek_info_pos)
+    fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET);
+  else
+    ebml->seek_info_pos = pos;
+
+  Ebml_StartSubElement(ebml, &start, SeekHead);
+  write_webm_seek_element(ebml, Tracks, ebml->track_pos);
+  write_webm_seek_element(ebml, Cues, ebml->cue_pos);
+  write_webm_seek_element(ebml, Info, ebml->segment_info_pos);
+  Ebml_EndSubElement(ebml, &start);
+
+  /* Debug mode leaves the library version out of the application strings. */
+  strcpy(version_string, "vpxenc");
+  if (!ebml->debug) {
+    strcat(version_string, " ");
+    strncat(version_string, vpx_codec_version_str(),
+            sizeof(version_string) - 1 - strlen(version_string));
+  }
+
+  /* One frame's duration in ms; a zero frame rate must not divide by zero. */
+  if (ebml->framerate.num)
+    frame_time = (uint64_t)1000 * ebml->framerate.den / ebml->framerate.num;
+  ebml->segment_info_pos = ftello(ebml->stream);
+  Ebml_StartSubElement(ebml, &startInfo, Info);
+  Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
+  Ebml_SerializeFloat(ebml, Segment_Duration,
+                      (double)(ebml->last_pts_ms + frame_time));
+  Ebml_SerializeString(ebml, 0x4D80, version_string);  /* MuxingApp */
+  Ebml_SerializeString(ebml, 0x5741, version_string);  /* WritingApp */
+  Ebml_EndSubElement(ebml, &startInfo);
+}
+
+/* Writes everything that precedes the first cluster: the EBML header, the
+ * opening of the Segment, the seek info, and the single video track.
+ */
+void write_webm_file_header(struct EbmlGlobal *glob,
+                            const vpx_codec_enc_cfg_t *cfg,
+                            const struct vpx_rational *fps,
+                            stereo_format_t stereo_fmt,
+                            unsigned int fourcc) {
+  const unsigned int track_number = 1;
+  const uint64_t placeholder_uid = 0;  /* Rewritten by the file footer. */
+  const unsigned int width = cfg->g_w;
+  const unsigned int height = cfg->g_h;
+  EbmlLoc header_loc;
+  EbmlLoc tracks_loc;
+  EbmlLoc entry_loc;
+  EbmlLoc video_loc;
+
+  /* EBML header. */
+  Ebml_StartSubElement(glob, &header_loc, EBML);
+  Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
+  Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1);
+  Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4);
+  Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8);
+  Ebml_SerializeString(glob, DocType, "webm");
+  Ebml_SerializeUnsigned(glob, DocTypeVersion, 2);
+  Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2);
+  Ebml_EndSubElement(glob, &header_loc);
+
+  /* Segment; it is left open for the clusters that follow. */
+  Ebml_StartSubElement(glob, &glob->startSegment, Segment);
+  glob->position_reference = ftello(glob->stream);
+  glob->framerate = *fps;
+  write_webm_seek_info(glob);
+
+  /* Tracks, holding one TrackEntry. */
+  glob->track_pos = ftello(glob->stream);
+  Ebml_StartSubElement(glob, &tracks_loc, Tracks);
+  Ebml_StartSubElement(glob, &entry_loc, TrackEntry);
+  Ebml_SerializeUnsigned(glob, TrackNumber, track_number);
+  glob->track_id_pos = ftello(glob->stream);
+  Ebml_SerializeUnsigned32(glob, TrackUID, placeholder_uid);
+  Ebml_SerializeUnsigned(glob, TrackType, 1);
+  Ebml_SerializeString(glob, CodecID,
+                       fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9");
+
+  /* Video settings nested inside the TrackEntry. */
+  Ebml_StartSubElement(glob, &video_loc, Video);
+  Ebml_SerializeUnsigned(glob, PixelWidth, width);
+  Ebml_SerializeUnsigned(glob, PixelHeight, height);
+  Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
+  Ebml_EndSubElement(glob, &video_loc);
+
+  /* Close the TrackEntry first, then Tracks. */
+  Ebml_EndSubElement(glob, &entry_loc);
+  Ebml_EndSubElement(glob, &tracks_loc);
+}
+
+void write_webm_block(struct EbmlGlobal *glob,
+                      const vpx_codec_enc_cfg_t *cfg,
+                      const vpx_codec_cx_pkt_t *pkt) {
+  unsigned int block_length;
+  unsigned char track_number;
+  uint16_t block_timecode = 0;
+  unsigned char flags;
+  int64_t pts_ms;
+  int start_cluster = 0, is_keyframe;
+  /* Writes pkt as one SimpleBlock, starting a new Cluster first if needed.
+   * Step one: calculate the PTS of this frame in milliseconds. */
+  pts_ms = pkt->data.frame.pts * 1000
+           * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
+
+  if (pts_ms <= glob->last_pts_ms)
+    pts_ms = glob->last_pts_ms + 1;  /* Keep timestamps increasing. */
+
+  glob->last_pts_ms = pts_ms;
+
+  /* Block time relative to the cluster; too large forces a new cluster. */
+  if (pts_ms - glob->cluster_timecode > SHRT_MAX)
+    start_cluster = 1;
+  else
+    block_timecode = (uint16_t)pts_ms - glob->cluster_timecode;
+
+  is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
+  if (start_cluster || is_keyframe) {
+    if (glob->cluster_open)
+      Ebml_EndSubElement(glob, &glob->startCluster);
+
+    /* Open the new cluster. */
+    block_timecode = 0;
+    glob->cluster_open = 1;
+    glob->cluster_timecode = (uint32_t)pts_ms;
+    glob->cluster_pos = ftello(glob->stream);
+    Ebml_StartSubElement(glob, &glob->startCluster, Cluster);
+    Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
+
+    /* Save a cue point if this is a keyframe. */
+    if (is_keyframe) {
+      struct cue_entry *cue, *new_cue_list;
+
+      new_cue_list = realloc(glob->cue_list,
+                             (glob->cues + 1) * sizeof(struct cue_entry));
+      if (new_cue_list)
+        glob->cue_list = new_cue_list;
+      else
+        fatal("Failed to realloc cue list.");
+
+      cue = &glob->cue_list[glob->cues];
+      cue->time = glob->cluster_timecode;
+      cue->loc = glob->cluster_pos;
+      glob->cues++;
+    }
+  }
+
+  /* Write the Simple Block: ID, size, track, timecode, flags, payload. */
+  Ebml_WriteID(glob, SimpleBlock);
+
+  block_length = (unsigned int)pkt->data.frame.sz + 4;  /* + 4 header bytes */
+  block_length |= 0x10000000;
+  Ebml_Serialize(glob, &block_length, sizeof(block_length), 4);
+
+  track_number = 1;
+  track_number |= 0x80;
+  Ebml_Write(glob, &track_number, 1);
+
+  Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2);
+
+  flags = 0;
+  if (is_keyframe)
+    flags |= 0x80;  /* Keyframe bit. */
+  if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
+    flags |= 0x08;  /* Invisible-frame bit. */
+  Ebml_Write(glob, &flags, 1);
+
+  Ebml_Write(glob, pkt->data.frame.buf, (unsigned int)pkt->data.frame.sz);
+}
+
+/* Closes any open cluster, writes the Cues and closes the Segment, then goes
+ * back to rewrite the seek info and the track UID. */
+void write_webm_file_footer(struct EbmlGlobal *glob, int hash) {
+  EbmlLoc cues_loc;
+  EbmlLoc point_loc;
+  EbmlLoc positions_loc;
+  unsigned int idx;
+
+  if (glob->cluster_open)
+    Ebml_EndSubElement(glob, &glob->startCluster);
+
+  /* One CuePoint per entry recorded by write_webm_block(). */
+  glob->cue_pos = ftello(glob->stream);
+  Ebml_StartSubElement(glob, &cues_loc, Cues);
+  for (idx = 0; idx < glob->cues; ++idx) {
+    const struct cue_entry *const entry = glob->cue_list + idx;
+
+    Ebml_StartSubElement(glob, &point_loc, CuePoint);
+    Ebml_SerializeUnsigned(glob, CueTime, entry->time);
+    Ebml_StartSubElement(glob, &positions_loc, CueTrackPositions);
+    Ebml_SerializeUnsigned(glob, CueTrack, 1);
+    Ebml_SerializeUnsigned64(glob, CueClusterPosition,
+                             entry->loc - glob->position_reference);
+    Ebml_EndSubElement(glob, &positions_loc);
+    Ebml_EndSubElement(glob, &point_loc);
+  }
+  Ebml_EndSubElement(glob, &cues_loc);
+
+  /* The Segment was opened by write_webm_file_header(). */
+  Ebml_EndSubElement(glob, &glob->startSegment);
+
+  /* Rewrite the seek info now that the final positions are known. */
+  write_webm_seek_info(glob);
+
+  /* Replace the placeholder track UID with the real one. */
+  fseeko(glob->stream, glob->track_id_pos, SEEK_SET);
+  Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash);
+
+  fseeko(glob->stream, 0, SEEK_END);
+}
diff --git a/webmenc.h b/webmenc.h
new file mode 100644
index 000000000..f3bc3ecd2
--- /dev/null
+++ b/webmenc.h
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef WEBMENC_H_
+#define WEBMENC_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(_MSC_VER)
+/* MSVS doesn't define off_t */
+typedef __int64 off_t;
+#else
+#include <stdint.h>
+#endif
+
+#include "tools_common.h"
+#include "vpx/vpx_encoder.h"
+
+typedef off_t EbmlLoc;
+
+struct cue_entry {
+  unsigned int time;  /* Timecode of the cluster the cue points at. */
+  uint64_t loc;       /* Stream offset at which that cluster starts. */
+};
+
+struct EbmlGlobal {
+  int debug;  /* Nonzero: fixed TrackUID and no library version string. */
+
+  FILE *stream;              /* Output file all elements are written to. */
+  int64_t last_pts_ms;       /* PTS in ms of the last block written. */
+  vpx_rational_t framerate;  /* Set by write_webm_file_header() from fps. */
+
+  /* These offsets are to the start of an element */
+  off_t position_reference;  /* Taken right after the Segment size field. */
+  off_t seek_info_pos;
+  off_t segment_info_pos;
+  off_t track_pos;
+  off_t cue_pos;
+  off_t cluster_pos;
+
+  /* This offset is to a specific element to be serialized */
+  off_t track_id_pos;
+
+  /* These offsets are to the size field of the element */
+  EbmlLoc startSegment;
+  EbmlLoc startCluster;
+
+  uint32_t cluster_timecode;  /* Timecode in ms of the current cluster. */
+  int cluster_open;           /* Nonzero once a cluster has been started. */
+
+  struct cue_entry *cue_list;  /* Array grown with realloc() per keyframe. */
+  unsigned int cues;           /* Number of entries in cue_list. */
+};
+
+/* Stereo 3D packed frame format; the value is written as StereoMode. */
+typedef enum stereo_format {
+  STEREO_FORMAT_MONO = 0,
+  STEREO_FORMAT_LEFT_RIGHT = 1,
+  STEREO_FORMAT_BOTTOM_TOP = 2,
+  STEREO_FORMAT_TOP_BOTTOM = 3,
+  STEREO_FORMAT_RIGHT_LEFT = 11
+} stereo_format_t;
+
+void write_webm_seek_element(struct EbmlGlobal *ebml,
+                             unsigned int id,
+                             off_t pos);
+
+void write_webm_file_header(struct EbmlGlobal *glob,
+                            const vpx_codec_enc_cfg_t *cfg,
+                            const struct vpx_rational *fps,
+                            stereo_format_t stereo_fmt,
+                            unsigned int fourcc);
+
+void write_webm_block(struct EbmlGlobal *glob,
+                      const vpx_codec_enc_cfg_t *cfg,
+                      const vpx_codec_cx_pkt_t *pkt);
+
+void write_webm_file_footer(struct EbmlGlobal *glob, int hash);
+
+#endif  // WEBMENC_H_