86 files changed, 1134 insertions, 5172 deletions
diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index 983f52d93..5bc657576 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -150,6 +150,7 @@ static void parse_command_line(int argc, const char **argv_,
   enc_cfg->rc_target_bitrate = default_bitrate;
   enc_cfg->kf_min_dist = default_kf_dist;
   enc_cfg->kf_max_dist = default_kf_dist;
+  enc_cfg->rc_end_usage = VPX_CQ;
 
   // initialize AppInput with default values
   app_input->frames_to_code = default_frames_to_code;
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index e45b50c15..9f32bd8fe 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -663,5 +663,6 @@ int main(int argc, char **argv) {
   for (i = 0; i < cfg.ts_number_layers; ++i)
     vpx_video_writer_close(outfile[i]);
 
+  vpx_img_free(&raw);
   return EXIT_SUCCESS;
 }
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index a6dcc9875..3412ddd23 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -221,8 +221,12 @@ class ConvolveTest : public ::testing::TestWithParam<convolve_param_t> {
     }
 
     ::libvpx_test::ACMRandom prng;
-    for (int i = 0; i < kInputBufferSize; ++i)
-      input_[i] = prng.Rand8Extremes();
+    for (int i = 0; i < kInputBufferSize; ++i) {
+      if (i & 1)
+        input_[i] = 255;
+      else
+        input_[i] = prng.Rand8Extremes();
+    }
   }
 
   void SetConstantInput(int value) {
@@ -640,6 +644,28 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
     make_tuple(64, 64, &convolve8_ssse3)));
 #endif
 
+#if HAVE_AVX2
+const ConvolveFunctions convolve8_avx2(
+    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
+    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
+    vp9_convolve8_avx2, vp9_convolve8_avg_ssse3);
+
+INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
+    make_tuple(4, 4, &convolve8_avx2),
+    make_tuple(8, 4, &convolve8_avx2),
+    make_tuple(4, 8, &convolve8_avx2),
+    make_tuple(8, 8, &convolve8_avx2),
+    make_tuple(16, 8, &convolve8_avx2),
+    make_tuple(8, 16, &convolve8_avx2),
+    make_tuple(16, 16, &convolve8_avx2),
+    make_tuple(32, 16, &convolve8_avx2),
+    make_tuple(16, 32, &convolve8_avx2),
+    make_tuple(32, 32, &convolve8_avx2),
+    make_tuple(64, 32, &convolve8_avx2),
+    make_tuple(32, 64, &convolve8_avx2),
+    make_tuple(64, 64, &convolve8_avx2)));
+#endif
+
 #if HAVE_NEON_ASM
 const ConvolveFunctions convolve8_neon(
     vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 7900bcff7..20b1c8fbd 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -512,9 +512,7 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
 
-// FIXME (jingning, fgalligan): need to simplify the corresponding steps
-// in neov version accordingly, and re-enable the unit test
-#if HAVE_NEON_ASM && 0
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
     NEON, Trans16x16DCT,
     ::testing::Values(
@@ -536,4 +534,11 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
 #endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, Trans16x16DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0)));
+#endif
 }  // namespace
diff --git a/test/sad_test.cc b/test/sad_test.cc
index a692891ad..adb191fd0 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -29,12 +29,22 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 
+#if CONFIG_VP8_ENCODER
 typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr,
                                         int source_stride,
                                         const unsigned char *reference_ptr,
                                         int reference_stride,
                                         unsigned int max_sad);
 typedef std::tr1::tuple<int, int, sad_m_by_n_fn_t> sad_m_by_n_test_param_t;
+#endif
+#if CONFIG_VP9_ENCODER
+typedef unsigned int (*sad_m_by_n_fn_vp9_t)(const unsigned char *source_ptr,
+                                            int source_stride,
+                                            const unsigned char *reference_ptr,
+                                            int reference_stride);
+typedef std::tr1::tuple<int, int, sad_m_by_n_fn_vp9_t>
+                  sad_m_by_n_test_param_vp9_t;
+#endif
 
 typedef void (*sad_n_by_n_by_4_fn_t)(const uint8_t *src_ptr,
                                      int src_stride,
@@ -87,7 +97,7 @@ class SADTestBase : public ::testing::Test {
 
   // Sum of Absolute Differences. Given two blocks, calculate the absolute
   // difference between two pixels in the same relative location; accumulate.
-  unsigned int ReferenceSAD(unsigned int max_sad, int block_idx = 0) {
+  unsigned int ReferenceSAD(unsigned int max_sad, int block_idx) {
     unsigned int sad = 0;
     const uint8_t* const reference = GetReference(block_idx);
 
@@ -128,13 +138,43 @@ class SADTestBase : public ::testing::Test {
   ACMRandom rnd_;
 };
 
-class SADTest : public SADTestBase,
-    public ::testing::WithParamInterface<sad_m_by_n_test_param_t> {
+class SADx4Test
+    : public SADTestBase,
+      public ::testing::WithParamInterface<sad_n_by_n_by_4_test_param_t> {
+ public:
+  SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  void SADs(unsigned int *results) {
+    const uint8_t* refs[] = {GetReference(0), GetReference(1),
+                             GetReference(2), GetReference(3)};
+
+    REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
+                                      refs, reference_stride_,
+                                      results));
+  }
+
+  void CheckSADs() {
+    unsigned int reference_sad, exp_sad[4];
+
+    SADs(exp_sad);
+    for (int block = 0; block < 4; ++block) {
+      reference_sad = ReferenceSAD(UINT_MAX, block);
+
+      EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
+    }
+  }
+};
+
+#if CONFIG_VP8_ENCODER
+class SADTest
+    : public SADTestBase,
+      public ::testing::WithParamInterface<sad_m_by_n_test_param_t> {
  public:
   SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
 
  protected:
-  unsigned int SAD(unsigned int max_sad, int block_idx = 0) {
+  unsigned int SAD(unsigned int max_sad, int block_idx) {
     unsigned int ret;
     const uint8_t* const reference = GetReference(block_idx);
 
@@ -144,11 +184,11 @@ class SADTest : public SADTestBase,
     return ret;
   }
 
-  void CheckSad(unsigned int max_sad) {
+  void CheckSAD(unsigned int max_sad) {
     unsigned int reference_sad, exp_sad;
 
-    reference_sad = ReferenceSAD(max_sad);
-    exp_sad = SAD(max_sad);
+    reference_sad = ReferenceSAD(max_sad, 0);
+    exp_sad = SAD(max_sad, 0);
 
     if (reference_sad <= max_sad) {
       ASSERT_EQ(exp_sad, reference_sad);
@@ -158,43 +198,133 @@ class SADTest : public SADTestBase,
     }
   }
 };
+#endif  // CONFIG_VP8_ENCODER
 
-class SADx4Test : public SADTestBase,
-    public ::testing::WithParamInterface<sad_n_by_n_by_4_test_param_t> {
+#if CONFIG_VP9_ENCODER
+class SADVP9Test
+    : public SADTestBase,
+      public ::testing::WithParamInterface<sad_m_by_n_test_param_vp9_t> {
  public:
-  SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+  SADVP9Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
 
  protected:
-  void SADs(unsigned int *results) {
-    const uint8_t* refs[] = {GetReference(0), GetReference(1),
-                             GetReference(2), GetReference(3)};
+  unsigned int SAD(int block_idx) {
+    unsigned int ret;
+    const uint8_t* const reference = GetReference(block_idx);
 
-    REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
-                                      refs, reference_stride_,
-                                      results));
+    REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                            reference, reference_stride_));
+    return ret;
   }
 
-  void CheckSADs() {
-    unsigned int reference_sad, exp_sad[4];
+  void CheckSAD() {
+    unsigned int reference_sad, exp_sad;
 
-    SADs(exp_sad);
-    for (int block = 0; block < 4; block++) {
-      reference_sad = ReferenceSAD(UINT_MAX, block);
+    reference_sad = ReferenceSAD(UINT_MAX, 0);
+    exp_sad = SAD(0);
 
-      EXPECT_EQ(exp_sad[block], reference_sad) << "block " << block;
-    }
+    ASSERT_EQ(reference_sad, exp_sad);
   }
 };
+#endif  // CONFIG_VP9_ENCODER
 
 uint8_t* SADTestBase::source_data_ = NULL;
 uint8_t* SADTestBase::reference_data_ = NULL;
 
+#if CONFIG_VP8_ENCODER
 TEST_P(SADTest, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
   FillConstant(reference_data_, reference_stride_, 255);
-  CheckSad(UINT_MAX);
+  CheckSAD(UINT_MAX);
+}
+
+TEST_P(SADTest, MaxSrc) {
+  FillConstant(source_data_, source_stride_, 255);
+  FillConstant(reference_data_, reference_stride_, 0);
+  CheckSAD(UINT_MAX);
+}
+
+TEST_P(SADTest, ShortRef) {
+  int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD(UINT_MAX);
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD(UINT_MAX);
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD(UINT_MAX);
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(SADTest, MaxSAD) {
+  // Verify that, when max_sad is set, the implementation does not return a
+  // value lower than the reference.
+  FillConstant(source_data_, source_stride_, 255);
+  FillConstant(reference_data_, reference_stride_, 0);
+  CheckSAD(128);
+}
+#endif  // CONFIG_VP8_ENCODER
+
+#if CONFIG_VP9_ENCODER
+TEST_P(SADVP9Test, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(reference_data_, reference_stride_, 255);
+  CheckSAD();
+}
+
+TEST_P(SADVP9Test, MaxSrc) {
+  FillConstant(source_data_, source_stride_, 255);
+  FillConstant(reference_data_, reference_stride_, 0);
+  CheckSAD();
+}
+
+TEST_P(SADVP9Test, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(SADVP9Test, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
 }
 
+TEST_P(SADVP9Test, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  source_stride_ = tmp_stride;
+}
+#endif  // CONFIG_VP9_ENCODER
+
 TEST_P(SADx4Test, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
   FillConstant(GetReference(0), reference_stride_, 255);
@@ -204,12 +334,6 @@ TEST_P(SADx4Test, MaxRef) {
   CheckSADs();
 }
 
-TEST_P(SADTest, MaxSrc) {
-  FillConstant(source_data_, source_stride_, 255);
-  FillConstant(reference_data_, reference_stride_, 0);
-  CheckSad(UINT_MAX);
-}
-
 TEST_P(SADx4Test, MaxSrc) {
   FillConstant(source_data_, source_stride_, 255);
   FillConstant(GetReference(0), reference_stride_, 0);
@@ -219,15 +343,6 @@ TEST_P(SADx4Test, MaxSrc) {
   CheckSADs();
 }
 
-TEST_P(SADTest, ShortRef) {
-  int tmp_stride = reference_stride_;
-  reference_stride_ >>= 1;
-  FillRandom(source_data_, source_stride_);
-  FillRandom(reference_data_, reference_stride_);
-  CheckSad(UINT_MAX);
-  reference_stride_ = tmp_stride;
-}
-
 TEST_P(SADx4Test, ShortRef) {
   int tmp_stride = reference_stride_;
   reference_stride_ >>= 1;
@@ -240,17 +355,6 @@ TEST_P(SADx4Test, ShortRef) {
   reference_stride_ = tmp_stride;
 }
 
-TEST_P(SADTest, UnalignedRef) {
-  // The reference frame, but not the source frame, may be unaligned for
-  // certain types of searches.
-  int tmp_stride = reference_stride_;
-  reference_stride_ -= 1;
-  FillRandom(source_data_, source_stride_);
-  FillRandom(reference_data_, reference_stride_);
-  CheckSad(UINT_MAX);
-  reference_stride_ = tmp_stride;
-}
-
 TEST_P(SADx4Test, UnalignedRef) {
   // The reference frame, but not the source frame, may be unaligned for
   // certain types of searches.
@@ -265,15 +369,6 @@ TEST_P(SADx4Test, UnalignedRef) {
   reference_stride_ = tmp_stride;
 }
 
-TEST_P(SADTest, ShortSrc) {
-  int tmp_stride = source_stride_;
-  source_stride_ >>= 1;
-  FillRandom(source_data_, source_stride_);
-  FillRandom(reference_data_, reference_stride_);
-  CheckSad(UINT_MAX);
-  source_stride_ = tmp_stride;
-}
-
 TEST_P(SADx4Test, ShortSrc) {
   int tmp_stride = source_stride_;
   source_stride_ >>= 1;
@@ -286,14 +381,6 @@ TEST_P(SADx4Test, ShortSrc) {
   source_stride_ = tmp_stride;
 }
 
-TEST_P(SADTest, MaxSAD) {
-  // Verify that, when max_sad is set, the implementation does not return a
-  // value lower than the reference.
-  FillConstant(source_data_, source_stride_, 255);
-  FillConstant(reference_data_, reference_stride_, 0);
-  CheckSad(128);
-}
-
 using std::tr1::make_tuple;
 
 //------------------------------------------------------------------------------
@@ -304,27 +391,27 @@ const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c;
 const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c;
 const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c;
 const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c;
-#endif
-#if CONFIG_VP9_ENCODER
-const sad_m_by_n_fn_t sad_64x64_c_vp9 = vp9_sad64x64_c;
-const sad_m_by_n_fn_t sad_32x32_c_vp9 = vp9_sad32x32_c;
-const sad_m_by_n_fn_t sad_16x16_c_vp9 = vp9_sad16x16_c;
-const sad_m_by_n_fn_t sad_8x16_c_vp9 = vp9_sad8x16_c;
-const sad_m_by_n_fn_t sad_16x8_c_vp9 = vp9_sad16x8_c;
-const sad_m_by_n_fn_t sad_8x8_c_vp9 = vp9_sad8x8_c;
-const sad_m_by_n_fn_t sad_8x4_c_vp9 = vp9_sad8x4_c;
-const sad_m_by_n_fn_t sad_4x8_c_vp9 = vp9_sad4x8_c;
-const sad_m_by_n_fn_t sad_4x4_c_vp9 = vp9_sad4x4_c;
-#endif
 const sad_m_by_n_test_param_t c_tests[] = {
-#if CONFIG_VP8_ENCODER
   make_tuple(16, 16, sad_16x16_c),
   make_tuple(8, 16, sad_8x16_c),
   make_tuple(16, 8, sad_16x8_c),
   make_tuple(8, 8, sad_8x8_c),
   make_tuple(4, 4, sad_4x4_c),
-#endif
+};
+INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
+#endif  // CONFIG_VP8_ENCODER
+
 #if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_vp9_t sad_64x64_c_vp9 = vp9_sad64x64_c;
+const sad_m_by_n_fn_vp9_t sad_32x32_c_vp9 = vp9_sad32x32_c;
+const sad_m_by_n_fn_vp9_t sad_16x16_c_vp9 = vp9_sad16x16_c;
+const sad_m_by_n_fn_vp9_t sad_8x16_c_vp9 = vp9_sad8x16_c;
+const sad_m_by_n_fn_vp9_t sad_16x8_c_vp9 = vp9_sad16x8_c;
+const sad_m_by_n_fn_vp9_t sad_8x8_c_vp9 = vp9_sad8x8_c;
+const sad_m_by_n_fn_vp9_t sad_8x4_c_vp9 = vp9_sad8x4_c;
+const sad_m_by_n_fn_vp9_t sad_4x8_c_vp9 = vp9_sad4x8_c;
+const sad_m_by_n_fn_vp9_t sad_4x4_c_vp9 = vp9_sad4x4_c;
+const sad_m_by_n_test_param_vp9_t c_vp9_tests[] = {
   make_tuple(64, 64, sad_64x64_c_vp9),
   make_tuple(32, 32, sad_32x32_c_vp9),
   make_tuple(16, 16, sad_16x16_c_vp9),
@@ -334,11 +421,9 @@ const sad_m_by_n_test_param_t c_tests[] = {
   make_tuple(8, 4, sad_8x4_c_vp9),
   make_tuple(4, 8, sad_4x8_c_vp9),
   make_tuple(4, 4, sad_4x4_c_vp9),
-#endif
 };
-INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
+INSTANTIATE_TEST_CASE_P(C, SADVP9Test, ::testing::ValuesIn(c_vp9_tests));
 
-#if CONFIG_VP9_ENCODER
 const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c;
 const sad_n_by_n_by_4_fn_t sad_64x32x4d_c = vp9_sad64x32x4d_c;
 const sad_n_by_n_by_4_fn_t sad_32x64x4d_c = vp9_sad32x64x4d_c;
@@ -375,8 +460,8 @@ INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
 const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6;
 INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values(
                         make_tuple(16, 16, sad_16x16_armv6)));
-#endif
-#endif
+#endif  // CONFIG_VP8_ENCODER
+#endif  // HAVE_MEDIA
 
 #if HAVE_NEON
 #if CONFIG_VP8_ENCODER
@@ -391,8 +476,8 @@ INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values(
                         make_tuple(16, 8, sad_16x8_neon),
                         make_tuple(8, 8, sad_8x8_neon),
                         make_tuple(4, 4, sad_4x4_neon)));
-#endif
-#endif
+#endif  // CONFIG_VP8_ENCODER
+#endif  // HAVE_NEON
 
 //------------------------------------------------------------------------------
 // x86 functions
@@ -403,40 +488,39 @@ const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx;
 const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx;
 const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx;
 const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx;
-#endif
-#if CONFIG_VP9_ENCODER
-const sad_m_by_n_fn_t sad_16x16_mmx_vp9 = vp9_sad16x16_mmx;
-const sad_m_by_n_fn_t sad_8x16_mmx_vp9 = vp9_sad8x16_mmx;
-const sad_m_by_n_fn_t sad_16x8_mmx_vp9 = vp9_sad16x8_mmx;
-const sad_m_by_n_fn_t sad_8x8_mmx_vp9 = vp9_sad8x8_mmx;
-const sad_m_by_n_fn_t sad_4x4_mmx_vp9 = vp9_sad4x4_mmx;
-#endif
-
 const sad_m_by_n_test_param_t mmx_tests[] = {
-#if CONFIG_VP8_ENCODER
   make_tuple(16, 16, sad_16x16_mmx),
   make_tuple(8, 16, sad_8x16_mmx),
   make_tuple(16, 8, sad_16x8_mmx),
   make_tuple(8, 8, sad_8x8_mmx),
   make_tuple(4, 4, sad_4x4_mmx),
-#endif
+};
+INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
+#endif  // CONFIG_VP8_ENCODER
+
 #if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_vp9_t sad_16x16_mmx_vp9 = vp9_sad16x16_mmx;
+const sad_m_by_n_fn_vp9_t sad_8x16_mmx_vp9 = vp9_sad8x16_mmx;
+const sad_m_by_n_fn_vp9_t sad_16x8_mmx_vp9 = vp9_sad16x8_mmx;
+const sad_m_by_n_fn_vp9_t sad_8x8_mmx_vp9 = vp9_sad8x8_mmx;
+const sad_m_by_n_fn_vp9_t sad_4x4_mmx_vp9 = vp9_sad4x4_mmx;
+const sad_m_by_n_test_param_vp9_t mmx_vp9_tests[] = {
   make_tuple(16, 16, sad_16x16_mmx_vp9),
   make_tuple(8, 16, sad_8x16_mmx_vp9),
   make_tuple(16, 8, sad_16x8_mmx_vp9),
   make_tuple(8, 8, sad_8x8_mmx_vp9),
   make_tuple(4, 4, sad_4x4_mmx_vp9),
-#endif
 };
-INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
-#endif
+INSTANTIATE_TEST_CASE_P(MMX, SADVP9Test, ::testing::ValuesIn(mmx_vp9_tests));
+#endif  // CONFIG_VP9_ENCODER
+#endif  // HAVE_MMX
 
 #if HAVE_SSE
 #if CONFIG_VP9_ENCODER
 #if CONFIG_USE_X86INC
-const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
-const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
-INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
+const sad_m_by_n_fn_vp9_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
+const sad_m_by_n_fn_vp9_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
+INSTANTIATE_TEST_CASE_P(SSE, SADVP9Test, ::testing::Values(
                         make_tuple(4, 4, sad_4x4_sse_vp9),
                         make_tuple(4, 8, sad_4x8_sse_vp9)));
 
@@ -456,32 +540,30 @@ const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt;
 const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt;
 const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
 const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
-#endif
-#if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
-const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
-const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
-const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
-const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
-const sad_m_by_n_fn_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
-const sad_m_by_n_fn_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
-const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
-const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
-const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
-const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
-const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
-#endif
-#endif
 const sad_m_by_n_test_param_t sse2_tests[] = {
-#if CONFIG_VP8_ENCODER
   make_tuple(16, 16, sad_16x16_wmt),
   make_tuple(8, 16, sad_8x16_wmt),
   make_tuple(16, 8, sad_16x8_wmt),
   make_tuple(8, 8, sad_8x8_wmt),
   make_tuple(4, 4, sad_4x4_wmt),
-#endif
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
+#endif  // CONFIG_VP8_ENCODER
+
 #if CONFIG_VP9_ENCODER
 #if CONFIG_USE_X86INC
+const sad_m_by_n_fn_vp9_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
+const sad_m_by_n_fn_vp9_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
+const sad_m_by_n_fn_vp9_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
+const sad_m_by_n_fn_vp9_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
+const sad_m_by_n_fn_vp9_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
+const sad_m_by_n_fn_vp9_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
+const sad_m_by_n_fn_vp9_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
+const sad_m_by_n_fn_vp9_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
+const sad_m_by_n_fn_vp9_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
+const sad_m_by_n_fn_vp9_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
+const sad_m_by_n_fn_vp9_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
+const sad_m_by_n_test_param_vp9_t sse2_vp9_tests[] = {
   make_tuple(64, 64, sad_64x64_sse2_vp9),
   make_tuple(64, 32, sad_64x32_sse2_vp9),
   make_tuple(32, 64, sad_32x64_sse2_vp9),
@@ -493,13 +575,9 @@ const sad_m_by_n_test_param_t sse2_tests[] = {
   make_tuple(8, 16, sad_8x16_sse2_vp9),
   make_tuple(8, 8, sad_8x8_sse2_vp9),
   make_tuple(8, 4, sad_8x4_sse2_vp9),
-#endif
-#endif
 };
-INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
+INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::ValuesIn(sse2_vp9_tests));
 
-#if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
 const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
 const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
 const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
@@ -523,9 +601,9 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
                         make_tuple(8, 16, sad_8x16x4d_sse2),
                         make_tuple(8, 8, sad_8x8x4d_sse2),
                         make_tuple(8, 4, sad_8x4x4d_sse2)));
-#endif
-#endif
-#endif
+#endif  // CONFIG_USE_X86INC
+#endif  // CONFIG_VP9_ENCODER
+#endif  // HAVE_SSE2
 
 #if HAVE_SSE3
 #if CONFIG_VP8_ENCODER
@@ -540,8 +618,8 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
                         make_tuple(8, 16, sad_8x16x4d_sse3),
                         make_tuple(8, 8, sad_8x8x4d_sse3),
                         make_tuple(4, 4, sad_4x4x4d_sse3)));
-#endif
-#endif
+#endif  // CONFIG_VP8_ENCODER
+#endif  // HAVE_SSE3
 
 #if HAVE_SSSE3
 #if CONFIG_USE_X86INC
@@ -549,8 +627,8 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
 const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
 INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
                         make_tuple(16, 16, sad_16x16_sse3)));
-#endif
-#endif
-#endif
+#endif  // CONFIG_VP8_ENCODER
+#endif  // CONFIG_USE_X86INC
+#endif  // HAVE_SSSE3
 
 }  // namespace
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 472111c23..bb024291e 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -307,7 +307,7 @@ run_tests() {
   local test_name="${VPX_TEST_NAME}"
 
   if [ -z "${test_name}" ]; then
-    test_name="$(basename \"${0%.*}\")"
+    test_name="$(basename "${0%.*}")"
   fi
 
   if [ "${VPX_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then
diff --git a/third_party/libmkv/EbmlBufferWriter.c b/third_party/libmkv/EbmlBufferWriter.c
deleted file mode 100644
index 116cbc4e7..000000000
--- a/third_party/libmkv/EbmlBufferWriter.c
+++ /dev/null
@@ -1,52 +0,0 @@
-// #include <strmif.h>
-#include "EbmlBufferWriter.h"
-#include "EbmlWriter.h"
-// #include <cassert>
-// #include <limits>
-// #include <malloc.h>  //_alloca
-#include <stdlib.h>
-#include <wchar.h>
-#include <string.h>
-
-void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) {
-  unsigned char *src = glob->buf;
-  src += glob->offset;
-  memcpy(src, buffer_in, len);
-  glob->offset += len;
-}
-
-static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q) {
-  while (q != p) {
-    --q;
-    memcpy(&(glob->buf[glob->offset]), q, 1);
-    glob->offset++;
-  }
-}
-
-void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len) {
-  // assert(buf);
-
-  const unsigned char *const p = (const unsigned char *)(buffer_in);
-  const unsigned char *const q = p + len;
-
-  _Serialize(glob, p, q);
-}
-
-
-void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id) {
-  Ebml_WriteID(glob, class_id);
-  ebmlLoc->offset = glob->offset;
-  // todo this is always taking 8 bytes, this may need later optimization
-  unsigned long long unknownLen =  0x01FFFFFFFFFFFFFFLLU;
-  Ebml_Serialize(glob, (void *)&unknownLen, 8); // this is a key that says lenght unknown
-}
-
-void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) {
-  unsigned long long size = glob->offset - ebmlLoc->offset - 8;
-  unsigned long long curOffset = glob->offset;
-  glob->offset = ebmlLoc->offset;
-  size |=  0x0100000000000000LLU;
-  Ebml_Serialize(glob, &size, 8);
-  glob->offset = curOffset;
-}
-
diff --git a/third_party/libmkv/EbmlBufferWriter.h b/third_party/libmkv/EbmlBufferWriter.h
deleted file mode 100644
index acd5c2a28..000000000
--- a/third_party/libmkv/EbmlBufferWriter.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef EBMLBUFFERWRITER_HPP
-#define EBMLBUFFERWRITER_HPP
-
-typedef struct {
-  unsigned long long offset;
-} EbmlLoc;
-
-typedef struct {
-  unsigned char *buf;
-  unsigned int length;
-  unsigned int offset;
-} EbmlGlobal;
-
-
-void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id);
-void Ebml_EndSubElement(EbmlGlobal *glob,  EbmlLoc *ebmlLoc);
-
-
-#endif
diff --git a/third_party/libmkv/EbmlIDs.h b/third_party/libmkv/EbmlIDs.h
deleted file mode 100644
index 44d438583..000000000
--- a/third_party/libmkv/EbmlIDs.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#ifndef MKV_DEFS_HPP
-#define MKV_DEFS_HPP 1
-
-/* Commenting out values not available in webm, but available in matroska */
-
-enum mkv {
-  EBML = 0x1A45DFA3,
-  EBMLVersion = 0x4286,
-  EBMLReadVersion = 0x42F7,
-  EBMLMaxIDLength = 0x42F2,
-  EBMLMaxSizeLength = 0x42F3,
-  DocType = 0x4282,
-  DocTypeVersion = 0x4287,
-  DocTypeReadVersion = 0x4285,
-/* CRC_32 = 0xBF, */
-  Void = 0xEC,
-  SignatureSlot = 0x1B538667,
-  SignatureAlgo = 0x7E8A,
-  SignatureHash = 0x7E9A,
-  SignaturePublicKey = 0x7EA5,
-  Signature = 0x7EB5,
-  SignatureElements = 0x7E5B,
-  SignatureElementList = 0x7E7B,
-  SignedElement = 0x6532,
-  /* segment */
-  Segment = 0x18538067,
-  /* Meta Seek Information */
-  SeekHead = 0x114D9B74,
-  Seek = 0x4DBB,
-  SeekID = 0x53AB,
-  SeekPosition = 0x53AC,
-  /* Segment Information */
-  Info = 0x1549A966,
-/* SegmentUID = 0x73A4, */
-/* SegmentFilename = 0x7384, */
-/* PrevUID = 0x3CB923, */
-/* PrevFilename = 0x3C83AB, */
-/* NextUID = 0x3EB923, */
-/* NextFilename = 0x3E83BB, */
-/* SegmentFamily = 0x4444, */
-/* ChapterTranslate = 0x6924, */
-/* ChapterTranslateEditionUID = 0x69FC, */
-/* ChapterTranslateCodec = 0x69BF, */
-/* ChapterTranslateID = 0x69A5, */
-  TimecodeScale = 0x2AD7B1,
-  Segment_Duration = 0x4489,
-  DateUTC = 0x4461,
-/* Title = 0x7BA9, */
-  MuxingApp = 0x4D80,
-  WritingApp = 0x5741,
-  /* Cluster */
-  Cluster = 0x1F43B675,
-  Timecode = 0xE7,
-/* SilentTracks = 0x5854, */
-/* SilentTrackNumber = 0x58D7, */
-/* Position = 0xA7, */
-  PrevSize = 0xAB,
-  BlockGroup = 0xA0,
-  Block = 0xA1,
-/* BlockVirtual = 0xA2, */
-  BlockAdditions = 0x75A1,
-  BlockMore = 0xA6,
-  BlockAddID = 0xEE,
-  BlockAdditional = 0xA5,
-  BlockDuration = 0x9B,
-/* ReferencePriority = 0xFA, */
-  ReferenceBlock = 0xFB,
-/* ReferenceVirtual = 0xFD, */
-/* CodecState = 0xA4, */
-/* Slices = 0x8E, */
-/* TimeSlice = 0xE8, */
-  LaceNumber = 0xCC,
-/* FrameNumber = 0xCD, */
-/* BlockAdditionID = 0xCB, */
-/* MkvDelay = 0xCE, */
-/* Cluster_Duration = 0xCF, */
-  SimpleBlock = 0xA3,
-/* EncryptedBlock = 0xAF, */
-  /* Track */
-  Tracks = 0x1654AE6B,
-  TrackEntry = 0xAE,
-  TrackNumber = 0xD7,
-  TrackUID = 0x73C5,
-  TrackType = 0x83,
-  FlagEnabled = 0xB9,
-  FlagDefault = 0x88,
-  FlagForced = 0x55AA,
-  FlagLacing = 0x9C,
-/* MinCache = 0x6DE7, */
-/* MaxCache = 0x6DF8, */
-  DefaultDuration = 0x23E383,
-/* TrackTimecodeScale = 0x23314F, */
-/* TrackOffset = 0x537F, */
-  MaxBlockAdditionID = 0x55EE,
-  Name = 0x536E,
-  Language = 0x22B59C,
-  CodecID = 0x86,
-  CodecPrivate = 0x63A2,
-  CodecName = 0x258688,
-/* AttachmentLink = 0x7446, */
-/* CodecSettings = 0x3A9697, */
-/* CodecInfoURL = 0x3B4040, */
-/* CodecDownloadURL = 0x26B240, */
-/* CodecDecodeAll = 0xAA, */
-/* TrackOverlay = 0x6FAB, */
-/* TrackTranslate = 0x6624, */
-/* TrackTranslateEditionUID = 0x66FC, */
-/* TrackTranslateCodec = 0x66BF, */
-/* TrackTranslateTrackID = 0x66A5, */
-  /* video */
-  Video = 0xE0,
-  FlagInterlaced = 0x9A,
-  StereoMode = 0x53B8,
-  AlphaMode = 0x53C0,
-  PixelWidth = 0xB0,
-  PixelHeight = 0xBA,
-  PixelCropBottom = 0x54AA,
-  PixelCropTop = 0x54BB,
-  PixelCropLeft = 0x54CC,
-  PixelCropRight = 0x54DD,
-  DisplayWidth = 0x54B0,
-  DisplayHeight = 0x54BA,
-  DisplayUnit = 0x54B2,
-  AspectRatioType = 0x54B3,
-/* ColourSpace = 0x2EB524, */
-/* GammaValue = 0x2FB523, */
-  FrameRate = 0x2383E3,
-  /* end video */
-  /* audio */
-  Audio = 0xE1,
-  SamplingFrequency = 0xB5,
-  OutputSamplingFrequency = 0x78B5,
-  Channels = 0x9F,
-/* ChannelPositions = 0x7D7B, */
-  BitDepth = 0x6264,
-  /* end audio */
-  /* content encoding */
-/* ContentEncodings = 0x6d80, */
-/* ContentEncoding = 0x6240, */
-/* ContentEncodingOrder = 0x5031, */
-/* ContentEncodingScope = 0x5032, */
-/* ContentEncodingType = 0x5033, */
-/* ContentCompression = 0x5034, */
-/* ContentCompAlgo = 0x4254, */
-/* ContentCompSettings = 0x4255, */
-/* ContentEncryption = 0x5035, */
-/* ContentEncAlgo = 0x47e1, */
-/* ContentEncKeyID = 0x47e2, */
-/* ContentSignature = 0x47e3, */
-/* ContentSigKeyID = 0x47e4, */
-/* ContentSigAlgo = 0x47e5, */
-/* ContentSigHashAlgo = 0x47e6, */
-  /* end content encoding */
-  /* Cueing Data */
-  Cues = 0x1C53BB6B,
-  CuePoint = 0xBB,
-  CueTime = 0xB3,
-  CueTrackPositions = 0xB7,
-  CueTrack = 0xF7,
-  CueClusterPosition = 0xF1,
-  CueBlockNumber = 0x5378
-/* CueCodecState = 0xEA, */
-/* CueReference = 0xDB, */
-/* CueRefTime = 0x96, */
-/* CueRefCluster = 0x97, */
-/* CueRefNumber = 0x535F, */
-/* CueRefCodecState = 0xEB, */
-  /* Attachment */
-/* Attachments = 0x1941A469, */
-/* AttachedFile = 0x61A7, */
-/* FileDescription = 0x467E, */
-/* FileName = 0x466E, */
-/* FileMimeType = 0x4660, */
-/* FileData = 0x465C, */
-/* FileUID = 0x46AE, */
-/* FileReferral = 0x4675, */
-  /* Chapters */
-/* Chapters = 0x1043A770, */
-/* EditionEntry = 0x45B9, */
-/* EditionUID = 0x45BC, */
-/* EditionFlagHidden = 0x45BD, */
-/* EditionFlagDefault = 0x45DB, */
-/* EditionFlagOrdered = 0x45DD, */
-/* ChapterAtom = 0xB6, */
-/* ChapterUID = 0x73C4, */
-/* ChapterTimeStart = 0x91, */
-/* ChapterTimeEnd = 0x92, */
-/* ChapterFlagHidden = 0x98, */
-/* ChapterFlagEnabled = 0x4598, */
-/* ChapterSegmentUID = 0x6E67, */
-/* ChapterSegmentEditionUID = 0x6EBC, */
-/* ChapterPhysicalEquiv = 0x63C3, */
-/* ChapterTrack = 0x8F, */
-/* ChapterTrackNumber = 0x89, */
-/* ChapterDisplay = 0x80, */
-/* ChapString = 0x85, */
-/* ChapLanguage = 0x437C, */
-/* ChapCountry = 0x437E, */
-/* ChapProcess = 0x6944, */
-/* ChapProcessCodecID = 0x6955, */
-/* ChapProcessPrivate = 0x450D, */
-/* ChapProcessCommand = 0x6911, */
-/* ChapProcessTime = 0x6922, */
-/* ChapProcessData = 0x6933, */
-  /* Tagging */
-/* Tags = 0x1254C367, */
-/* Tag = 0x7373, */
-/* Targets = 0x63C0, */
-/* TargetTypeValue = 0x68CA, */
-/* TargetType = 0x63CA, */
-/* Tagging_TrackUID = 0x63C5, */
-/* Tagging_EditionUID = 0x63C9, */
-/* Tagging_ChapterUID = 0x63C4, */
-/* AttachmentUID = 0x63C6, */
-/* SimpleTag = 0x67C8, */
-/* TagName = 0x45A3, */
-/* TagLanguage = 0x447A, */
-/* TagDefault = 0x4484, */
-/* TagString = 0x4487, */
-/* TagBinary = 0x4485, */
-};
-#endif
diff --git a/third_party/libmkv/EbmlWriter.c b/third_party/libmkv/EbmlWriter.c
deleted file mode 100644
index 27cfe861c..000000000
--- a/third_party/libmkv/EbmlWriter.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include "EbmlWriter.h"
-#include <stdlib.h>
-#include <wchar.h>
-#include <string.h>
-#include <limits.h>
-#if defined(_MSC_VER)
-#define LITERALU64(n) n
-#else
-#define LITERALU64(n) n##LLU
-#endif
-
-void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) {
-  /* TODO check and make sure we are not > than 0x0100000000000000LLU */
-  unsigned char size = 8; /* size in bytes to output */
-
-  /* mask to compare for byte size */
-  int64_t minVal = 0xff;
-
-  for (size = 1; size < 8; size ++) {
-    if (val < minVal)
-      break;
-
-    minVal = (minVal << 7);
-  }
-
-  val |= (((uint64_t)0x80) << ((size - 1) * 7));
-
-  Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
-}
-
-void Ebml_WriteString(EbmlGlobal *glob, const char *str) {
-  const size_t size_ = strlen(str);
-  const uint64_t  size = size_;
-  Ebml_WriteLen(glob, size);
-  /* TODO: it's not clear from the spec whether the nul terminator
-   * should be serialized too.  For now we omit the null terminator.
-   */
-  Ebml_Write(glob, str, (unsigned long)size);
-}
-
-void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) {
-  const size_t strlen = wcslen(wstr);
-
-  /* TODO: it's not clear from the spec whether the nul terminator
-   * should be serialized too.  For now we include it.
-   */
-  const uint64_t  size = strlen;
-
-  Ebml_WriteLen(glob, size);
-  Ebml_Write(glob, wstr, (unsigned long)size);
-}
-
-void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) {
-  int len;
-
-  if (class_id >= 0x01000000)
-    len = 4;
-  else if (class_id >= 0x00010000)
-    len = 3;
-  else if (class_id >= 0x00000100)
-    len = 2;
-  else
-    len = 1;
-
-  Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
-}
-
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) {
-  unsigned char sizeSerialized = 8 | 0x80;
-  Ebml_WriteID(glob, class_id);
-  Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
-  Ebml_Serialize(glob, &ui, sizeof(ui), 8);
-}
-
-void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) {
-  unsigned char size = 8; /* size in bytes to output */
-  unsigned char sizeSerialized = 0;
-  unsigned long minVal;
-
-  Ebml_WriteID(glob, class_id);
-  minVal = 0x7fLU; /* mask to compare for byte size */
-
-  for (size = 1; size < 4; size ++) {
-    if (ui < minVal) {
-      break;
-    }
-
-    minVal <<= 7;
-  }
-
-  sizeSerialized = 0x80 | size;
-  Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
-  Ebml_Serialize(glob, &ui, sizeof(ui), size);
-}
-/* TODO: perhaps this is a poor name for this id serializer helper function */
-void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
-  int size;
-  for (size = 4; size > 1; size--) {
-    if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
-      break;
-  }
-  Ebml_WriteID(glob, class_id);
-  Ebml_WriteLen(glob, size);
-  Ebml_WriteID(glob, bin);
-}
-
-void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) {
-  unsigned char len = 0x88;
-
-  Ebml_WriteID(glob, class_id);
-  Ebml_Serialize(glob, &len, sizeof(len), 1);
-  Ebml_Serialize(glob,  &d, sizeof(d), 8);
-}
-
-void Ebml_WriteSigned16(EbmlGlobal *glob, short val) {
-  signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
-  Ebml_Serialize(glob, &out, sizeof(out), 3);
-}
-
-void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) {
-  Ebml_WriteID(glob, class_id);
-  Ebml_WriteString(glob, s);
-}
-
-void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) {
-  Ebml_WriteID(glob,  class_id);
-  Ebml_WriteUTF8(glob,  s);
-}
-
-void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) {
-  Ebml_WriteID(glob, class_id);
-  Ebml_WriteLen(glob, data_length);
-  Ebml_Write(glob,  data, data_length);
-}
-
-void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) {
-  unsigned char tmp = 0;
-  unsigned long i = 0;
-
-  Ebml_WriteID(glob, 0xEC);
-  Ebml_WriteLen(glob, vSize);
-
-  for (i = 0; i < vSize; i++) {
-    Ebml_Write(glob, &tmp, 1);
-  }
-}
-
-/* TODO Serialize Date */
diff --git a/third_party/libmkv/EbmlWriter.h b/third_party/libmkv/EbmlWriter.h
deleted file mode 100644
index b94f75733..000000000
--- a/third_party/libmkv/EbmlWriter.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#ifndef EBMLWRITER_HPP
-#define EBMLWRITER_HPP
-#include <stddef.h>
-#include "vpx/vpx_integer.h"
-
-/* note: you must define write and serialize functions as well as your own
- * EBML_GLOBAL
- *
- * These functions MUST be implemented
- */
-
-typedef struct EbmlGlobal EbmlGlobal;
-void  Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long);
-void  Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
-
-/*****/
-
-void Ebml_WriteLen(EbmlGlobal *glob, int64_t val);
-void Ebml_WriteString(EbmlGlobal *glob, const char *str);
-void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
-void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui);
-void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
-void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
-void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
-/* TODO make this more generic to signed */
-void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
-void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
-void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
-void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
-void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
-/* TODO need date function */
-#endif
diff --git a/third_party/libmkv/Makefile b/third_party/libmkv/Makefile
deleted file mode 100644
index 71aee2397..000000000
--- a/third_party/libmkv/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-#Variables
-CC=gcc
-LINKER=gcc
-FLAGS=-g -Wall
-
-all: testlibmkv
-
-#Build Targets
-EbmlWriter.o: EbmlWriter.c EbmlWriter.h
-	$(CC) $(FLAGS) -c EbmlWriter.c
-
-EbmlBufferWriter.o: EbmlBufferWriter.c EbmlBufferWriter.h
-	$(CC) $(FLAGS) -c EbmlBufferWriter.c
-
-WebMElement.o: WebMElement.c WebMElement.h
-	$(CC) $(FLAGS) -c WebMElement.c
-
-testlibmkv.o: testlibmkv.c
-	$(CC) $(FLAGS) -c testlibmkv.c
-
-testlibmkv: testlibmkv.o WebMElement.o EbmlBufferWriter.o EbmlWriter.o
-	$(LINKER) $(FLAGS) -o testlibmkv testlibmkv.o WebMElement.o EbmlBufferWriter.o EbmlWriter.o
-
-clean:
-	rm -rf *.o testlibmkv
diff --git a/third_party/libmkv/WebMElement.c b/third_party/libmkv/WebMElement.c
deleted file mode 100644
index 6c3670a28..000000000
--- a/third_party/libmkv/WebMElement.c
+++ /dev/null
@@ -1,217 +0,0 @@
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS.  All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-
-#include "EbmlBufferWriter.h"
-#include "EbmlIDs.h"
-#include "WebMElement.h"
-#include <stdio.h>
-#include "vpx/vpx_integer.h"
-
-#define kVorbisPrivateMaxSize  4000
-
-void writeHeader(EbmlGlobal *glob) {
-  EbmlLoc start;
-  Ebml_StartSubElement(glob, &start, EBML);
-  Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
-  Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); // EBML Read Version
-  Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); // EBML Max ID Length
-  Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); // EBML Max Size Length
-  Ebml_SerializeString(glob, DocType, "webm"); // Doc Type
-  Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); // Doc Type Version
-  Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); // Doc Type Read Version
-  Ebml_EndSubElement(glob, &start);
-}
-
-void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCode,
-                      int isKeyframe, unsigned char lacingFlag, int discardable,
-                      unsigned char *data, unsigned long dataLength) {
-  Ebml_WriteID(glob, SimpleBlock);
-  unsigned long blockLength = 4 + dataLength;
-  blockLength |= 0x10000000; // TODO check length < 0x0FFFFFFFF
-  Ebml_Serialize(glob, &blockLength, sizeof(blockLength), 4);
-  trackNumber |= 0x80;  // TODO check track nubmer < 128
-  Ebml_Write(glob, &trackNumber, 1);
-  // Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes
-  Ebml_Serialize(glob, &timeCode, sizeof(timeCode), 2);
-  unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable;
-  Ebml_Write(glob, &flags, 1);
-  Ebml_Write(glob, data, dataLength);
-}
-
-static uint64_t generateTrackID(unsigned int trackNumber) {
-  uint64_t t = time(NULL) * trackNumber;
-  uint64_t r = rand();
-  r = r << 32;
-  r +=  rand();
-  uint64_t rval = t ^ r;
-  return rval;
-}
-
-void writeVideoTrack(EbmlGlobal *glob, unsigned int trackNumber,
-                     int flagLacing, const char *codecId,
-                     unsigned int pixelWidth, unsigned int pixelHeight,
-                     double frameRate) {
-  EbmlLoc start;
-  Ebml_StartSubElement(glob, &start, TrackEntry);
-  Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
-  uint64_t trackID = generateTrackID(trackNumber);
-  Ebml_SerializeUnsigned(glob, TrackUID, trackID);
-  Ebml_SerializeString(glob, CodecName, "VP8");  // TODO shouldn't be fixed
-
-  Ebml_SerializeUnsigned(glob, TrackType, 1); // video is always 1
-  Ebml_SerializeString(glob, CodecID, codecId);
-  {
-    EbmlLoc videoStart;
-    Ebml_StartSubElement(glob, &videoStart, Video);
-    Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
-    Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
-    Ebml_SerializeFloat(glob, FrameRate, frameRate);
-    Ebml_EndSubElement(glob, &videoStart); // Video
-  }
-  Ebml_EndSubElement(glob, &start); // Track Entry
-}
-void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber,
-                     int flagLacing, const char *codecId,
-                     double samplingFrequency, unsigned int channels,
-                     unsigned char *private, unsigned long privateSize) {
-  EbmlLoc start;
-  Ebml_StartSubElement(glob, &start, TrackEntry);
-  Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
-  uint64_t trackID = generateTrackID(trackNumber);
-  Ebml_SerializeUnsigned(glob, TrackUID, trackID);
-  Ebml_SerializeUnsigned(glob, TrackType, 2); // audio is always 2
-  // I am using defaults for thesed required fields
-  /*  Ebml_SerializeUnsigned(glob, FlagEnabled, 1);
-      Ebml_SerializeUnsigned(glob, FlagDefault, 1);
-      Ebml_SerializeUnsigned(glob, FlagForced, 1);
-      Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/
-  Ebml_SerializeString(glob, CodecID, codecId);
-  Ebml_SerializeData(glob, CodecPrivate, private, privateSize);
-
-  Ebml_SerializeString(glob, CodecName, "VORBIS");  // fixed for now
-  {
-    EbmlLoc AudioStart;
-    Ebml_StartSubElement(glob, &AudioStart, Audio);
-    Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency);
-    Ebml_SerializeUnsigned(glob, Channels, channels);
-    Ebml_EndSubElement(glob, &AudioStart);
-  }
-  Ebml_EndSubElement(glob, &start);
-}
-void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc *startInfo, unsigned long timeCodeScale, double duration) {
-  Ebml_StartSubElement(ebml, startInfo, Info);
-  Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale);
-  Ebml_SerializeFloat(ebml, Segment_Duration, duration * 1000.0); // Currently fixed to using milliseconds
-  Ebml_SerializeString(ebml, 0x4D80, "QTmuxingAppLibWebM-0.0.1");
-  Ebml_SerializeString(ebml, 0x5741, "QTwritingAppLibWebM-0.0.1");
-  Ebml_EndSubElement(ebml, startInfo);
-}
-
-/*
-void Mkv_InitializeSegment(Ebml& ebml_out, EbmlLoc& ebmlLoc)
-{
-    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x18538067);
-}
-
-void Mkv_InitializeSeek(Ebml& ebml_out, EbmlLoc& ebmlLoc)
-{
-    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x114d9b74);
-}
-void Mkv_WriteSeekInformation(Ebml& ebml_out, SeekStruct& seekInformation)
-{
-    EbmlLoc ebmlLoc;
-    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x4dbb);
-    Ebml_SerializeString(ebml_out, 0x53ab, seekInformation.SeekID);
-    Ebml_SerializeUnsigned(ebml_out, 0x53ac, seekInformation.SeekPosition);
-    Ebml_EndSubElement(ebml_out, ebmlLoc);
-}
-
-void Mkv_WriteSegmentInformation(Ebml& ebml_out, SegmentInformationStruct& segmentInformation)
-{
-    Ebml_SerializeUnsigned(ebml_out, 0x73a4, segmentInformation.segmentUID);
-    if (segmentInformation.filename != 0)
-        Ebml_SerializeString(ebml_out, 0x7384, segmentInformation.filename);
-    Ebml_SerializeUnsigned(ebml_out, 0x2AD7B1, segmentInformation.TimecodeScale);
-    Ebml_SerializeUnsigned(ebml_out, 0x4489, segmentInformation.Duration);
-    // TODO date
-    Ebml_SerializeWString(ebml_out, 0x4D80, L"MKVMUX");
-    Ebml_SerializeWString(ebml_out, 0x5741, segmentInformation.WritingApp);
-}
-
-void Mkv_InitializeTrack(Ebml& ebml_out, EbmlLoc& ebmlLoc)
-{
-    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1654AE6B);
-}
-
-static void Mkv_WriteGenericTrackData(Ebml& ebml_out, TrackStruct& track)
-{
-    Ebml_SerializeUnsigned(ebml_out, 0xD7, track.TrackNumber);
-    Ebml_SerializeUnsigned(ebml_out, 0x73C5, track.TrackUID);
-    Ebml_SerializeUnsigned(ebml_out, 0x83, track.TrackType);
-    Ebml_SerializeUnsigned(ebml_out, 0xB9, track.FlagEnabled ? 1 :0);
-    Ebml_SerializeUnsigned(ebml_out, 0x88, track.FlagDefault ? 1 :0);
-    Ebml_SerializeUnsigned(ebml_out, 0x55AA, track.FlagForced ? 1 :0);
-    if (track.Language != 0)
-        Ebml_SerializeString(ebml_out, 0x22B59C, track.Language);
-    if (track.CodecID != 0)
-        Ebml_SerializeString(ebml_out, 0x86, track.CodecID);
-    if (track.CodecPrivate != 0)
-        Ebml_SerializeData(ebml_out, 0x63A2, track.CodecPrivate, track.CodecPrivateLength);
-    if (track.CodecName != 0)
-        Ebml_SerializeWString(ebml_out, 0x258688, track.CodecName);
-}
-
-void Mkv_WriteVideoTrack(Ebml& ebml_out, TrackStruct & track, VideoTrackStruct& video)
-{
-    EbmlLoc trackHeadLoc, videoHeadLoc;
-    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);  // start Track
-    Mkv_WriteGenericTrackData(ebml_out, track);
-    Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0);  // start Video
-    Ebml_SerializeUnsigned(ebml_out, 0x9A, video.FlagInterlaced ? 1 :0);
-    Ebml_SerializeUnsigned(ebml_out, 0xB0, video.PixelWidth);
-    Ebml_SerializeUnsigned(ebml_out, 0xBA, video.PixelHeight);
-    Ebml_SerializeUnsigned(ebml_out, 0x54B0, video.PixelDisplayWidth);
-    Ebml_SerializeUnsigned(ebml_out, 0x54BA, video.PixelDisplayHeight);
-    Ebml_SerializeUnsigned(ebml_out, 0x54B2, video.displayUnit);
-    Ebml_SerializeFloat(ebml_out, 0x2383E3, video.FrameRate);
-    Ebml_EndSubElement(ebml_out, videoHeadLoc);
-    Ebml_EndSubElement(ebml_out, trackHeadLoc);
-
-}
-
-void Mkv_WriteAudioTrack(Ebml& ebml_out, TrackStruct & track, AudioTrackStruct& video)
-{
-    EbmlLoc trackHeadLoc, audioHeadLoc;
-    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);
-    Mkv_WriteGenericTrackData(ebml_out, track);
-    Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0);  // start Audio
-    Ebml_SerializeFloat(ebml_out, 0xB5, video.SamplingFrequency);
-    Ebml_SerializeUnsigned(ebml_out, 0x9F, video.Channels);
-    Ebml_SerializeUnsigned(ebml_out, 0x6264, video.BitDepth);
-    Ebml_EndSubElement(ebml_out, audioHeadLoc); // end audio
-    Ebml_EndSubElement(ebml_out, trackHeadLoc);
-}
-
-void Mkv_WriteEbmlClusterHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, ClusterHeadStruct & clusterHead)
-{
-    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1F43B675);
-    Ebml_SerializeUnsigned(ebml_out, 0x6264, clusterHead.TimeCode);
-}
-
-void Mkv_WriteSimpleBlockHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, SimpleBlockStruct& block)
-{
-    Ebml_StartSubElement(ebml_out, ebmlLoc, 0xA3);
-    Ebml_Write1UInt(ebml_out, block.TrackNumber);
-    Ebml_WriteSigned16(ebml_out,block.TimeCode);
-    unsigned char flags = 0x00 | (block.iskey ? 0x80:0x00) | (block.lacing << 1) | block.discardable;
-    Ebml_Write1UInt(ebml_out, flags);  // TODO this may be the wrong function
-    Ebml_Serialize(ebml_out, block.data, block.dataLength);
-    Ebml_EndSubElement(ebml_out,ebmlLoc);
-}
-*/
diff --git a/third_party/libmkv/WebMElement.h b/third_party/libmkv/WebMElement.h
deleted file mode 100644
index 0e5ec2036..000000000
--- a/third_party/libmkv/WebMElement.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS.  All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-
-#ifndef MKV_CONTEXT_HPP
-#define MKV_CONTEXT_HPP 1
-
-// these are helper functions
-void writeHeader(EbmlGlobal *ebml);
-void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc *startInfo,
-                             unsigned long timeCodeScale, double duration);
-// this function is a helper only, it assumes a lot of defaults
-void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber,
-                     int flagLacing, const char *codecId,
-                     unsigned int pixelWidth, unsigned int pixelHeight,
-                     double frameRate);
-void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber,
-                     int flagLacing, const char *codecId,
-                     double samplingFrequency, unsigned int channels,
-                     unsigned char *private, unsigned long privateSize);
-
-void writeSimpleBlock(EbmlGlobal *ebml, unsigned char trackNumber,
-                      short timeCode, int isKeyframe,
-                      unsigned char lacingFlag, int discardable,
-                      unsigned char *data, unsigned long dataLength);
-
-#endif
diff --git a/third_party/libmkv/testlibmkv.c b/third_party/libmkv/testlibmkv.c
deleted file mode 100644
index 97bcf956a..000000000
--- a/third_party/libmkv/testlibmkv.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS.  All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-
-
-#include "EbmlIDs.h"
-#include "EbmlBufferWriter.h"
-#include "WebMElement.h"
-
-#include <stdio.h>
-int main(int argc, char *argv[]) {
-  // init the datatype we're using for ebml output
-  unsigned char data[8192];
-  EbmlGlobal ebml;
-  ebml.buf = data;
-  ebml.offset = 0;
-  ebml.length = 8192;
-
-  writeHeader(&ebml);
-  {
-    EbmlLoc startSegment;
-    Ebml_StartSubElement(&ebml, &startSegment, Segment); // segment
-    {
-      // segment info
-      EbmlLoc startInfo;
-      Ebml_StartSubElement(&ebml, &startInfo, Info);
-      Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv");
-      Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv");
-      Ebml_EndSubElement(&ebml, &startInfo);
-    }
-
-    {
-      EbmlLoc trackStart;
-      Ebml_StartSubElement(&ebml, &trackStart, Tracks);
-      writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97);
-      // writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0);
-      Ebml_EndSubElement(&ebml, &trackStart);
-    }
-
-    {
-      EbmlLoc clusterStart;
-      Ebml_StartSubElement(&ebml, &clusterStart, Cluster); // cluster
-      Ebml_SerializeUnsigned(&ebml, Timecode, 0);
-
-      unsigned char someData[4] = {1, 2, 3, 4};
-      writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4);
-      Ebml_EndSubElement(&ebml, &clusterStart);
-    }    // end cluster
-    Ebml_EndSubElement(&ebml, &startSegment);
-  }
-
-  // dump ebml stuff to the file
-  FILE *file_out = fopen("test.mkv", "wb");
-  size_t bytesWritten = fwrite(data, 1, ebml.offset, file_out);
-  fclose(file_out);
-  return 0;
-}
-\ No newline at end of file
diff --git a/third_party/nestegg/0001-include-paths.diff b/third_party/nestegg/0001-include-paths.diff
deleted file mode 100644
index a704ebdcd..000000000
--- a/third_party/nestegg/0001-include-paths.diff
+++ /dev/null
@@ -1,41 +0,0 @@
-diff --git a/nestegg/halloc/src/halloc.c b/nestegg/halloc/src/halloc.c
-index 5758fc0..837b3ff 100644
---- a/nestegg/halloc/src/halloc.c
-+++ b/nestegg/halloc/src/halloc.c
-@@ -15,7 +15,7 @@
- #include <stdlib.h>  /* realloc */
- #include <string.h>  /* memset & co */
- 
--#include "halloc.h"
-+#include "third_party/nestegg/halloc/halloc.h"
- #include "align.h"
- #include "hlist.h"
- 
-diff --git a/nestegg/include/nestegg/nestegg.h b/nestegg/include/nestegg/nestegg.h
-index ff13728..c18d1d3 100644
---- a/nestegg/include/nestegg/nestegg.h
-+++ b/nestegg/include/nestegg/nestegg.h
-@@ -7,7 +7,7 @@
- #if !defined(NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79)
- #define NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
- 
--#include <nestegg/nestegg-stdint.h>
-+#include "vpx/vpx_integer.h"
- 
- #if defined(__cplusplus)
- extern "C" {
-diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c
-index daf1eed..4fb10e7 100644
---- a/nestegg/src/nestegg.c
-+++ b/nestegg/src/nestegg.c
-@@ -8,8 +8,8 @@
- #include <stdlib.h>
- #include <string.h>
- 
--#include "halloc.h"
--#include "nestegg/nestegg.h"
-+#include "third_party/nestegg/halloc/halloc.h"
-+#include "third_party/nestegg/include/nestegg/nestegg.h"
- 
- /* EBML Elements */
- #define ID_EBML                 0x1a45dfa3
diff --git a/third_party/nestegg/0002-ne_read_simple-uninitialized_variable.diff b/third_party/nestegg/0002-ne_read_simple-uninitialized_variable.diff
deleted file mode 100644
index c3bc9e575..000000000
--- a/third_party/nestegg/0002-ne_read_simple-uninitialized_variable.diff
+++ /dev/null
@@ -1,21 +0,0 @@
-diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c
-index 4fb10e7..b6bc460 100644
---- a/nestegg/src/nestegg.c
-+++ b/nestegg/src/nestegg.c
-@@ -934,7 +934,7 @@ static int
- ne_read_simple(nestegg * ctx, struct ebml_element_desc * desc, size_t length)
- {
-   struct ebml_type * storage;
--  int r;
-+  int r = 0;
- 
-   storage = (struct ebml_type *) (ctx->ancestor->data + desc->offset);
- 
-@@ -968,7 +968,6 @@ ne_read_simple(nestegg * ctx, struct ebml_element_desc * desc, size_t length)
-   case TYPE_MASTER:
-   case TYPE_UNKNOWN:
-     assert(0);
--    r = 0;
-     break;
-   }
- 
diff --git a/third_party/nestegg/AUTHORS b/third_party/nestegg/AUTHORS
deleted file mode 100644
index 7d2c61265..000000000
--- a/third_party/nestegg/AUTHORS
+++ /dev/null
@@ -1,3 +0,0 @@
-Matthew Gregan <kinetik@flim.org>
-Steve Workman <sjhworkman@gmail.com>
-Paul Adenot <paul@paul.cx>
diff --git a/third_party/nestegg/INSTALL b/third_party/nestegg/INSTALL
deleted file mode 100644
index 401df4184..000000000
--- a/third_party/nestegg/INSTALL
+++ /dev/null
@@ -1,8 +0,0 @@
-Build instructions for libnestegg
-=================================
-
-0. Change directory into the source directory.
-1. Run |autoreconf --install| to generate configure.
-2. Run |./configure| to configure the build.
-3. Run |make| to build.
-4. Run |make check| to run the test suite.
diff --git a/third_party/nestegg/LICENSE b/third_party/nestegg/LICENSE
deleted file mode 100644
index a67984a61..000000000
--- a/third_party/nestegg/LICENSE
+++ /dev/null
@@ -1,13 +0,0 @@
-Copyright © 2010 Mozilla Foundation
-
-Permission to use, copy, modify, and distribute this software for any
-purpose with or without fee is hereby granted, provided that the above
-copyright notice and this permission notice appear in all copies.
-
-THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/third_party/nestegg/README b/third_party/nestegg/README
deleted file mode 100644
index 47c8237d2..000000000
--- a/third_party/nestegg/README
+++ /dev/null
@@ -1,6 +0,0 @@
-See INSTALL for build instructions.
-
-Licensed under an ISC-style license.  See LICENSE for details.
-
-The source under the halloc/ directory is licensed under a BSD license.  See
-halloc/halloc.h for details.
diff --git a/third_party/nestegg/README.libvpx b/third_party/nestegg/README.libvpx
deleted file mode 100644
index 8e3760bd7..000000000
--- a/third_party/nestegg/README.libvpx
+++ /dev/null
@@ -1,24 +0,0 @@
-URL: https://github.com/kinetiknz/nestegg.git
-Version: f46223191d8116a36bf299b5b9793fcb798417b1
-License: ISC-style
-License File: LICENSE
-
-Description:
-The source under the halloc/ directory is licensed under a BSD license. See
-halloc/halloc.h for details.
-
-Local Modifications:
-- delete unnecessary docs and build files
-- nestegg/0001-include-paths.diff
-  include path modifications for the libvpx build system
-- 0002-ne_read_simple-uninitialized_variable.diff
-  fixes:
-nestegg.c|975 col 6| warning: ‘r’ may be used uninitialized in this function [-Wuninitialized]
-- add ne_get_uint32 convenience function
-- fix track_number uint64->uint32 warnings
-- fix track_scale double->uint64 warning
-- nestegg_packet_track: fix uint64->uint32 warning
-- ne_read_(string|binary|block): normalize size_t usage
-- ne_parse: normalize size_t usage
-- quiet read related uint64->size_t warnings
-- ne_buffer_read: quiet uint64->size_t warning
diff --git a/third_party/nestegg/TODO b/third_party/nestegg/TODO
deleted file mode 100644
index bf0cb04c4..000000000
--- a/third_party/nestegg/TODO
+++ /dev/null
@@ -1,21 +0,0 @@
-- Document when read, seek, tell callbacks are used.
-- Add an automated testsuite.
-- Test (and fix, if necessary) support for unknown sizes.
-- Test (and fix, if necessary) support for large files.
-- Read past unknown elements rather than seeking.
-- Try to handle unknown elements with unknown sizes.
-- Formalize handling of default element values.
-- Try to resynchronize stream when read_block fails so that failure to parse
-  a single block can be treated as non-fatal.
-- Make logging more useful to API users.
-- Avoid reparsing Cues and ignore any SeekHead at end of file.
-- Optionally build a Cue index as Clusters are parsed.
-- Support seeking without Cues.
-- Avoid building a list of Clusters as they are parsed and retain only the
-  last one parsed.
-- Add an asynchronous error code to struct nestegg and ensure that API calls
-  continue to fail safely one a fatal error has been returned.
-- Modify parser/data structures to provide a clean separation.  Perhaps the
-  parser should return a generic tree of nodes that a second pass uses to
-  initialize the main data structures.
-- Use pool allocator for all allocations.
diff --git a/third_party/nestegg/halloc/README b/third_party/nestegg/halloc/README
deleted file mode 100644
index 380fba2b8..000000000
--- a/third_party/nestegg/halloc/README
+++ /dev/null
@@ -1,45 +0,0 @@
-halloc 1.2.1
-============
-      
-	Hierarchical memory heap interface - an extension to standard
-	malloc/free interface that simplifies tasks of memory disposal 
-	when allocated structures exhibit hierarchical properties.
-
-	http://swapped.cc/halloc
-=
-	To build libhalloc.a with GNU tools run
-		make
-
-	To install in /usr/include and /usr/lib
-		make install
-
-	To cleanup the build files 
-		make clean
-=
-	halloc-1.2.1
-		* fixed a double-free bug in _set_allocator() as per
-		  Matthew Gregan comments
-
-		* switched to using NULL instead of 0 where applicable
-
-	halloc-1.2.0
-		* added missing <string.h> include to halloc.c
-		
-		* improved standard compliance thanks to the feedback
-		  received from Stan Tobias. Two things were fixed -
-		  
-		- hblock_t structure no longer uses zero-sized 'data'
-		  array, which happened to be common, but non-standard
-		  extension; 
-		  
-		- secondly, added the code to test the behaviour of 
-		  realloc(ptr, 0). Standard allows it NOT to act as
-		  free(), in which case halloc will use its own version
-		  of allocator calling free() when neccessary.
-
-	halloc-1.1.0
-		* initial public release (rewrite of hhmalloc library)
-
-=============================================================================
-Copyright (c) 2004-2010, Alex Pankratov (ap@swapped.cc). All rights reserved.
-
diff --git a/third_party/nestegg/halloc/halloc.h b/third_party/nestegg/halloc/halloc.h
deleted file mode 100644
index 10af4e8d8..000000000
--- a/third_party/nestegg/halloc/halloc.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
- *
- *	Hierarchical memory allocator, 1.2.1
- *	http://swapped.cc/halloc
- */
-
-/*
- *	The program is distributed under terms of BSD license. 
- *	You can obtain the copy of the license by visiting:
- *	
- *	http://www.opensource.org/licenses/bsd-license.php
- */
-
-#ifndef _LIBP_HALLOC_H_
-#define _LIBP_HALLOC_H_
-
-#include <stddef.h>  /* size_t */
-
-/*
- *	Core API
- */
-void * halloc (void * block, size_t len);
-void   hattach(void * block, void * parent);
-
-/*
- *	standard malloc/free api
- */
-void * h_malloc (size_t len);
-void * h_calloc (size_t n, size_t len);
-void * h_realloc(void * p, size_t len);
-void   h_free   (void * p);
-char * h_strdup (const char * str);
-
-/*
- *	the underlying allocator
- */
-typedef void * (* realloc_t)(void * ptr, size_t len);
-
-extern realloc_t halloc_allocator;
-
-#endif
-
diff --git a/third_party/nestegg/halloc/src/align.h b/third_party/nestegg/halloc/src/align.h
deleted file mode 100644
index 4c6e1831f..000000000
--- a/third_party/nestegg/halloc/src/align.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
- *
- *	Hierarchical memory allocator, 1.2.1
- *	http://swapped.cc/halloc
- */
-
-/*
- *	The program is distributed under terms of BSD license. 
- *	You can obtain the copy of the license by visiting:
- *	
- *	http://www.opensource.org/licenses/bsd-license.php
- */
-
-#ifndef _LIBP_ALIGN_H_
-#define _LIBP_ALIGN_H_
-
-/*
- *	a type with the most strict alignment requirements
- */
-union max_align
-{
-	char   c;
-	short  s;
-	long   l;
-	int    i;
-	float  f;
-	double d;
-	void * v;
-	void (*q)(void);
-};
-
-typedef union max_align max_align_t;
-
-#endif
-
diff --git a/third_party/nestegg/halloc/src/halloc.c b/third_party/nestegg/halloc/src/halloc.c
deleted file mode 100644
index 8860d736a..000000000
--- a/third_party/nestegg/halloc/src/halloc.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- *	Copyright (c) 2004i-2010 Alex Pankratov. All rights reserved.
- *
- *	Hierarchical memory allocator, 1.2.1
- *	http://swapped.cc/halloc
- */
-
-/*
- *	The program is distributed under terms of BSD license. 
- *	You can obtain the copy of the license by visiting:
- *	
- *	http://www.opensource.org/licenses/bsd-license.php
- */
-
-#include <stdlib.h>  /* realloc */
-#include <string.h>  /* memset & co */
-
-#include "third_party/nestegg/halloc/halloc.h"
-#include "align.h"
-#include "hlist.h"
-
-/*
- *	block control header
- */
-typedef struct hblock
-{
-#ifndef NDEBUG
-#define HH_MAGIC    0x20040518L
-	long          magic;
-#endif
-	hlist_item_t  siblings; /* 2 pointers */
-	hlist_head_t  children; /* 1 pointer  */
-	max_align_t   data[1];  /* not allocated, see below */
-	
-} hblock_t;
-
-#define sizeof_hblock offsetof(hblock_t, data)
-
-/*
- *
- */
-realloc_t halloc_allocator = NULL;
-
-#define allocator halloc_allocator
-
-/*
- *	static methods
- */
-static void _set_allocator(void);
-static void * _realloc(void * ptr, size_t n);
-
-static int  _relate(hblock_t * b, hblock_t * p);
-static void _free_children(hblock_t * p);
-
-/*
- *	Core API
- */
-void * halloc(void * ptr, size_t len)
-{
-	hblock_t * p;
-
-	/* set up default allocator */
-	if (! allocator)
-	{
-		_set_allocator();
-		assert(allocator);
-	}
-
-	/* calloc */
-	if (! ptr)
-	{
-		if (! len)
-			return NULL;
-
-		p = allocator(0, len + sizeof_hblock);
-		if (! p)
-			return NULL;
-#ifndef NDEBUG
-		p->magic = HH_MAGIC;
-#endif
-		hlist_init(&p->children);
-		hlist_init_item(&p->siblings);
-
-		return p->data;
-	}
-
-	p = structof(ptr, hblock_t, data);
-	assert(p->magic == HH_MAGIC);
-
-	/* realloc */
-	if (len)
-	{
-		p = allocator(p, len + sizeof_hblock);
-		if (! p)
-			return NULL;
-
-		hlist_relink(&p->siblings);
-		hlist_relink_head(&p->children);
-		
-		return p->data;
-	}
-
-	/* free */
-	_free_children(p);
-	hlist_del(&p->siblings);
-	allocator(p, 0);
-
-	return NULL;
-}
-
-void hattach(void * block, void * parent)
-{
-	hblock_t * b, * p;
-	
-	if (! block)
-	{
-		assert(! parent);
-		return;
-	}
-
-	/* detach */
-	b = structof(block, hblock_t, data);
-	assert(b->magic == HH_MAGIC);
-
-	hlist_del(&b->siblings);
-
-	if (! parent)
-		return;
-
-	/* attach */
-	p = structof(parent, hblock_t, data);
-	assert(p->magic == HH_MAGIC);
-	
-	/* sanity checks */
-	assert(b != p);          /* trivial */
-	assert(! _relate(p, b)); /* heavy ! */
-
-	hlist_add(&p->children, &b->siblings);
-}
-
-/*
- *	malloc/free api
- */
-void * h_malloc(size_t len)
-{
-	return halloc(0, len);
-}
-
-void * h_calloc(size_t n, size_t len)
-{
-	void * ptr = halloc(0, len*=n);
-	return ptr ? memset(ptr, 0, len) : NULL;
-}
-
-void * h_realloc(void * ptr, size_t len)
-{
-	return halloc(ptr, len);
-}
-
-void   h_free(void * ptr)
-{
-	halloc(ptr, 0);
-}
-
-char * h_strdup(const char * str)
-{
-	size_t len = strlen(str);
-	char * ptr = halloc(0, len + 1);
-	return ptr ? (ptr[len] = 0, memcpy(ptr, str, len)) : NULL;
-}
-
-/*
- *	static stuff
- */
-static void _set_allocator(void)
-{
-	void * p;
-	assert(! allocator);
-	
-	/*
-	 *	the purpose of the test below is to check the behaviour
-	 *	of realloc(ptr, 0), which is defined in the standard
-	 *	as an implementation-specific. if it returns zero,
-	 *	then it's equivalent to free(). it can however return
-	 *	non-zero, in which case it cannot be used for freeing
-	 *	memory blocks and we'll need to supply our own version
-	 *
-	 *	Thanks to Stan Tobias for pointing this tricky part out.
-	 */
-	allocator = realloc;
-	if (! (p = malloc(1)))
-		/* hmm */
-		return;
-		
-	if ((p = realloc(p, 0)))
-	{
-		/* realloc cannot be used as free() */
-		allocator = _realloc;
-		free(p);
-	}
-}
-
-static void * _realloc(void * ptr, size_t n)
-{
-	/*
-	 *	free'ing realloc()
-	 */
-	if (n)
-		return realloc(ptr, n);
-	free(ptr);
-	return NULL;
-}
-
-static int _relate(hblock_t * b, hblock_t * p)
-{
-	hlist_item_t * i;
-
-	if (!b || !p)
-		return 0;
-
-	/* 
-	 *  since there is no 'parent' pointer, which would've allowed
-	 *  O(log(n)) upward traversal, the check must use O(n) downward 
-	 *  iteration of the entire hierarchy; and this can be VERY SLOW
-	 */
-	hlist_for_each(i, &p->children)
-	{
-		hblock_t * q = structof(i, hblock_t, siblings);
-		if (q == b || _relate(b, q))
-			return 1;
-	}
-	return 0;
-}
-
-static void _free_children(hblock_t * p)
-{
-	hlist_item_t * i, * tmp;
-	
-#ifndef NDEBUG
-	/*
-	 *	this catches loops in hierarchy with almost zero 
-	 *	overhead (compared to _relate() running time)
-	 */
-	assert(p && p->magic == HH_MAGIC);
-	p->magic = 0; 
-#endif
-	hlist_for_each_safe(i, tmp, &p->children)
-	{
-		hblock_t * q = structof(i, hblock_t, siblings);
-		_free_children(q);
-		allocator(q, 0);
-	}
-}
-
diff --git a/third_party/nestegg/halloc/src/hlist.h b/third_party/nestegg/halloc/src/hlist.h
deleted file mode 100644
index 2791f78c7..000000000
--- a/third_party/nestegg/halloc/src/hlist.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
- *
- *	Hierarchical memory allocator, 1.2.1
- *	http://swapped.cc/halloc
- */
-
-/*
- *	The program is distributed under terms of BSD license. 
- *	You can obtain the copy of the license by visiting:
- *	
- *	http://www.opensource.org/licenses/bsd-license.php
- */
-
-#ifndef _LIBP_HLIST_H_
-#define _LIBP_HLIST_H_
-
-#include <assert.h>
-#include "macros.h"  /* static_inline */
-
-/*
- *	weak double-linked list w/ tail sentinel
- */
-typedef struct hlist_head  hlist_head_t;
-typedef struct hlist_item  hlist_item_t;
-
-/*
- *
- */
-struct hlist_head
-{
-	hlist_item_t * next;
-};
-
-struct hlist_item
-{
-	hlist_item_t * next;
-	hlist_item_t ** prev;
-};
-
-/*
- *	shared tail sentinel
- */
-struct hlist_item hlist_null;
-
-/*
- *
- */
-#define __hlist_init(h)      { &hlist_null }
-#define __hlist_init_item(i) { &hlist_null, &(i).next }
-
-static_inline void hlist_init(hlist_head_t * h);
-static_inline void hlist_init_item(hlist_item_t * i);
-
-/* static_inline void hlist_purge(hlist_head_t * h); */
-
-/* static_inline bool_t hlist_empty(const hlist_head_t * h); */
-
-/* static_inline hlist_item_t * hlist_head(const hlist_head_t * h); */
-
-/* static_inline hlist_item_t * hlist_next(const hlist_item_t * i); */
-/* static_inline hlist_item_t * hlist_prev(const hlist_item_t * i, 
-                                           const hlist_head_t * h); */
-
-static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i);
-
-/* static_inline void hlist_add_prev(hlist_item_t * l, hlist_item_t * i); */
-/* static_inline void hlist_add_next(hlist_item_t * l, hlist_item_t * i); */
-
-static_inline void hlist_del(hlist_item_t * i);
-
-static_inline void hlist_relink(hlist_item_t * i);
-static_inline void hlist_relink_head(hlist_head_t * h);
-
-#define hlist_for_each(i, h) \
-	for (i = (h)->next; i != &hlist_null; i = i->next)
-
-#define hlist_for_each_safe(i, tmp, h) \
-	for (i = (h)->next, tmp = i->next; \
-	     i!= &hlist_null; \
-	     i = tmp, tmp = i->next)
-
-/*
- *	static
- */
-static_inline void hlist_init(hlist_head_t * h)
-{
-	assert(h);
-	h->next = &hlist_null;
-}
-
-static_inline void hlist_init_item(hlist_item_t * i)
-{
-	assert(i);
-	i->prev = &i->next;
-	i->next = &hlist_null;
-}
-
-static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i)
-{
-	hlist_item_t * next;
-	assert(h && i);
-	
-	next = i->next = h->next;
-	next->prev = &i->next;
-	h->next = i;
-	i->prev = &h->next;
-}
-
-static_inline void hlist_del(hlist_item_t * i)
-{
-	hlist_item_t * next;
-	assert(i);
-
-	next = i->next;
-	next->prev = i->prev;
-	*i->prev = next;
-	
-	hlist_init_item(i);
-}
-
-static_inline void hlist_relink(hlist_item_t * i)
-{
-	assert(i);
-	*i->prev = i;
-	i->next->prev = &i->next;
-}
-
-static_inline void hlist_relink_head(hlist_head_t * h)
-{
-	assert(h);
-	h->next->prev = &h->next;
-}
-
-#endif
-
diff --git a/third_party/nestegg/halloc/src/macros.h b/third_party/nestegg/halloc/src/macros.h
deleted file mode 100644
index 1f84bc277..000000000
--- a/third_party/nestegg/halloc/src/macros.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
- *
- *	Hierarchical memory allocator, 1.2.1
- *	http://swapped.cc/halloc
- */
-
-/*
- *	The program is distributed under terms of BSD license. 
- *	You can obtain the copy of the license by visiting:
- *	
- *	http://www.opensource.org/licenses/bsd-license.php
- */
-
-#ifndef _LIBP_MACROS_H_
-#define _LIBP_MACROS_H_
-
-#include <stddef.h>  /* offsetof */
-
-/*
- 	restore pointer to the structure by a pointer to its field
- */
-#define structof(p,t,f) ((t*)(- (ptrdiff_t) offsetof(t,f) + (char*)(p)))
-
-/*
- *	redefine for the target compiler
- */
-#ifdef _WIN32
-#define static_inline static __inline
-#else
-#define static_inline static __inline__
-#endif
-
-
-#endif
-
diff --git a/third_party/nestegg/include/nestegg/nestegg.h b/third_party/nestegg/include/nestegg/nestegg.h
deleted file mode 100644
index c18d1d3bf..000000000
--- a/third_party/nestegg/include/nestegg/nestegg.h
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * Copyright © 2010 Mozilla Foundation
- *
- * This program is made available under an ISC-style license.  See the
- * accompanying file LICENSE for details.
- */
-#if !defined(NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79)
-#define NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
-
-#include "vpx/vpx_integer.h"
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-/** @mainpage
-
-    @section intro Introduction
-
-    This is the documentation for the <tt>libnestegg</tt> C API.
-    <tt>libnestegg</tt> is a demultiplexing library for <a
-    href="http://www.webmproject.org/code/specs/container/">WebM</a>
-    media files.
-
-    @section example Example code
-
-    @code
-    nestegg * demux_ctx;
-    nestegg_init(&demux_ctx, io, NULL);
-
-    nestegg_packet * pkt;
-    while ((r = nestegg_read_packet(demux_ctx, &pkt)) > 0) {
-      unsigned int track;
-
-      nestegg_packet_track(pkt, &track);
-
-      // This example decodes the first track only.
-      if (track == 0) {
-        unsigned int chunk, chunks;
-
-        nestegg_packet_count(pkt, &chunks);
-
-        // Decode each chunk of data.
-        for (chunk = 0; chunk < chunks; ++chunk) {
-          unsigned char * data;
-          size_t data_size;
-
-          nestegg_packet_data(pkt, chunk, &data, &data_size);
-
-          example_codec_decode(codec_ctx, data, data_size);
-        }
-      }
-
-      nestegg_free_packet(pkt);
-    }
-
-    nestegg_destroy(demux_ctx);
-    @endcode
-*/
-
-
-/** @file
-    The <tt>libnestegg</tt> C API. */
-
-#define NESTEGG_TRACK_VIDEO 0 /**< Track is of type video. */
-#define NESTEGG_TRACK_AUDIO 1 /**< Track is of type audio. */
-
-#define NESTEGG_CODEC_VP8    0 /**< Track uses Google On2 VP8 codec. */
-#define NESTEGG_CODEC_VORBIS 1 /**< Track uses Xiph Vorbis codec. */
-#define NESTEGG_CODEC_VP9    2 /**< Track uses Google On2 VP9 codec. */
-#define NESTEGG_CODEC_OPUS   3 /**< Track uses Xiph Opus codec. */
-
-#define NESTEGG_VIDEO_MONO              0 /**< Track is mono video. */
-#define NESTEGG_VIDEO_STEREO_LEFT_RIGHT 1 /**< Track is side-by-side stereo video.  Left first. */
-#define NESTEGG_VIDEO_STEREO_BOTTOM_TOP 2 /**< Track is top-bottom stereo video.  Right first. */
-#define NESTEGG_VIDEO_STEREO_TOP_BOTTOM 3 /**< Track is top-bottom stereo video.  Left first. */
-#define NESTEGG_VIDEO_STEREO_RIGHT_LEFT 11 /**< Track is side-by-side stereo video.  Right first. */
-
-#define NESTEGG_SEEK_SET 0 /**< Seek offset relative to beginning of stream. */
-#define NESTEGG_SEEK_CUR 1 /**< Seek offset relative to current position in stream. */
-#define NESTEGG_SEEK_END 2 /**< Seek offset relative to end of stream. */
-
-#define NESTEGG_LOG_DEBUG    1     /**< Debug level log message. */
-#define NESTEGG_LOG_INFO     10    /**< Informational level log message. */
-#define NESTEGG_LOG_WARNING  100   /**< Warning level log message. */
-#define NESTEGG_LOG_ERROR    1000  /**< Error level log message. */
-#define NESTEGG_LOG_CRITICAL 10000 /**< Critical level log message. */
-
-typedef struct nestegg nestegg;               /**< Opaque handle referencing the stream state. */
-typedef struct nestegg_packet nestegg_packet; /**< Opaque handle referencing a packet of data. */
-
-/** User supplied IO context. */
-typedef struct {
-  /** User supplied read callback.
-      @param buffer   Buffer to read data into.
-      @param length   Length of supplied buffer in bytes.
-      @param userdata The #userdata supplied by the user.
-      @retval  1 Read succeeded.
-      @retval  0 End of stream.
-      @retval -1 Error. */
-  int (* read)(void * buffer, size_t length, void * userdata);
-
-  /** User supplied seek callback.
-      @param offset   Offset within the stream to seek to.
-      @param whence   Seek direction.  One of #NESTEGG_SEEK_SET,
-                      #NESTEGG_SEEK_CUR, or #NESTEGG_SEEK_END.
-      @param userdata The #userdata supplied by the user.
-      @retval  0 Seek succeeded.
-      @retval -1 Error. */
-  int (* seek)(int64_t offset, int whence, void * userdata);
-
-  /** User supplied tell callback.
-      @param userdata The #userdata supplied by the user.
-      @returns Current position within the stream.
-      @retval -1 Error. */
-  int64_t (* tell)(void * userdata);
-
-  /** User supplied pointer to be passed to the IO callbacks. */
-  void * userdata;
-} nestegg_io;
-
-/** Parameters specific to a video track. */
-typedef struct {
-  unsigned int stereo_mode;    /**< Video mode.  One of #NESTEGG_VIDEO_MONO,
-                                    #NESTEGG_VIDEO_STEREO_LEFT_RIGHT,
-                                    #NESTEGG_VIDEO_STEREO_BOTTOM_TOP, or
-                                    #NESTEGG_VIDEO_STEREO_TOP_BOTTOM. */
-  unsigned int width;          /**< Width of the video frame in pixels. */
-  unsigned int height;         /**< Height of the video frame in pixels. */
-  unsigned int display_width;  /**< Display width of the video frame in pixels. */
-  unsigned int display_height; /**< Display height of the video frame in pixels. */
-  unsigned int crop_bottom;    /**< Pixels to crop from the bottom of the frame. */
-  unsigned int crop_top;       /**< Pixels to crop from the top of the frame. */
-  unsigned int crop_left;      /**< Pixels to crop from the left of the frame. */
-  unsigned int crop_right;     /**< Pixels to crop from the right of the frame. */
-} nestegg_video_params;
-
-/** Parameters specific to an audio track. */
-typedef struct {
-  double rate;           /**< Sampling rate in Hz. */
-  unsigned int channels; /**< Number of audio channels. */
-  unsigned int depth;    /**< Bits per sample. */
-  uint64_t  codec_delay; /**< Nanoseconds that must be discarded from the start. */
-  uint64_t  seek_preroll;/**< Nanoseconds that must be discarded after a seek. */
-} nestegg_audio_params;
-
-/** Logging callback function pointer. */
-typedef void (* nestegg_log)(nestegg * context, unsigned int severity, char const * format, ...);
-
-/** Initialize a nestegg context.  During initialization the parser will
-    read forward in the stream processing all elements until the first
-    block of media is reached.  All track metadata has been processed at this point.
-    @param context  Storage for the new nestegg context.  @see nestegg_destroy
-    @param io       User supplied IO context.
-    @param callback Optional logging callback function pointer.  May be NULL.
-    @param max_offset Optional maximum offset to be read. Set -1 to ignore.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback, int64_t max_offset);
-
-/** Destroy a nestegg context and free associated memory.
-    @param context #nestegg context to be freed.  @see nestegg_init */
-void nestegg_destroy(nestegg * context);
-
-/** Query the duration of the media stream in nanoseconds.
-    @param context  Stream context initialized by #nestegg_init.
-    @param duration Storage for the queried duration.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_duration(nestegg * context, uint64_t * duration);
-
-/** Query the tstamp scale of the media stream in nanoseconds.
-    Timecodes presented by nestegg have been scaled by this value
-    before presentation to the caller.
-    @param context Stream context initialized by #nestegg_init.
-    @param scale   Storage for the queried scale factor.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_tstamp_scale(nestegg * context, uint64_t * scale);
-
-/** Query the number of tracks in the media stream.
-    @param context Stream context initialized by #nestegg_init.
-    @param tracks  Storage for the queried track count.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_track_count(nestegg * context, unsigned int * tracks);
-
-/** Query the start and end offset for a particular cluster.
-    @param context     Stream context initialized by #nestegg_init.
-    @param cluster_num Zero-based cluster number; order they appear in cues.
-    @param max_offset  Optional maximum offset to be read. Set -1 to ignore.
-    @param start_pos   Starting offset of the cluster. -1 means non-existant.
-    @param end_pos     Starting offset of the cluster. -1 means non-existant or
-                       final cluster.
-    @param tstamp      Starting timestamp of the cluster.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_get_cue_point(nestegg * context, unsigned int cluster_num,
-                          int64_t max_offset, int64_t * start_pos,
-                          int64_t * end_pos, uint64_t * tstamp);
-
-/** Seek to @a offset.  Stream will seek directly to offset.
-    Should be used to seek to the start of a resync point, i.e. cluster; the
-    parser will not be able to understand other offsets.
-    @param context Stream context initialized by #nestegg_init.
-    @param offset  Absolute offset in bytes.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_offset_seek(nestegg * context, uint64_t offset);
-
-/** Seek @a track to @a tstamp.  Stream seek will terminate at the earliest
-    key point in the stream at or before @a tstamp.  Other tracks in the
-    stream will output packets with unspecified but nearby timestamps.
-    @param context Stream context initialized by #nestegg_init.
-    @param track   Zero based track number.
-    @param tstamp  Absolute timestamp in nanoseconds.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_track_seek(nestegg * context, unsigned int track, uint64_t tstamp);
-
-/** Query the type specified by @a track.
-    @param context Stream context initialized by #nestegg_init.
-    @param track   Zero based track number.
-    @retval #NESTEGG_TRACK_VIDEO Track type is video.
-    @retval #NESTEGG_TRACK_AUDIO Track type is audio.
-    @retval -1 Error. */
-int nestegg_track_type(nestegg * context, unsigned int track);
-
-/** Query the codec ID specified by @a track.
-    @param context Stream context initialized by #nestegg_init.
-    @param track   Zero based track number.
-    @retval #NESTEGG_CODEC_VP8    Track codec is VP8.
-    @retval #NESTEGG_CODEC_VORBIS Track codec is Vorbis.
-    @retval -1 Error. */
-int nestegg_track_codec_id(nestegg * context, unsigned int track);
-
-/** Query the number of codec initialization chunks for @a track.  Each
-    chunk of data should be passed to the codec initialization functions in
-    the order returned.
-    @param context Stream context initialized by #nestegg_init.
-    @param track   Zero based track number.
-    @param count   Storage for the queried chunk count.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_track_codec_data_count(nestegg * context, unsigned int track,
-                                   unsigned int * count);
-
-/** Get a pointer to chunk number @a item of codec initialization data for
-    @a track.
-    @param context Stream context initialized by #nestegg_init.
-    @param track   Zero based track number.
-    @param item    Zero based chunk item number.
-    @param data    Storage for the queried data pointer.
-                   The data is owned by the #nestegg context.
-    @param length  Storage for the queried data size.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_track_codec_data(nestegg * context, unsigned int track, unsigned int item,
-                             unsigned char ** data, size_t * length);
-
-/** Query the video parameters specified by @a track.
-    @param context Stream context initialized by #nestegg_init.
-    @param track   Zero based track number.
-    @param params  Storage for the queried video parameters.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_track_video_params(nestegg * context, unsigned int track,
-                               nestegg_video_params * params);
-
-/** Query the audio parameters specified by @a track.
-    @param context Stream context initialized by #nestegg_init.
-    @param track   Zero based track number.
-    @param params  Storage for the queried audio parameters.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_track_audio_params(nestegg * context, unsigned int track,
-                               nestegg_audio_params * params);
-
-/** Read a packet of media data.  A packet consists of one or more chunks of
-    data associated with a single track.  nestegg_read_packet should be
-    called in a loop while the return value is 1 to drive the stream parser
-    forward.  @see nestegg_free_packet
-    @param context Context returned by #nestegg_init.
-    @param packet  Storage for the returned nestegg_packet.
-    @retval  1 Additional packets may be read in subsequent calls.
-    @retval  0 End of stream.
-    @retval -1 Error. */
-int nestegg_read_packet(nestegg * context, nestegg_packet ** packet);
-
-/** Destroy a nestegg_packet and free associated memory.
-    @param packet #nestegg_packet to be freed. @see nestegg_read_packet */
-void nestegg_free_packet(nestegg_packet * packet);
-
-/** Query the track number of @a packet.
-    @param packet Packet initialized by #nestegg_read_packet.
-    @param track  Storage for the queried zero based track index.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_packet_track(nestegg_packet * packet, unsigned int * track);
-
-/** Query the time stamp in nanoseconds of @a packet.
-    @param packet Packet initialized by #nestegg_read_packet.
-    @param tstamp Storage for the queried timestamp in nanoseconds.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_packet_tstamp(nestegg_packet * packet, uint64_t * tstamp);
-
-/** Query the number of data chunks contained in @a packet.
-    @param packet Packet initialized by #nestegg_read_packet.
-    @param count  Storage for the queried timestamp in nanoseconds.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_packet_count(nestegg_packet * packet, unsigned int * count);
-
-/** Get a pointer to chunk number @a item of packet data.
-    @param packet  Packet initialized by #nestegg_read_packet.
-    @param item    Zero based chunk item number.
-    @param data    Storage for the queried data pointer.
-                   The data is owned by the #nestegg_packet packet.
-    @param length  Storage for the queried data size.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_packet_data(nestegg_packet * packet, unsigned int item,
-                        unsigned char ** data, size_t * length);
-
-/** Returns discard_padding for given packet
-    @param packet  Packet initialized by #nestegg_read_packet.
-    @param discard_padding pointer to store discard padding in.
-    @retval  0 Success.
-    @retval -1 Error. */
-int nestegg_packet_discard_padding(nestegg_packet * packet,
-                                   int64_t * discard_padding);
-
-/** Query the presence of cues.
-    @param context  Stream context initialized by #nestegg_init.
-    @retval 0 The media has no cues.
-    @retval 1 The media has cues. */
-int nestegg_has_cues(nestegg * context);
-
-/**
- * Try to determine if the buffer looks like the beginning of a WebM file.
- *
- * @param buffer A buffer containing the beginning of a media file.
- * @param length The size of the buffer.
- * @retval 0 The file is not a WebM file.
- * @retval 1 The file is a WebM file. */
-int nestegg_sniff(unsigned char const * buffer, size_t length);
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif /* NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79 */
diff --git a/third_party/nestegg/src/nestegg.c b/third_party/nestegg/src/nestegg.c
deleted file mode 100644
index c7e2b024a..000000000
--- a/third_party/nestegg/src/nestegg.c
+++ /dev/null
@@ -1,2323 +0,0 @@
-/*
- * Copyright © 2010 Mozilla Foundation
- *
- * This program is made available under an ISC-style license.  See the
- * accompanying file LICENSE for details.
- */
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/nestegg/halloc/halloc.h"
-#include "third_party/nestegg/include/nestegg/nestegg.h"
-
-/* EBML Elements */
-#define ID_EBML                 0x1a45dfa3
-#define ID_EBML_VERSION         0x4286
-#define ID_EBML_READ_VERSION    0x42f7
-#define ID_EBML_MAX_ID_LENGTH   0x42f2
-#define ID_EBML_MAX_SIZE_LENGTH 0x42f3
-#define ID_DOCTYPE              0x4282
-#define ID_DOCTYPE_VERSION      0x4287
-#define ID_DOCTYPE_READ_VERSION 0x4285
-
-/* Global Elements */
-#define ID_VOID                 0xec
-#define ID_CRC32                0xbf
-
-/* WebM Elements */
-#define ID_SEGMENT              0x18538067
-
-/* Seek Head Elements */
-#define ID_SEEK_HEAD            0x114d9b74
-#define ID_SEEK                 0x4dbb
-#define ID_SEEK_ID              0x53ab
-#define ID_SEEK_POSITION        0x53ac
-
-/* Info Elements */
-#define ID_INFO                 0x1549a966
-#define ID_TIMECODE_SCALE       0x2ad7b1
-#define ID_DURATION             0x4489
-
-/* Cluster Elements */
-#define ID_CLUSTER              0x1f43b675
-#define ID_TIMECODE             0xe7
-#define ID_BLOCK_GROUP          0xa0
-#define ID_SIMPLE_BLOCK         0xa3
-
-/* BlockGroup Elements */
-#define ID_BLOCK                0xa1
-#define ID_BLOCK_DURATION       0x9b
-#define ID_REFERENCE_BLOCK      0xfb
-#define ID_DISCARD_PADDING      0x75a2
-
-/* Tracks Elements */
-#define ID_TRACKS               0x1654ae6b
-#define ID_TRACK_ENTRY          0xae
-#define ID_TRACK_NUMBER         0xd7
-#define ID_TRACK_UID            0x73c5
-#define ID_TRACK_TYPE           0x83
-#define ID_FLAG_ENABLED         0xb9
-#define ID_FLAG_DEFAULT         0x88
-#define ID_FLAG_LACING          0x9c
-#define ID_TRACK_TIMECODE_SCALE 0x23314f
-#define ID_LANGUAGE             0x22b59c
-#define ID_CODEC_ID             0x86
-#define ID_CODEC_PRIVATE        0x63a2
-#define ID_CODEC_DELAY          0x56aa
-#define ID_SEEK_PREROLL         0x56bb
-
-/* Video Elements */
-#define ID_VIDEO                0xe0
-#define ID_STEREO_MODE          0x53b8
-#define ID_PIXEL_WIDTH          0xb0
-#define ID_PIXEL_HEIGHT         0xba
-#define ID_PIXEL_CROP_BOTTOM    0x54aa
-#define ID_PIXEL_CROP_TOP       0x54bb
-#define ID_PIXEL_CROP_LEFT      0x54cc
-#define ID_PIXEL_CROP_RIGHT     0x54dd
-#define ID_DISPLAY_WIDTH        0x54b0
-#define ID_DISPLAY_HEIGHT       0x54ba
-
-/* Audio Elements */
-#define ID_AUDIO                0xe1
-#define ID_SAMPLING_FREQUENCY   0xb5
-#define ID_CHANNELS             0x9f
-#define ID_BIT_DEPTH            0x6264
-
-/* Cues Elements */
-#define ID_CUES                 0x1c53bb6b
-#define ID_CUE_POINT            0xbb
-#define ID_CUE_TIME             0xb3
-#define ID_CUE_TRACK_POSITIONS  0xb7
-#define ID_CUE_TRACK            0xf7
-#define ID_CUE_CLUSTER_POSITION 0xf1
-#define ID_CUE_BLOCK_NUMBER     0x5378
-
-/* EBML Types */
-enum ebml_type_enum {
-  TYPE_UNKNOWN,
-  TYPE_MASTER,
-  TYPE_UINT,
-  TYPE_FLOAT,
-  TYPE_INT,
-  TYPE_STRING,
-  TYPE_BINARY
-};
-
-#define LIMIT_STRING            (1 << 20)
-#define LIMIT_BINARY            (1 << 24)
-#define LIMIT_BLOCK             (1 << 30)
-#define LIMIT_FRAME             (1 << 28)
-
-/* Field Flags */
-#define DESC_FLAG_NONE          0
-#define DESC_FLAG_MULTI         (1 << 0)
-#define DESC_FLAG_SUSPEND       (1 << 1)
-#define DESC_FLAG_OFFSET        (1 << 2)
-
-/* Block Header Flags */
-#define BLOCK_FLAGS_LACING      6
-
-/* Lacing Constants */
-#define LACING_NONE             0
-#define LACING_XIPH             1
-#define LACING_FIXED            2
-#define LACING_EBML             3
-
-/* Track Types */
-#define TRACK_TYPE_VIDEO        1
-#define TRACK_TYPE_AUDIO        2
-
-/* Track IDs */
-#define TRACK_ID_VP8            "V_VP8"
-#define TRACK_ID_VP9            "V_VP9"
-#define TRACK_ID_VORBIS         "A_VORBIS"
-#define TRACK_ID_OPUS           "A_OPUS"
-
-enum vint_mask {
-  MASK_NONE,
-  MASK_FIRST_BIT
-};
-
-struct ebml_binary {
-  unsigned char * data;
-  size_t length;
-};
-
-struct ebml_list_node {
-  struct ebml_list_node * next;
-  uint64_t id;
-  void * data;
-};
-
-struct ebml_list {
-  struct ebml_list_node * head;
-  struct ebml_list_node * tail;
-};
-
-struct ebml_type {
-  union ebml_value {
-    uint64_t u;
-    double f;
-    int64_t i;
-    char * s;
-    struct ebml_binary b;
-  } v;
-  enum ebml_type_enum type;
-  int read;
-};
-
-/* EBML Definitions */
-struct ebml {
-  struct ebml_type ebml_version;
-  struct ebml_type ebml_read_version;
-  struct ebml_type ebml_max_id_length;
-  struct ebml_type ebml_max_size_length;
-  struct ebml_type doctype;
-  struct ebml_type doctype_version;
-  struct ebml_type doctype_read_version;
-};
-
-/* Matroksa Definitions */
-struct seek {
-  struct ebml_type id;
-  struct ebml_type position;
-};
-
-struct seek_head {
-  struct ebml_list seek;
-};
-
-struct info {
-  struct ebml_type timecode_scale;
-  struct ebml_type duration;
-};
-
-struct block_group {
-  struct ebml_type duration;
-  struct ebml_type reference_block;
-  struct ebml_type discard_padding;
-};
-
-struct cluster {
-  struct ebml_type timecode;
-  struct ebml_list block_group;
-};
-
-struct video {
-  struct ebml_type stereo_mode;
-  struct ebml_type pixel_width;
-  struct ebml_type pixel_height;
-  struct ebml_type pixel_crop_bottom;
-  struct ebml_type pixel_crop_top;
-  struct ebml_type pixel_crop_left;
-  struct ebml_type pixel_crop_right;
-  struct ebml_type display_width;
-  struct ebml_type display_height;
-};
-
-struct audio {
-  struct ebml_type sampling_frequency;
-  struct ebml_type channels;
-  struct ebml_type bit_depth;
-};
-
-struct track_entry {
-  struct ebml_type number;
-  struct ebml_type uid;
-  struct ebml_type type;
-  struct ebml_type flag_enabled;
-  struct ebml_type flag_default;
-  struct ebml_type flag_lacing;
-  struct ebml_type track_timecode_scale;
-  struct ebml_type language;
-  struct ebml_type codec_id;
-  struct ebml_type codec_private;
-  struct ebml_type codec_delay;
-  struct ebml_type seek_preroll;
-  struct video video;
-  struct audio audio;
-};
-
-struct tracks {
-  struct ebml_list track_entry;
-};
-
-struct cue_track_positions {
-  struct ebml_type track;
-  struct ebml_type cluster_position;
-  struct ebml_type block_number;
-};
-
-struct cue_point {
-  struct ebml_type time;
-  struct ebml_list cue_track_positions;
-};
-
-struct cues {
-  struct ebml_list cue_point;
-};
-
-struct segment {
-  struct ebml_list seek_head;
-  struct info info;
-  struct ebml_list cluster;
-  struct tracks tracks;
-  struct cues cues;
-};
-
-/* Misc. */
-struct pool_ctx {
-  char dummy;
-};
-
-struct list_node {
-  struct list_node * previous;
-  struct ebml_element_desc * node;
-  unsigned char * data;
-};
-
-struct saved_state {
-  int64_t stream_offset;
-  struct list_node * ancestor;
-  uint64_t last_id;
-  uint64_t last_size;
-  int last_valid;
-};
-
-struct frame {
-  unsigned char * data;
-  size_t length;
-  struct frame * next;
-};
-
-/* Public (opaque) Structures */
-struct nestegg {
-  nestegg_io * io;
-  nestegg_log log;
-  struct pool_ctx * alloc_pool;
-  uint64_t last_id;
-  uint64_t last_size;
-  int last_valid;
-  struct list_node * ancestor;
-  struct ebml ebml;
-  struct segment segment;
-  int64_t segment_offset;
-  unsigned int track_count;
-};
-
-struct nestegg_packet {
-  uint64_t track;
-  uint64_t timecode;
-  struct frame * frame;
-  int64_t discard_padding;
-};
-
-/* Element Descriptor */
-struct ebml_element_desc {
-  char const * name;
-  uint64_t id;
-  enum ebml_type_enum type;
-  size_t offset;
-  unsigned int flags;
-  struct ebml_element_desc * children;
-  size_t size;
-  size_t data_offset;
-};
-
-#define E_FIELD(ID, TYPE, STRUCT, FIELD) \
-  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, NULL, 0, 0 }
-#define E_MASTER(ID, TYPE, STRUCT, FIELD) \
-  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_MULTI, ne_ ## FIELD ## _elements, \
-      sizeof(struct FIELD), 0 }
-#define E_SINGLE_MASTER_O(ID, TYPE, STRUCT, FIELD) \
-  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_OFFSET, ne_ ## FIELD ## _elements, 0, \
-      offsetof(STRUCT, FIELD ## _offset) }
-#define E_SINGLE_MASTER(ID, TYPE, STRUCT, FIELD) \
-  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, ne_ ## FIELD ## _elements, 0, 0 }
-#define E_SUSPEND(ID, TYPE) \
-  { #ID, ID, TYPE, 0, DESC_FLAG_SUSPEND, NULL, 0, 0 }
-#define E_LAST \
-  { NULL, 0, 0, 0, DESC_FLAG_NONE, NULL, 0, 0 }
-
-/* EBML Element Lists */
-static struct ebml_element_desc ne_ebml_elements[] = {
-  E_FIELD(ID_EBML_VERSION, TYPE_UINT, struct ebml, ebml_version),
-  E_FIELD(ID_EBML_READ_VERSION, TYPE_UINT, struct ebml, ebml_read_version),
-  E_FIELD(ID_EBML_MAX_ID_LENGTH, TYPE_UINT, struct ebml, ebml_max_id_length),
-  E_FIELD(ID_EBML_MAX_SIZE_LENGTH, TYPE_UINT, struct ebml, ebml_max_size_length),
-  E_FIELD(ID_DOCTYPE, TYPE_STRING, struct ebml, doctype),
-  E_FIELD(ID_DOCTYPE_VERSION, TYPE_UINT, struct ebml, doctype_version),
-  E_FIELD(ID_DOCTYPE_READ_VERSION, TYPE_UINT, struct ebml, doctype_read_version),
-  E_LAST
-};
-
-/* WebM Element Lists */
-static struct ebml_element_desc ne_seek_elements[] = {
-  E_FIELD(ID_SEEK_ID, TYPE_BINARY, struct seek, id),
-  E_FIELD(ID_SEEK_POSITION, TYPE_UINT, struct seek, position),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_seek_head_elements[] = {
-  E_MASTER(ID_SEEK, TYPE_MASTER, struct seek_head, seek),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_info_elements[] = {
-  E_FIELD(ID_TIMECODE_SCALE, TYPE_UINT, struct info, timecode_scale),
-  E_FIELD(ID_DURATION, TYPE_FLOAT, struct info, duration),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_block_group_elements[] = {
-  E_SUSPEND(ID_BLOCK, TYPE_BINARY),
-  E_FIELD(ID_BLOCK_DURATION, TYPE_UINT, struct block_group, duration),
-  E_FIELD(ID_REFERENCE_BLOCK, TYPE_INT, struct block_group, reference_block),
-  E_FIELD(ID_DISCARD_PADDING, TYPE_INT, struct block_group, discard_padding),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_cluster_elements[] = {
-  E_FIELD(ID_TIMECODE, TYPE_UINT, struct cluster, timecode),
-  E_MASTER(ID_BLOCK_GROUP, TYPE_MASTER, struct cluster, block_group),
-  E_SUSPEND(ID_SIMPLE_BLOCK, TYPE_BINARY),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_video_elements[] = {
-  E_FIELD(ID_STEREO_MODE, TYPE_UINT, struct video, stereo_mode),
-  E_FIELD(ID_PIXEL_WIDTH, TYPE_UINT, struct video, pixel_width),
-  E_FIELD(ID_PIXEL_HEIGHT, TYPE_UINT, struct video, pixel_height),
-  E_FIELD(ID_PIXEL_CROP_BOTTOM, TYPE_UINT, struct video, pixel_crop_bottom),
-  E_FIELD(ID_PIXEL_CROP_TOP, TYPE_UINT, struct video, pixel_crop_top),
-  E_FIELD(ID_PIXEL_CROP_LEFT, TYPE_UINT, struct video, pixel_crop_left),
-  E_FIELD(ID_PIXEL_CROP_RIGHT, TYPE_UINT, struct video, pixel_crop_right),
-  E_FIELD(ID_DISPLAY_WIDTH, TYPE_UINT, struct video, display_width),
-  E_FIELD(ID_DISPLAY_HEIGHT, TYPE_UINT, struct video, display_height),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_audio_elements[] = {
-  E_FIELD(ID_SAMPLING_FREQUENCY, TYPE_FLOAT, struct audio, sampling_frequency),
-  E_FIELD(ID_CHANNELS, TYPE_UINT, struct audio, channels),
-  E_FIELD(ID_BIT_DEPTH, TYPE_UINT, struct audio, bit_depth),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_track_entry_elements[] = {
-  E_FIELD(ID_TRACK_NUMBER, TYPE_UINT, struct track_entry, number),
-  E_FIELD(ID_TRACK_UID, TYPE_UINT, struct track_entry, uid),
-  E_FIELD(ID_TRACK_TYPE, TYPE_UINT, struct track_entry, type),
-  E_FIELD(ID_FLAG_ENABLED, TYPE_UINT, struct track_entry, flag_enabled),
-  E_FIELD(ID_FLAG_DEFAULT, TYPE_UINT, struct track_entry, flag_default),
-  E_FIELD(ID_FLAG_LACING, TYPE_UINT, struct track_entry, flag_lacing),
-  E_FIELD(ID_TRACK_TIMECODE_SCALE, TYPE_FLOAT, struct track_entry, track_timecode_scale),
-  E_FIELD(ID_LANGUAGE, TYPE_STRING, struct track_entry, language),
-  E_FIELD(ID_CODEC_ID, TYPE_STRING, struct track_entry, codec_id),
-  E_FIELD(ID_CODEC_PRIVATE, TYPE_BINARY, struct track_entry, codec_private),
-  E_FIELD(ID_CODEC_DELAY, TYPE_UINT, struct track_entry, codec_delay),
-  E_FIELD(ID_SEEK_PREROLL, TYPE_UINT, struct track_entry, seek_preroll),
-  E_SINGLE_MASTER(ID_VIDEO, TYPE_MASTER, struct track_entry, video),
-  E_SINGLE_MASTER(ID_AUDIO, TYPE_MASTER, struct track_entry, audio),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_tracks_elements[] = {
-  E_MASTER(ID_TRACK_ENTRY, TYPE_MASTER, struct tracks, track_entry),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_cue_track_positions_elements[] = {
-  E_FIELD(ID_CUE_TRACK, TYPE_UINT, struct cue_track_positions, track),
-  E_FIELD(ID_CUE_CLUSTER_POSITION, TYPE_UINT, struct cue_track_positions, cluster_position),
-  E_FIELD(ID_CUE_BLOCK_NUMBER, TYPE_UINT, struct cue_track_positions, block_number),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_cue_point_elements[] = {
-  E_FIELD(ID_CUE_TIME, TYPE_UINT, struct cue_point, time),
-  E_MASTER(ID_CUE_TRACK_POSITIONS, TYPE_MASTER, struct cue_point, cue_track_positions),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_cues_elements[] = {
-  E_MASTER(ID_CUE_POINT, TYPE_MASTER, struct cues, cue_point),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_segment_elements[] = {
-  E_MASTER(ID_SEEK_HEAD, TYPE_MASTER, struct segment, seek_head),
-  E_SINGLE_MASTER(ID_INFO, TYPE_MASTER, struct segment, info),
-  E_MASTER(ID_CLUSTER, TYPE_MASTER, struct segment, cluster),
-  E_SINGLE_MASTER(ID_TRACKS, TYPE_MASTER, struct segment, tracks),
-  E_SINGLE_MASTER(ID_CUES, TYPE_MASTER, struct segment, cues),
-  E_LAST
-};
-
-static struct ebml_element_desc ne_top_level_elements[] = {
-  E_SINGLE_MASTER(ID_EBML, TYPE_MASTER, nestegg, ebml),
-  E_SINGLE_MASTER_O(ID_SEGMENT, TYPE_MASTER, nestegg, segment),
-  E_LAST
-};
-
-#undef E_FIELD
-#undef E_MASTER
-#undef E_SINGLE_MASTER_O
-#undef E_SINGLE_MASTER
-#undef E_SUSPEND
-#undef E_LAST
-
-static struct pool_ctx *
-ne_pool_init(void)
-{
-  struct pool_ctx * pool;
-
-  pool = h_malloc(sizeof(*pool));
-  if (!pool)
-    abort();
-  return pool;
-}
-
-static void
-ne_pool_destroy(struct pool_ctx * pool)
-{
-  h_free(pool);
-}
-
-static void *
-ne_pool_alloc(size_t size, struct pool_ctx * pool)
-{
-  void * p;
-
-  p = h_malloc(size);
-  if (!p)
-    abort();
-  hattach(p, pool);
-  memset(p, 0, size);
-  return p;
-}
-
-static void *
-ne_alloc(size_t size)
-{
-  void * p;
-
-  p = calloc(1, size);
-  if (!p)
-    abort();
-  return p;
-}
-
-static int
-ne_io_read(nestegg_io * io, void * buffer, size_t length)
-{
-  return io->read(buffer, length, io->userdata);
-}
-
-static int
-ne_io_seek(nestegg_io * io, int64_t offset, int whence)
-{
-  return io->seek(offset, whence, io->userdata);
-}
-
-static int
-ne_io_read_skip(nestegg_io * io, size_t length)
-{
-  size_t get;
-  unsigned char buf[8192];
-  int r = 1;
-
-  while (length > 0) {
-    get = length < sizeof(buf) ? length : sizeof(buf);
-    r = ne_io_read(io, buf, get);
-    if (r != 1)
-      break;
-    length -= get;
-  }
-
-  return r;
-}
-
-static int64_t
-ne_io_tell(nestegg_io * io)
-{
-  return io->tell(io->userdata);
-}
-
-static int
-ne_bare_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length, enum vint_mask maskflag)
-{
-  int r;
-  unsigned char b;
-  size_t maxlen = 8;
-  unsigned int count = 1, mask = 1 << 7;
-
-  r = ne_io_read(io, &b, 1);
-  if (r != 1)
-    return r;
-
-  while (count < maxlen) {
-    if ((b & mask) != 0)
-      break;
-    mask >>= 1;
-    count += 1;
-  }
-
-  if (length)
-    *length = count;
-  *value = b;
-
-  if (maskflag == MASK_FIRST_BIT)
-    *value = b & ~mask;
-
-  while (--count) {
-    r = ne_io_read(io, &b, 1);
-    if (r != 1)
-      return r;
-    *value <<= 8;
-    *value |= b;
-  }
-
-  return 1;
-}
-
-static int
-ne_read_id(nestegg_io * io, uint64_t * value, uint64_t * length)
-{
-  return ne_bare_read_vint(io, value, length, MASK_NONE);
-}
-
-static int
-ne_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length)
-{
-  return ne_bare_read_vint(io, value, length, MASK_FIRST_BIT);
-}
-
-static int
-ne_read_svint(nestegg_io * io, int64_t * value, uint64_t * length)
-{
-  int r;
-  uint64_t uvalue;
-  uint64_t ulength;
-  int64_t svint_subtr[] = {
-    0x3f, 0x1fff,
-    0xfffff, 0x7ffffff,
-    0x3ffffffffLL, 0x1ffffffffffLL,
-    0xffffffffffffLL, 0x7fffffffffffffLL
-  };
-
-  r = ne_bare_read_vint(io, &uvalue, &ulength, MASK_FIRST_BIT);
-  if (r != 1)
-    return r;
-  *value = uvalue - svint_subtr[ulength - 1];
-  if (length)
-    *length = ulength;
-  return r;
-}
-
-static int
-ne_read_uint(nestegg_io * io, uint64_t * val, uint64_t length)
-{
-  unsigned char b;
-  int r;
-
-  if (length == 0 || length > 8)
-    return -1;
-  r = ne_io_read(io, &b, 1);
-  if (r != 1)
-    return r;
-  *val = b;
-  while (--length) {
-    r = ne_io_read(io, &b, 1);
-    if (r != 1)
-      return r;
-    *val <<= 8;
-    *val |= b;
-  }
-  return 1;
-}
-
-static int
-ne_read_int(nestegg_io * io, int64_t * val, uint64_t length)
-{
-  int r;
-  uint64_t uval, base;
-
-  r = ne_read_uint(io, &uval, length);
-  if (r != 1)
-    return r;
-
-  if (length < sizeof(int64_t)) {
-    base = 1;
-    base <<= length * 8 - 1;
-    if (uval >= base) {
-        base = 1;
-        base <<= length * 8;
-    } else {
-      base = 0;
-    }
-    *val = uval - base;
-  } else {
-    *val = (int64_t) uval;
-  }
-
-  return 1;
-}
-
-static int
-ne_read_float(nestegg_io * io, double * val, uint64_t length)
-{
-  union {
-    uint64_t u;
-    float f;
-    double d;
-  } value;
-  int r;
-
-  /* Length == 10 not implemented. */
-  if (length != 4 && length != 8)
-    return -1;
-  r = ne_read_uint(io, &value.u, length);
-  if (r != 1)
-    return r;
-  if (length == 4)
-    *val = value.f;
-  else
-    *val = value.d;
-  return 1;
-}
-
-static int
-ne_read_string(nestegg * ctx, char ** val, uint64_t length)
-{
-  char * str;
-  int r;
-  const size_t alloc_size = (size_t)length + 1;
-
-  if (length == 0 || length > LIMIT_STRING)
-    return -1;
-  str = ne_pool_alloc(alloc_size, ctx->alloc_pool);
-  r = ne_io_read(ctx->io, (unsigned char *) str, alloc_size - 1);
-  if (r != 1)
-    return r;
-  str[alloc_size - 1] = '\0';
-  *val = str;
-  return 1;
-}
-
-static int
-ne_read_binary(nestegg * ctx, struct ebml_binary * val, uint64_t length)
-{
-  if (length == 0 || length > LIMIT_BINARY)
-    return -1;
-  val->length = (size_t)length;
-  val->data = ne_pool_alloc(val->length, ctx->alloc_pool);
-  return ne_io_read(ctx->io, val->data, val->length);
-}
-
-static int
-ne_get_uint(struct ebml_type type, uint64_t * value)
-{
-  if (!type.read)
-    return -1;
-
-  assert(type.type == TYPE_UINT);
-
-  *value = type.v.u;
-
-  return 0;
-}
-
-static int
-ne_get_uint32(struct ebml_type type, unsigned int * value)
-{
-  uint64_t v;
-  if (ne_get_uint(type, &v))
-    return -1;
-
-  assert((unsigned int)v == v);
-
-  *value = (unsigned int)v;
-
-  return 0;
-}
-
-static int
-ne_get_float(struct ebml_type type, double * value)
-{
-  if (!type.read)
-    return -1;
-
-  assert(type.type == TYPE_FLOAT);
-
-  *value = type.v.f;
-
-  return 0;
-}
-
-static int
-ne_get_string(struct ebml_type type, char ** value)
-{
-  if (!type.read)
-    return -1;
-
-  assert(type.type == TYPE_STRING);
-
-  *value = type.v.s;
-
-  return 0;
-}
-
-static int
-ne_get_binary(struct ebml_type type, struct ebml_binary * value)
-{
-  if (!type.read)
-    return -1;
-
-  assert(type.type == TYPE_BINARY);
-
-  *value = type.v.b;
-
-  return 0;
-}
-
-static int
-ne_is_ancestor_element(uint64_t id, struct list_node * ancestor)
-{
-  struct ebml_element_desc * element;
-
-  for (; ancestor; ancestor = ancestor->previous)
-    for (element = ancestor->node; element->id; ++element)
-      if (element->id == id)
-        return 1;
-
-  return 0;
-}
-
-static struct ebml_element_desc *
-ne_find_element(uint64_t id, struct ebml_element_desc * elements)
-{
-  struct ebml_element_desc * element;
-
-  for (element = elements; element->id; ++element)
-    if (element->id == id)
-      return element;
-
-  return NULL;
-}
-
-static void
-ne_ctx_push(nestegg * ctx, struct ebml_element_desc * ancestor, void * data)
-{
-  struct list_node * item;
-
-  item = ne_alloc(sizeof(*item));
-  item->previous = ctx->ancestor;
-  item->node = ancestor;
-  item->data = data;
-  ctx->ancestor = item;
-}
-
-static void
-ne_ctx_pop(nestegg * ctx)
-{
-  struct list_node * item;
-
-  item = ctx->ancestor;
-  ctx->ancestor = item->previous;
-  free(item);
-}
-
-static int
-ne_ctx_save(nestegg * ctx, struct saved_state * s)
-{
-  s->stream_offset = ne_io_tell(ctx->io);
-  if (s->stream_offset < 0)
-    return -1;
-  s->ancestor = ctx->ancestor;
-  s->last_id = ctx->last_id;
-  s->last_size = ctx->last_size;
-  s->last_valid = ctx->last_valid;
-  return 0;
-}
-
-static int
-ne_ctx_restore(nestegg * ctx, struct saved_state * s)
-{
-  int r;
-
-  r = ne_io_seek(ctx->io, s->stream_offset, NESTEGG_SEEK_SET);
-  if (r != 0)
-    return -1;
-  ctx->ancestor = s->ancestor;
-  ctx->last_id = s->last_id;
-  ctx->last_size = s->last_size;
-  ctx->last_valid = s->last_valid;
-  return 0;
-}
-
-static int
-ne_peek_element(nestegg * ctx, uint64_t * id, uint64_t * size)
-{
-  int r;
-
-  if (ctx->last_valid) {
-    if (id)
-      *id = ctx->last_id;
-    if (size)
-      *size = ctx->last_size;
-    return 1;
-  }
-
-  r = ne_read_id(ctx->io, &ctx->last_id, NULL);
-  if (r != 1)
-    return r;
-
-  r = ne_read_vint(ctx->io, &ctx->last_size, NULL);
-  if (r != 1)
-    return r;
-
-  if (id)
-    *id = ctx->last_id;
-  if (size)
-    *size = ctx->last_size;
-
-  ctx->last_valid = 1;
-
-  return 1;
-}
-
-static int
-ne_read_element(nestegg * ctx, uint64_t * id, uint64_t * size)
-{
-  int r;
-
-  r = ne_peek_element(ctx, id, size);
-  if (r != 1)
-    return r;
-
-  ctx->last_valid = 0;
-
-  return 1;
-}
-
-static void
-ne_read_master(nestegg * ctx, struct ebml_element_desc * desc)
-{
-  struct ebml_list * list;
-  struct ebml_list_node * node, * oldtail;
-
-  assert(desc->type == TYPE_MASTER && desc->flags & DESC_FLAG_MULTI);
-
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, "multi master element %llx (%s)",
-           desc->id, desc->name);
-
-  list = (struct ebml_list *) (ctx->ancestor->data + desc->offset);
-
-  node = ne_pool_alloc(sizeof(*node), ctx->alloc_pool);
-  node->id = desc->id;
-  node->data = ne_pool_alloc(desc->size, ctx->alloc_pool);
-
-  oldtail = list->tail;
-  if (oldtail)
-    oldtail->next = node;
-  list->tail = node;
-  if (!list->head)
-    list->head = node;
-
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p", node->data);
-
-  ne_ctx_push(ctx, desc->children, node->data);
-}
-
-static void
-ne_read_single_master(nestegg * ctx, struct ebml_element_desc * desc)
-{
-  assert(desc->type == TYPE_MASTER && !(desc->flags & DESC_FLAG_MULTI));
-
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, "single master element %llx (%s)",
-           desc->id, desc->name);
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p (%u)",
-           ctx->ancestor->data + desc->offset, desc->offset);
-
-  ne_ctx_push(ctx, desc->children, ctx->ancestor->data + desc->offset);
-}
-
-static int
-ne_read_simple(nestegg * ctx, struct ebml_element_desc * desc, size_t length)
-{
-  struct ebml_type * storage;
-  int r = 0;
-
-  storage = (struct ebml_type *) (ctx->ancestor->data + desc->offset);
-
-  if (storage->read) {
-    ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) already read, skipping",
-             desc->id, desc->name);
-    return 0;
-  }
-
-  storage->type = desc->type;
-
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) -> %p (%u)",
-           desc->id, desc->name, storage, desc->offset);
-
-  switch (desc->type) {
-  case TYPE_UINT:
-    r = ne_read_uint(ctx->io, &storage->v.u, length);
-    break;
-  case TYPE_FLOAT:
-    r = ne_read_float(ctx->io, &storage->v.f, length);
-    break;
-  case TYPE_INT:
-    r = ne_read_int(ctx->io, &storage->v.i, length);
-    break;
-  case TYPE_STRING:
-    r = ne_read_string(ctx, &storage->v.s, length);
-    break;
-  case TYPE_BINARY:
-    r = ne_read_binary(ctx, &storage->v.b, length);
-    break;
-  case TYPE_MASTER:
-  case TYPE_UNKNOWN:
-    assert(0);
-    break;
-  }
-
-  if (r == 1)
-    storage->read = 1;
-
-  return r;
-}
-
-static int
-ne_parse(nestegg * ctx, struct ebml_element_desc * top_level, int64_t max_offset)
-{
-  int r;
-  int64_t * data_offset;
-  uint64_t id, size, peeked_id;
-  struct ebml_element_desc * element;
-
-  if (!ctx->ancestor)
-    return -1;
-
-  for (;;) {
-    if (max_offset > 0 && ne_io_tell(ctx->io) >= max_offset) {
-      /* Reached end of offset allowed for parsing - return gracefully */
-      r = 1;
-      break;
-    }
-    r = ne_peek_element(ctx, &id, &size);
-    if (r != 1)
-      break;
-    peeked_id = id;
-
-    element = ne_find_element(id, ctx->ancestor->node);
-    if (element) {
-      if (element->flags & DESC_FLAG_SUSPEND) {
-        assert(element->type == TYPE_BINARY);
-        ctx->log(ctx, NESTEGG_LOG_DEBUG, "suspend parse at %llx", id);
-        r = 1;
-        break;
-      }
-
-      r = ne_read_element(ctx, &id, &size);
-      if (r != 1)
-        break;
-      assert(id == peeked_id);
-
-      if (element->flags & DESC_FLAG_OFFSET) {
-        data_offset = (int64_t *) (ctx->ancestor->data + element->data_offset);
-        *data_offset = ne_io_tell(ctx->io);
-        if (*data_offset < 0) {
-          r = -1;
-          break;
-        }
-      }
-
-      if (element->type == TYPE_MASTER) {
-        if (element->flags & DESC_FLAG_MULTI)
-          ne_read_master(ctx, element);
-        else
-          ne_read_single_master(ctx, element);
-        continue;
-      } else {
-        r = ne_read_simple(ctx, element, (size_t)size);
-        if (r < 0)
-          break;
-      }
-    } else if (ne_is_ancestor_element(id, ctx->ancestor->previous)) {
-      ctx->log(ctx, NESTEGG_LOG_DEBUG, "parent element %llx", id);
-      if (top_level && ctx->ancestor->node == top_level) {
-        ctx->log(ctx, NESTEGG_LOG_DEBUG, "*** parse about to back up past top_level");
-        r = 1;
-        break;
-      }
-      ne_ctx_pop(ctx);
-    } else {
-      r = ne_read_element(ctx, &id, &size);
-      if (r != 1)
-        break;
-
-      if (id != ID_VOID && id != ID_CRC32)
-        ctx->log(ctx, NESTEGG_LOG_DEBUG, "unknown element %llx", id);
-      r = ne_io_read_skip(ctx->io, (size_t)size);
-      if (r != 1)
-        break;
-    }
-  }
-
-  if (r != 1)
-    while (ctx->ancestor)
-      ne_ctx_pop(ctx);
-
-  return r;
-}
-
-static uint64_t
-ne_xiph_lace_value(unsigned char ** np)
-{
-  uint64_t lace;
-  uint64_t value;
-  unsigned char * p = *np;
-
-  lace = *p++;
-  value = lace;
-  while (lace == 255) {
-    lace = *p++;
-    value += lace;
-  }
-
-  *np = p;
-
-  return value;
-}
-
-static int
-ne_read_xiph_lace_value(nestegg_io * io, uint64_t * value, size_t * consumed)
-{
-  int r;
-  uint64_t lace;
-
-  r = ne_read_uint(io, &lace, 1);
-  if (r != 1)
-    return r;
-  *consumed += 1;
-
-  *value = lace;
-  while (lace == 255) {
-    r = ne_read_uint(io, &lace, 1);
-    if (r != 1)
-      return r;
-    *consumed += 1;
-    *value += lace;
-  }
-
-  return 1;
-}
-
-static int
-ne_read_xiph_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, uint64_t * sizes)
-{
-  int r;
-  size_t i = 0;
-  uint64_t sum = 0;
-
-  while (--n) {
-    r = ne_read_xiph_lace_value(io, &sizes[i], read);
-    if (r != 1)
-      return r;
-    sum += sizes[i];
-    i += 1;
-  }
-
-  if (*read + sum > block)
-    return -1;
-
-  /* Last frame is the remainder of the block. */
-  sizes[i] = block - *read - sum;
-  return 1;
-}
-
-static int
-ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, uint64_t * sizes)
-{
-  int r;
-  uint64_t lace, sum, length;
-  int64_t slace;
-  size_t i = 0;
-
-  r = ne_read_vint(io, &lace, &length);
-  if (r != 1)
-    return r;
-  assert(length <= 8);
-  *read += (size_t)length;
-
-  sizes[i] = lace;
-  sum = sizes[i];
-
-  i += 1;
-  n -= 1;
-
-  while (--n) {
-    r = ne_read_svint(io, &slace, &length);
-    if (r != 1)
-      return r;
-    assert(length <= 8);
-    *read += (size_t)length;
-    sizes[i] = sizes[i - 1] + slace;
-    sum += sizes[i];
-    i += 1;
-  }
-
-  if (*read + sum > block)
-    return -1;
-
-  /* Last frame is the remainder of the block. */
-  sizes[i] = block - *read - sum;
-  return 1;
-}
-
-static uint64_t
-ne_get_timecode_scale(nestegg * ctx)
-{
-  uint64_t scale;
-
-  if (ne_get_uint(ctx->segment.info.timecode_scale, &scale) != 0)
-    scale = 1000000;
-
-  return scale;
-}
-
-static int
-ne_map_track_number_to_index(nestegg * ctx,
-                             unsigned int track_number,
-                             unsigned int * track_index)
-{
-  struct ebml_list_node * node;
-  struct track_entry * t_entry;
-  uint64_t t_number = 0;
-
-  if (!track_index)
-    return -1;
-  *track_index = 0;
-
-  if (track_number == 0)
-    return -1;
-
-  node = ctx->segment.tracks.track_entry.head;
-  while (node) {
-    assert(node->id == ID_TRACK_ENTRY);
-    t_entry = node->data;
-    if (ne_get_uint(t_entry->number, &t_number) != 0)
-      return -1;
-    if (t_number == track_number)
-      return 0;
-    *track_index += 1;
-    node = node->next;
-  }
-
-  return -1;
-}
-
-static struct track_entry *
-ne_find_track_entry(nestegg * ctx, unsigned int track)
-{
-  struct ebml_list_node * node;
-  unsigned int tracks = 0;
-
-  node = ctx->segment.tracks.track_entry.head;
-  while (node) {
-    assert(node->id == ID_TRACK_ENTRY);
-    if (track == tracks)
-      return node->data;
-    tracks += 1;
-    node = node->next;
-  }
-
-  return NULL;
-}
-
-static int
-ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_packet ** data)
-{
-  int r;
-  int64_t timecode, abs_timecode;
-  nestegg_packet * pkt;
-  struct cluster * cluster;
-  struct frame * f, * last;
-  struct track_entry * entry;
-  const int track_scale = 1;
-  uint64_t track_number, length, frame_sizes[256], cluster_tc, flags, frames, tc_scale, total;
-  unsigned int i, lacing, track;
-  size_t consumed = 0;
-
-  *data = NULL;
-
-  if (block_size > LIMIT_BLOCK)
-    return -1;
-
-  r = ne_read_vint(ctx->io, &track_number, &length);
-  if (r != 1)
-    return r;
-
-  if (track_number == 0 || (unsigned int)track_number != track_number)
-    return -1;
-
-  assert(length <= 8);
-  consumed += (size_t)length;
-
-  r = ne_read_int(ctx->io, &timecode, 2);
-  if (r != 1)
-    return r;
-
-  consumed += 2;
-
-  r = ne_read_uint(ctx->io, &flags, 1);
-  if (r != 1)
-    return r;
-
-  consumed += 1;
-
-  frames = 0;
-
-  /* Flags are different between Block and SimpleBlock, but lacing is
-     encoded the same way. */
-  lacing = (flags & BLOCK_FLAGS_LACING) >> 1;
-
-  switch (lacing) {
-  case LACING_NONE:
-    frames = 1;
-    break;
-  case LACING_XIPH:
-  case LACING_FIXED:
-  case LACING_EBML:
-    r = ne_read_uint(ctx->io, &frames, 1);
-    if (r != 1)
-      return r;
-    consumed += 1;
-    frames += 1;
-  }
-
-  if (frames > 256)
-    return -1;
-
-  switch (lacing) {
-  case LACING_NONE:
-    frame_sizes[0] = block_size - consumed;
-    break;
-  case LACING_XIPH:
-    if (frames == 1)
-      return -1;
-    r = ne_read_xiph_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes);
-    if (r != 1)
-      return r;
-    break;
-  case LACING_FIXED:
-    if ((block_size - consumed) % frames)
-      return -1;
-    for (i = 0; i < frames; ++i)
-      frame_sizes[i] = (block_size - consumed) / frames;
-    break;
-  case LACING_EBML:
-    if (frames == 1)
-      return -1;
-    r = ne_read_ebml_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes);
-    if (r != 1)
-      return r;
-    break;
-  }
-
-  /* Sanity check unlaced frame sizes against total block size. */
-  total = consumed;
-  for (i = 0; i < frames; ++i)
-    total += frame_sizes[i];
-  if (total > block_size)
-    return -1;
-
-  if (ne_map_track_number_to_index(ctx, (unsigned int)track_number, &track) != 0)
-    return -1;
-
-  entry = ne_find_track_entry(ctx, track);
-  if (!entry)
-    return -1;
-
-  tc_scale = ne_get_timecode_scale(ctx);
-
-  assert(ctx->segment.cluster.tail->id == ID_CLUSTER);
-  cluster = ctx->segment.cluster.tail->data;
-  if (ne_get_uint(cluster->timecode, &cluster_tc) != 0)
-    return -1;
-
-  abs_timecode = timecode + cluster_tc;
-  if (abs_timecode < 0)
-    return -1;
-
-  pkt = ne_alloc(sizeof(*pkt));
-  pkt->track = track;
-  pkt->timecode = abs_timecode * tc_scale * track_scale;
-
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu",
-           block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames);
-
-  last = NULL;
-  for (i = 0; i < frames; ++i) {
-    if (frame_sizes[i] > LIMIT_FRAME) {
-      nestegg_free_packet(pkt);
-      return -1;
-    }
-    f = ne_alloc(sizeof(*f));
-    f->length = (size_t)frame_sizes[i];
-    f->data = ne_alloc(f->length);
-    r = ne_io_read(ctx->io, f->data, f->length);
-    if (r != 1) {
-      free(f->data);
-      free(f);
-      nestegg_free_packet(pkt);
-      return -1;
-    }
-
-    if (!last)
-      pkt->frame = f;
-    else
-      last->next = f;
-    last = f;
-  }
-
-  *data = pkt;
-
-  return 1;
-}
-
-static int
-ne_read_discard_padding(nestegg * ctx, nestegg_packet * pkt)
-{
-  int r;
-  uint64_t id, size;
-  struct ebml_element_desc * element;
-  struct ebml_type * storage;
-
-  r = ne_peek_element(ctx, &id, &size);
-  if (r != 1)
-    return r;
-
-  if (id != ID_DISCARD_PADDING)
-    return 1;
-
-  element = ne_find_element(id, ctx->ancestor->node);
-  if (!element)
-    return 1;
-
-  assert((size_t)size == size);
-  r = ne_read_simple(ctx, element, (size_t)size);
-  if (r != 1)
-    return r;
-  storage = (struct ebml_type *) (ctx->ancestor->data + element->offset);
-  pkt->discard_padding = storage->v.i;
-
-  return 1;
-}
-
-
-static uint64_t
-ne_buf_read_id(unsigned char const * p, size_t length)
-{
-  uint64_t id = 0;
-
-  while (length--) {
-    id <<= 8;
-    id |= *p++;
-  }
-
-  return id;
-}
-
-static struct seek *
-ne_find_seek_for_id(struct ebml_list_node * seek_head, uint64_t id)
-{
-  struct ebml_list * head;
-  struct ebml_list_node * seek;
-  struct ebml_binary binary_id;
-  struct seek * s;
-
-  while (seek_head) {
-    assert(seek_head->id == ID_SEEK_HEAD);
-    head = seek_head->data;
-    seek = head->head;
-
-    while (seek) {
-      assert(seek->id == ID_SEEK);
-      s = seek->data;
-
-      if (ne_get_binary(s->id, &binary_id) == 0 &&
-          ne_buf_read_id(binary_id.data, binary_id.length) == id)
-        return s;
-
-      seek = seek->next;
-    }
-
-    seek_head = seek_head->next;
-  }
-
-  return NULL;
-}
-
-static struct cue_track_positions *
-ne_find_cue_position_for_track(nestegg * ctx, struct ebml_list_node * node, unsigned int track)
-{
-  struct cue_track_positions * pos = NULL;
-  unsigned int track_number;
-  unsigned int t;
-
-  while (node) {
-    assert(node->id == ID_CUE_TRACK_POSITIONS);
-    pos = node->data;
-    if (ne_get_uint32(pos->track, &track_number) != 0)
-      return NULL;
-
-    if (ne_map_track_number_to_index(ctx, track_number, &t) != 0)
-      return NULL;
-
-    if (t == track)
-      return pos;
-
-    node = node->next;
-  }
-
-  return NULL;
-}
-
-static struct cue_point *
-ne_find_cue_point_for_tstamp(nestegg * ctx, struct ebml_list_node * cue_point, unsigned int track, uint64_t scale, uint64_t tstamp)
-{
-  uint64_t time;
-  struct cue_point * c, * prev = NULL;
-
-  while (cue_point) {
-    assert(cue_point->id == ID_CUE_POINT);
-    c = cue_point->data;
-
-    if (!prev)
-      prev = c;
-
-    if (ne_get_uint(c->time, &time) == 0 && time * scale > tstamp)
-      break;
-
-    if (ne_find_cue_position_for_track(ctx, c->cue_track_positions.head, track) != NULL)
-      prev = c;
-
-    cue_point = cue_point->next;
-  }
-
-  return prev;
-}
-
-static int
-ne_is_suspend_element(uint64_t id)
-{
-  if (id == ID_SIMPLE_BLOCK || id == ID_BLOCK)
-    return 1;
-  return 0;
-}
-
-static void
-ne_null_log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...)
-{
-  if (ctx && severity && fmt)
-    return;
-}
-
-static int
-ne_init_cue_points(nestegg * ctx, int64_t max_offset)
-{
-  int r;
-  struct ebml_list_node * node = ctx->segment.cues.cue_point.head;
-  struct seek * found;
-  uint64_t seek_pos, id;
-  struct saved_state state;
-
-  /* If there are no cues loaded, check for cues element in the seek head
-     and load it. */
-  if (!node) {
-    found = ne_find_seek_for_id(ctx->segment.seek_head.head, ID_CUES);
-    if (!found)
-      return -1;
-
-    if (ne_get_uint(found->position, &seek_pos) != 0)
-      return -1;
-
-    /* Save old parser state. */
-    r = ne_ctx_save(ctx, &state);
-    if (r != 0)
-      return -1;
-
-    /* Seek and set up parser state for segment-level element (Cues). */
-    r = ne_io_seek(ctx->io, ctx->segment_offset + seek_pos, NESTEGG_SEEK_SET);
-    if (r != 0)
-      return -1;
-    ctx->last_valid = 0;
-
-    r = ne_read_element(ctx, &id, NULL);
-    if (r != 1)
-      return -1;
-
-    if (id != ID_CUES)
-      return -1;
-
-    ctx->ancestor = NULL;
-    ne_ctx_push(ctx, ne_top_level_elements, ctx);
-    ne_ctx_push(ctx, ne_segment_elements, &ctx->segment);
-    ne_ctx_push(ctx, ne_cues_elements, &ctx->segment.cues);
-    /* parser will run until end of cues element. */
-    ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cue elements");
-    r = ne_parse(ctx, ne_cues_elements, max_offset);
-    while (ctx->ancestor)
-      ne_ctx_pop(ctx);
-
-    /* Reset parser state to original state and seek back to old position. */
-    if (ne_ctx_restore(ctx, &state) != 0)
-      return -1;
-
-    if (r < 0)
-      return -1;
-
-    node = ctx->segment.cues.cue_point.head;
-    if (!node)
-      return -1;
-  }
-
-  return 0;
-}
-
-/* Three functions that implement the nestegg_io interface, operating on a
- * sniff_buffer. */
-struct sniff_buffer {
-  unsigned char const * buffer;
-  size_t length;
-  int64_t offset;
-};
-
-static int
-ne_buffer_read(void * buffer, size_t length, void * user_data)
-{
-  struct sniff_buffer * sb = user_data;
-
-  int rv = 1;
-  size_t available = sb->length - (size_t)sb->offset;
-
-  if (available < length)
-    return 0;
-
-  memcpy(buffer, sb->buffer + sb->offset, length);
-  sb->offset += length;
-
-  return rv;
-}
-
-static int
-ne_buffer_seek(int64_t offset, int whence, void * user_data)
-{
-  struct sniff_buffer * sb = user_data;
-  int64_t o = sb->offset;
-
-  switch(whence) {
-    case NESTEGG_SEEK_SET:
-      o = offset;
-      break;
-    case NESTEGG_SEEK_CUR:
-      o += offset;
-      break;
-    case NESTEGG_SEEK_END:
-      o = sb->length + offset;
-      break;
-  }
-
-  if (o < 0 || o > (int64_t) sb->length)
-    return -1;
-
-  sb->offset = o;
-  return 0;
-}
-
-static int64_t
-ne_buffer_tell(void * user_data)
-{
-  struct sniff_buffer * sb = user_data;
-  return sb->offset;
-}
-
-static int
-ne_match_webm(nestegg_io io, int64_t max_offset)
-{
-  int r;
-  uint64_t id;
-  char * doctype;
-  nestegg * ctx;
-
-  if (!(io.read && io.seek && io.tell))
-    return -1;
-
-  ctx = ne_alloc(sizeof(*ctx));
-
-  ctx->io = ne_alloc(sizeof(*ctx->io));
-  *ctx->io = io;
-  ctx->alloc_pool = ne_pool_init();
-  ctx->log = ne_null_log_callback;
-
-  r = ne_peek_element(ctx, &id, NULL);
-  if (r != 1) {
-    nestegg_destroy(ctx);
-    return 0;
-  }
-
-  if (id != ID_EBML) {
-    nestegg_destroy(ctx);
-    return 0;
-  }
-
-  ne_ctx_push(ctx, ne_top_level_elements, ctx);
-
-  /* we don't check the return value of ne_parse, that might fail because
-   * max_offset is not on a valid element end point. We only want to check
-   * the EBML ID and that the doctype is "webm". */
-  ne_parse(ctx, NULL, max_offset);
-
-  if (ne_get_string(ctx->ebml.doctype, &doctype) != 0 ||
-      strcmp(doctype, "webm") != 0) {
-    nestegg_destroy(ctx);
-    return 0;
-  }
-
-  nestegg_destroy(ctx);
-
-  return 1;
-}
-
-int
-nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback, int64_t max_offset)
-{
-  int r;
-  uint64_t id, version, docversion;
-  struct ebml_list_node * track;
-  char * doctype;
-  nestegg * ctx;
-
-  if (!(io.read && io.seek && io.tell))
-    return -1;
-
-  ctx = ne_alloc(sizeof(*ctx));
-
-  ctx->io = ne_alloc(sizeof(*ctx->io));
-  *ctx->io = io;
-  ctx->log = callback;
-  ctx->alloc_pool = ne_pool_init();
-
-  if (!ctx->log)
-    ctx->log = ne_null_log_callback;
-
-  r = ne_peek_element(ctx, &id, NULL);
-  if (r != 1) {
-    nestegg_destroy(ctx);
-    return -1;
-  }
-
-  if (id != ID_EBML) {
-    nestegg_destroy(ctx);
-    return -1;
-  }
-
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, "ctx %p", ctx);
-
-  ne_ctx_push(ctx, ne_top_level_elements, ctx);
-
-  r = ne_parse(ctx, NULL, max_offset);
-
-  if (r != 1) {
-    nestegg_destroy(ctx);
-    return -1;
-  }
-
-  if (ne_get_uint(ctx->ebml.ebml_read_version, &version) != 0)
-    version = 1;
-  if (version != 1) {
-    nestegg_destroy(ctx);
-    return -1;
-  }
-
-  if (ne_get_string(ctx->ebml.doctype, &doctype) != 0)
-    doctype = "matroska";
-  if (strcmp(doctype, "webm") != 0) {
-    nestegg_destroy(ctx);
-    return -1;
-  }
-
-  if (ne_get_uint(ctx->ebml.doctype_read_version, &docversion) != 0)
-    docversion = 1;
-  if (docversion < 1 || docversion > 2) {
-    nestegg_destroy(ctx);
-    return -1;
-  }
-
-  if (!ctx->segment.tracks.track_entry.head) {
-    nestegg_destroy(ctx);
-    return -1;
-  }
-
-  track = ctx->segment.tracks.track_entry.head;
-  ctx->track_count = 0;
-
-  while (track) {
-    ctx->track_count += 1;
-    track = track->next;
-  }
-
-  *context = ctx;
-
-  return 0;
-}
-
-void
-nestegg_destroy(nestegg * ctx)
-{
-  while (ctx->ancestor)
-    ne_ctx_pop(ctx);
-  ne_pool_destroy(ctx->alloc_pool);
-  free(ctx->io);
-  free(ctx);
-}
-
-int
-nestegg_duration(nestegg * ctx, uint64_t * duration)
-{
-  uint64_t tc_scale;
-  double unscaled_duration;
-
-  if (ne_get_float(ctx->segment.info.duration, &unscaled_duration) != 0)
-    return -1;
-
-  tc_scale = ne_get_timecode_scale(ctx);
-
-  *duration = (uint64_t) (unscaled_duration * tc_scale);
-  return 0;
-}
-
-int
-nestegg_tstamp_scale(nestegg * ctx, uint64_t * scale)
-{
-  *scale = ne_get_timecode_scale(ctx);
-  return 0;
-}
-
-int
-nestegg_track_count(nestegg * ctx, unsigned int * tracks)
-{
-  *tracks = ctx->track_count;
-  return 0;
-}
-
-int
-nestegg_get_cue_point(nestegg * ctx, unsigned int cluster_num, int64_t max_offset,
-                      int64_t * start_pos, int64_t * end_pos, uint64_t * tstamp)
-{
-  int range_obtained = 0;
-  unsigned int cluster_count = 0;
-  struct cue_point * cue_point;
-  struct cue_track_positions * pos;
-  uint64_t seek_pos, track_number, tc_scale, time;
-  struct ebml_list_node * cues_node = ctx->segment.cues.cue_point.head;
-  struct ebml_list_node * cue_pos_node = NULL;
-  unsigned int track = 0, track_count = 0, track_index;
-
-  if (!start_pos || !end_pos || !tstamp)
-    return -1;
-
-  /* Initialise return values */
-  *start_pos = -1;
-  *end_pos = -1;
-  *tstamp = 0;
-
-  if (!cues_node) {
-    ne_init_cue_points(ctx, max_offset);
-    cues_node = ctx->segment.cues.cue_point.head;
-    /* Verify cues have been added to context. */
-    if (!cues_node)
-      return -1;
-  }
-
-  nestegg_track_count(ctx, &track_count);
-
-  tc_scale = ne_get_timecode_scale(ctx);
-
-  while (cues_node && !range_obtained) {
-    assert(cues_node->id == ID_CUE_POINT);
-    cue_point = cues_node->data;
-    cue_pos_node = cue_point->cue_track_positions.head;
-    while (cue_pos_node) {
-      assert(cue_pos_node->id == ID_CUE_TRACK_POSITIONS);
-      pos = cue_pos_node->data;
-      for (track = 0; track < track_count; track++) {
-        if (ne_get_uint(pos->track, &track_number) != 0)
-          return -1;
-
-        if (ne_map_track_number_to_index(ctx, (unsigned int)track_number, &track_index) != 0)
-          return -1;
-
-        if (track_index == track) {
-          if (ne_get_uint(pos->cluster_position, &seek_pos) != 0)
-            return -1;
-          if (cluster_count == cluster_num) {
-            *start_pos = ctx->segment_offset+seek_pos;
-            if (ne_get_uint(cue_point->time, &time) != 0)
-              return -1;
-            *tstamp = time * tc_scale;
-          } else if (cluster_count == cluster_num+1) {
-            *end_pos = (ctx->segment_offset+seek_pos)-1;
-            range_obtained = 1;
-            break;
-          }
-          cluster_count++;
-        }
-      }
-      cue_pos_node = cue_pos_node->next;
-    }
-    cues_node = cues_node->next;
-  }
-
-  return 0;
-}
-
-int
-nestegg_offset_seek(nestegg * ctx, uint64_t offset)
-{
-  int r;
-
-  /* Seek and set up parser state for segment-level element (Cluster). */
-  r = ne_io_seek(ctx->io, offset, NESTEGG_SEEK_SET);
-  if (r != 0)
-    return -1;
-  ctx->last_valid = 0;
-
-  while (ctx->ancestor)
-    ne_ctx_pop(ctx);
-
-  ne_ctx_push(ctx, ne_top_level_elements, ctx);
-  ne_ctx_push(ctx, ne_segment_elements, &ctx->segment);
-
-  return 0;
-}
-
-int
-nestegg_track_seek(nestegg * ctx, unsigned int track, uint64_t tstamp)
-{
-  int r;
-  struct cue_point * cue_point;
-  struct cue_track_positions * pos;
-  uint64_t seek_pos, tc_scale;
-
-  /* If there are no cues loaded, check for cues element in the seek head
-     and load it. */
-  if (!ctx->segment.cues.cue_point.head) {
-    r = ne_init_cue_points(ctx, -1);
-    if (r != 0)
-      return -1;
-  }
-
-  tc_scale = ne_get_timecode_scale(ctx);
-
-  cue_point = ne_find_cue_point_for_tstamp(ctx, ctx->segment.cues.cue_point.head,
-                                           track, tc_scale, tstamp);
-  if (!cue_point)
-    return -1;
-
-  pos = ne_find_cue_position_for_track(ctx, cue_point->cue_track_positions.head, track);
-  if (pos == NULL)
-    return -1;
-
-  if (ne_get_uint(pos->cluster_position, &seek_pos) != 0)
-    return -1;
-
-  /* Seek and set up parser state for segment-level element (Cluster). */
-  r = nestegg_offset_seek(ctx, ctx->segment_offset + seek_pos);
-  ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cluster elements");
-  r = ne_parse(ctx, NULL, -1);
-  if (r != 1)
-    return -1;
-
-  if (!ne_is_suspend_element(ctx->last_id))
-    return -1;
-
-  return 0;
-}
-
-int
-nestegg_track_type(nestegg * ctx, unsigned int track)
-{
-  struct track_entry * entry;
-  uint64_t type;
-
-  entry = ne_find_track_entry(ctx, track);
-  if (!entry)
-    return -1;
-
-  if (ne_get_uint(entry->type, &type) != 0)
-    return -1;
-
-  if (type & TRACK_TYPE_VIDEO)
-    return NESTEGG_TRACK_VIDEO;
-
-  if (type & TRACK_TYPE_AUDIO)
-    return NESTEGG_TRACK_AUDIO;
-
-  return -1;
-}
-
-int
-nestegg_track_codec_id(nestegg * ctx, unsigned int track)
-{
-  char * codec_id;
-  struct track_entry * entry;
-
-  entry = ne_find_track_entry(ctx, track);
-  if (!entry)
-    return -1;
-
-  if (ne_get_string(entry->codec_id, &codec_id) != 0)
-    return -1;
-
-  if (strcmp(codec_id, TRACK_ID_VP8) == 0)
-    return NESTEGG_CODEC_VP8;
-
-  if (strcmp(codec_id, TRACK_ID_VP9) == 0)
-    return NESTEGG_CODEC_VP9;
-
-  if (strcmp(codec_id, TRACK_ID_VORBIS) == 0)
-    return NESTEGG_CODEC_VORBIS;
-
-  if (strcmp(codec_id, TRACK_ID_OPUS) == 0)
-    return NESTEGG_CODEC_OPUS;
-
-  return -1;
-}
-
-int
-nestegg_track_codec_data_count(nestegg * ctx, unsigned int track,
-                               unsigned int * count)
-{
-  struct track_entry * entry;
-  struct ebml_binary codec_private;
-  unsigned char * p;
-
-  *count = 0;
-
-  entry = ne_find_track_entry(ctx, track);
-  if (!entry)
-    return -1;
-
-  if (nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS)
-    return -1;
-
-  if (ne_get_binary(entry->codec_private, &codec_private) != 0)
-    return -1;
-
-  if (codec_private.length < 1)
-    return -1;
-
-  p = codec_private.data;
-  *count = *p + 1;
-
-  if (*count > 3)
-    return -1;
-
-  return 0;
-}
-
-int
-nestegg_track_codec_data(nestegg * ctx, unsigned int track, unsigned int item,
-                         unsigned char ** data, size_t * length)
-{
-  struct track_entry * entry;
-  struct ebml_binary codec_private;
-  uint64_t sizes[3], total;
-  unsigned char * p;
-  unsigned int count, i;
-
-  *data = NULL;
-  *length = 0;
-
-  entry = ne_find_track_entry(ctx, track);
-  if (!entry)
-    return -1;
-
-  if (nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS
-    && nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_OPUS)
-    return -1;
-
-  if (ne_get_binary(entry->codec_private, &codec_private) != 0)
-    return -1;
-
-  if (nestegg_track_codec_id(ctx, track) == NESTEGG_CODEC_VORBIS) {
-      p = codec_private.data;
-      count = *p++ + 1;
-
-      if (count > 3)
-        return -1;
-
-      i = 0;
-      total = 0;
-      while (--count) {
-        sizes[i] = ne_xiph_lace_value(&p);
-        total += sizes[i];
-        i += 1;
-      }
-      sizes[i] = codec_private.length - total - (p - codec_private.data);
-
-      for (i = 0; i < item; ++i) {
-        if (sizes[i] > LIMIT_FRAME)
-          return -1;
-        p += sizes[i];
-      }
-      *data = p;
-      *length = (size_t)sizes[item];
-  } else {
-    *data = codec_private.data;
-    *length = codec_private.length;
-  }
-
-  return 0;
-}
-
-int
-nestegg_track_video_params(nestegg * ctx, unsigned int track,
-                           nestegg_video_params * params)
-{
-  struct track_entry * entry;
-  unsigned int value;
-
-  memset(params, 0, sizeof(*params));
-
-  entry = ne_find_track_entry(ctx, track);
-  if (!entry)
-    return -1;
-
-  if (nestegg_track_type(ctx, track) != NESTEGG_TRACK_VIDEO)
-    return -1;
-
-  value = 0;
-  ne_get_uint32(entry->video.stereo_mode, &value);
-  if (value <= NESTEGG_VIDEO_STEREO_TOP_BOTTOM ||
-      value == NESTEGG_VIDEO_STEREO_RIGHT_LEFT)
-    params->stereo_mode = value;
-
-  if (ne_get_uint32(entry->video.pixel_width, &value) != 0)
-    return -1;
-  params->width = value;
-
-  if (ne_get_uint32(entry->video.pixel_height, &value) != 0)
-    return -1;
-  params->height = value;
-
-  value = 0;
-  ne_get_uint32(entry->video.pixel_crop_bottom, &value);
-  params->crop_bottom = value;
-
-  value = 0;
-  ne_get_uint32(entry->video.pixel_crop_top, &value);
-  params->crop_top = value;
-
-  value = 0;
-  ne_get_uint32(entry->video.pixel_crop_left, &value);
-  params->crop_left = value;
-
-  value = 0;
-  ne_get_uint32(entry->video.pixel_crop_right, &value);
-  params->crop_right = value;
-
-  value = params->width;
-  ne_get_uint32(entry->video.display_width, &value);
-  params->display_width = value;
-
-  value = params->height;
-  ne_get_uint32(entry->video.display_height, &value);
-  params->display_height = value;
-
-  return 0;
-}
-
-int
-nestegg_track_audio_params(nestegg * ctx, unsigned int track,
-                           nestegg_audio_params * params)
-{
-  struct track_entry * entry;
-  unsigned int value;
-
-  memset(params, 0, sizeof(*params));
-
-  entry = ne_find_track_entry(ctx, track);
-  if (!entry)
-    return -1;
-
-  if (nestegg_track_type(ctx, track) != NESTEGG_TRACK_AUDIO)
-    return -1;
-
-  params->rate = 8000;
-  ne_get_float(entry->audio.sampling_frequency, &params->rate);
-
-  value = 1;
-  ne_get_uint32(entry->audio.channels, &value);
-  params->channels = value;
-
-  value = 16;
-  ne_get_uint32(entry->audio.bit_depth, &value);
-  params->depth = value;
-
-  value = 0;
-  ne_get_uint32(entry->codec_delay, &value);
-  params->codec_delay = value;
-
-  value = 0;
-  ne_get_uint32(entry->seek_preroll, &value);
-  params->seek_preroll = value;
-
-  return 0;
-}
-
-int
-nestegg_read_packet(nestegg * ctx, nestegg_packet ** pkt)
-{
-  int r;
-  uint64_t id, size;
-
-  *pkt = NULL;
-
-  for (;;) {
-    r = ne_peek_element(ctx, &id, &size);
-    if (r != 1)
-      return r;
-
-    /* Any DESC_FLAG_SUSPEND fields must be handled here. */
-    if (ne_is_suspend_element(id)) {
-      r = ne_read_element(ctx, &id, &size);
-      if (r != 1)
-        return r;
-
-      /* The only DESC_FLAG_SUSPEND fields are Blocks and SimpleBlocks, which we
-         handle directly. */
-      r = ne_read_block(ctx, id, size, pkt);
-      if (r != 1)
-        return r;
-
-      r = ne_read_discard_padding(ctx, *pkt);
-      if (r != 1)
-        return r;
-
-      return r;
-    }
-
-    r =  ne_parse(ctx, NULL, -1);
-    if (r != 1)
-      return r;
-  }
-
-  return 1;
-}
-
-void
-nestegg_free_packet(nestegg_packet * pkt)
-{
-  struct frame * frame;
-
-  while (pkt->frame) {
-    frame = pkt->frame;
-    pkt->frame = frame->next;
-    free(frame->data);
-    free(frame);
-  }
-
- free(pkt);
-}
-
-int
-nestegg_packet_track(nestegg_packet * pkt, unsigned int * track)
-{
-  *track = (unsigned int)pkt->track;
-  return 0;
-}
-
-int
-nestegg_packet_tstamp(nestegg_packet * pkt, uint64_t * tstamp)
-{
-  *tstamp = pkt->timecode;
-  return 0;
-}
-
-int
-nestegg_packet_discard_padding(nestegg_packet * pkt, int64_t * discard_padding)
-{
-  *discard_padding = pkt->discard_padding;
-  return 0;
-}
-
-int
-nestegg_packet_count(nestegg_packet * pkt, unsigned int * count)
-{
-  struct frame * f = pkt->frame;
-
-  *count = 0;
-
-  while (f) {
-    *count += 1;
-    f = f->next;
-  }
-
-  return 0;
-}
-
-int
-nestegg_packet_data(nestegg_packet * pkt, unsigned int item,
-                    unsigned char ** data, size_t * length)
-{
-  struct frame * f = pkt->frame;
-  unsigned int count = 0;
-
-  *data = NULL;
-  *length = 0;
-
-  while (f) {
-    if (count == item) {
-      *data = f->data;
-      *length = f->length;
-      return 0;
-    }
-    count += 1;
-    f = f->next;
-  }
-
-  return -1;
-}
-
-int
-nestegg_has_cues(nestegg * ctx)
-{
-  return ctx->segment.cues.cue_point.head ||
-         ne_find_seek_for_id(ctx->segment.seek_head.head, ID_CUES);
-}
-
-int
-nestegg_sniff(unsigned char const * buffer, size_t length)
-{
-  nestegg_io io;
-  struct sniff_buffer user_data;
-
-  user_data.buffer = buffer;
-  user_data.length = length;
-  user_data.offset = 0;
-
-  io.read = ne_buffer_read;
-  io.seek = ne_buffer_seek;
-  io.tell = ne_buffer_tell;
-  io.userdata = &user_data;
-  return ne_match_webm(io, length);
-}
-
diff --git a/third_party/nestegg/test/test.c b/third_party/nestegg/test/test.c
deleted file mode 100644
index cc0753de7..000000000
--- a/third_party/nestegg/test/test.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright © 2010 Mozilla Foundation
- *
- * This program is made available under an ISC-style license.  See the
- * accompanying file LICENSE for details.
- */
-#include <assert.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include "nestegg/nestegg.h"
-
-#undef DEBUG
-#define SEEK_TEST
-
-static int
-stdio_read(void * p, size_t length, void * file)
-{
-  size_t r;
-  FILE * fp = file;
-
-  r = fread(p, length, 1, fp);
-  if (r == 0 && feof(fp))
-    return 0;
-  return r == 0 ? -1 : 1;
-}
-
-static int
-stdio_seek(int64_t offset, int whence, void * file)
-{
-  FILE * fp = file;
-  return fseek(fp, offset, whence);
-}
-
-static int64_t
-stdio_tell(void * fp)
-{
-  return ftell(fp);
-}
-
-static void
-log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...)
-{
-  va_list ap;
-  char const * sev = NULL;
-
-#if !defined(DEBUG)
-  if (severity < NESTEGG_LOG_WARNING)
-    return;
-#endif
-
-  switch (severity) {
-  case NESTEGG_LOG_DEBUG:
-    sev = "debug:   ";
-    break;
-  case NESTEGG_LOG_WARNING:
-    sev = "warning: ";
-    break;
-  case NESTEGG_LOG_CRITICAL:
-    sev = "critical:";
-    break;
-  default:
-    sev = "unknown: ";
-  }
-
-  fprintf(stderr, "%p %s ", (void *) ctx, sev);
-
-  va_start(ap, fmt);
-  vfprintf(stderr, fmt, ap);
-  va_end(ap);
-
-  fprintf(stderr, "\n");
-}
-
-int
-main(int argc, char * argv[])
-{
-  FILE * fp;
-  int r, type;
-  nestegg * ctx;
-  nestegg_audio_params aparams;
-  nestegg_packet * pkt;
-  nestegg_video_params vparams;
-  size_t length, size;
-  uint64_t duration, tstamp, pkt_tstamp;
-  unsigned char * codec_data, * ptr;
-  unsigned int cnt, i, j, track, tracks, pkt_cnt, pkt_track;
-  unsigned int data_items = 0;
-  nestegg_io io = {
-    stdio_read,
-    stdio_seek,
-    stdio_tell,
-    NULL
-  };
-
-  if (argc != 2)
-    return EXIT_FAILURE;
-
-  fp = fopen(argv[1], "rb");
-  if (!fp)
-    return EXIT_FAILURE;
-
-  io.userdata = fp;
-
-  ctx = NULL;
-  r = nestegg_init(&ctx, io, log_callback, -1);
-  if (r != 0)
-    return EXIT_FAILURE;
-
-  nestegg_track_count(ctx, &tracks);
-  nestegg_duration(ctx, &duration);
-#if defined(DEBUG)
-  fprintf(stderr, "media has %u tracks and duration %fs\n", tracks, duration / 1e9);
-#endif
-
-  for (i = 0; i < tracks; ++i) {
-    type = nestegg_track_type(ctx, i);
-#if defined(DEBUG)
-    fprintf(stderr, "track %u: type: %d codec: %d", i,
-            type, nestegg_track_codec_id(ctx, i));
-#endif
-    nestegg_track_codec_data_count(ctx, i, &data_items);
-    for (j = 0; j < data_items; ++j) {
-      nestegg_track_codec_data(ctx, i, j, &codec_data, &length);
-#if defined(DEBUG)
-      fprintf(stderr, " (%p, %u)", codec_data, (unsigned int) length);
-#endif
-    }
-    if (type == NESTEGG_TRACK_VIDEO) {
-      nestegg_track_video_params(ctx, i, &vparams);
-#if defined(DEBUG)
-      fprintf(stderr, " video: %ux%u (d: %ux%u %ux%ux%ux%u)",
-              vparams.width, vparams.height,
-              vparams.display_width, vparams.display_height,
-              vparams.crop_top, vparams.crop_left, vparams.crop_bottom, vparams.crop_right);
-#endif
-    } else if (type == NESTEGG_TRACK_AUDIO) {
-      nestegg_track_audio_params(ctx, i, &aparams);
-#if defined(DEBUG)
-      fprintf(stderr, " audio: %.2fhz %u bit %u channels",
-              aparams.rate, aparams.depth, aparams.channels);
-#endif
-    }
-#if defined(DEBUG)
-    fprintf(stderr, "\n");
-#endif
-  }
-
-#if defined(SEEK_TEST)
-#if defined(DEBUG)
-  fprintf(stderr, "seek to middle\n");
-#endif
-  r = nestegg_track_seek(ctx, 0, duration / 2);
-  if (r == 0) {
-#if defined(DEBUG)
-    fprintf(stderr, "middle ");
-#endif
-    r = nestegg_read_packet(ctx, &pkt);
-    if (r == 1) {
-      nestegg_packet_track(pkt, &track);
-      nestegg_packet_count(pkt, &cnt);
-      nestegg_packet_tstamp(pkt, &tstamp);
-#if defined(DEBUG)
-      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
-#endif
-      nestegg_free_packet(pkt);
-    } else {
-#if defined(DEBUG)
-      fprintf(stderr, "middle seek failed\n");
-#endif
-    }
-  }
-
-#if defined(DEBUG)
-  fprintf(stderr, "seek to ~end\n");
-#endif
-  r = nestegg_track_seek(ctx, 0, duration - (duration / 10));
-  if (r == 0) {
-#if defined(DEBUG)
-    fprintf(stderr, "end ");
-#endif
-    r = nestegg_read_packet(ctx, &pkt);
-    if (r == 1) {
-      nestegg_packet_track(pkt, &track);
-      nestegg_packet_count(pkt, &cnt);
-      nestegg_packet_tstamp(pkt, &tstamp);
-#if defined(DEBUG)
-      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
-#endif
-      nestegg_free_packet(pkt);
-    } else {
-#if defined(DEBUG)
-      fprintf(stderr, "end seek failed\n");
-#endif
-    }
-  }
-
-#if defined(DEBUG)
-  fprintf(stderr, "seek to ~start\n");
-#endif
-  r = nestegg_track_seek(ctx, 0, duration / 10);
-  if (r == 0) {
-#if defined(DEBUG)
-    fprintf(stderr, "start ");
-#endif
-    r = nestegg_read_packet(ctx, &pkt);
-    if (r == 1) {
-      nestegg_packet_track(pkt, &track);
-      nestegg_packet_count(pkt, &cnt);
-      nestegg_packet_tstamp(pkt, &tstamp);
-#if defined(DEBUG)
-      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
-#endif
-      nestegg_free_packet(pkt);
-    } else {
-#if defined(DEBUG)
-      fprintf(stderr, "start seek failed\n");
-#endif
-    }
-  }
-#endif
-
-  while (nestegg_read_packet(ctx, &pkt) > 0) {
-    nestegg_packet_track(pkt, &pkt_track);
-    nestegg_packet_count(pkt, &pkt_cnt);
-    nestegg_packet_tstamp(pkt, &pkt_tstamp);
-
-#if defined(DEBUG)
-    fprintf(stderr, "t %u pts %f frames %u: ", pkt_track, pkt_tstamp / 1e9, pkt_cnt);
-#endif
-
-    for (i = 0; i < pkt_cnt; ++i) {
-      nestegg_packet_data(pkt, i, &ptr, &size);
-#if defined(DEBUG)
-      fprintf(stderr, "%u ", (unsigned int) size);
-#endif
-    }
-#if defined(DEBUG)
-    fprintf(stderr, "\n");
-#endif
-
-    nestegg_free_packet(pkt);
-  }
-
-  nestegg_destroy(ctx);
-  fclose(fp);
-
-  return EXIT_SUCCESS;
-}
diff --git a/tools/ftfy.sh b/tools/ftfy.sh
index 92059f51c..29ae95e9b 100755
--- a/tools/ftfy.sh
+++ b/tools/ftfy.sh
@@ -123,7 +123,6 @@ git show > "${ORIG_DIFF}"
 for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM); do
   case "$f" in
     third_party/*) continue;;
-    nestegg/*) continue;;
   esac
   vpx_style "$f"
 done
diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h
deleted file mode 100644
index 329cc8275..000000000
--- a/vp8/common/pragmas.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP8_COMMON_PRAGMAS_H_
-#define VP8_COMMON_PRAGMAS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __INTEL_COMPILER
-#pragma warning(disable:997 1011 170)
-#endif
-#ifdef _MSC_VER
-#pragma warning(disable:4799)
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP8_COMMON_PRAGMAS_H_
diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2_x86_64.asm
index 6d5aaa19d..6d5aaa19d 100644
--- a/vp8/common/x86/loopfilter_block_sse2.asm
+++ b/vp8/common/x86/loopfilter_block_sse2_x86_64.asm
diff --git a/vp8/common/x86/variance_mmx.c b/vp8/common/x86/variance_mmx.c
index 36995db9a..02e02420f 100644
--- a/vp8/common/x86/variance_mmx.c
+++ b/vp8/common/x86/variance_mmx.c
@@ -10,7 +10,6 @@
 
 #include "vpx_config.h"
 #include "vp8/common/variance.h"
-#include "vp8/common/pragmas.h"
 #include "vpx_ports/mem.h"
 #include "vp8/common/x86/filter_x86.h"
 
diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c
index 7fa5f53dc..1fe127bf2 100644
--- a/vp8/common/x86/variance_sse2.c
+++ b/vp8/common/x86/variance_sse2.c
@@ -10,7 +10,6 @@
 
 #include "vpx_config.h"
 #include "vp8/common/variance.h"
-#include "vp8/common/pragmas.h"
 #include "vpx_ports/mem.h"
 #include "vp8/common/x86/filter_x86.h"
 
diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c
index f90f8117c..73eb90df6 100644
--- a/vp8/common/x86/variance_ssse3.c
+++ b/vp8/common/x86/variance_ssse3.c
@@ -10,7 +10,6 @@
 
 #include "vpx_config.h"
 #include "vp8/common/variance.h"
-#include "vp8/common/pragmas.h"
 #include "vpx_ports/mem.h"
 
 extern unsigned int vp8_get16x16var_sse2
diff --git a/vp8/encoder/arm/neon/denoising_neon.c b/vp8/encoder/arm/neon/denoising_neon.c
index 32ce65abf..b8e403419 100644
--- a/vp8/encoder/arm/neon/denoising_neon.c
+++ b/vp8/encoder/arm/neon/denoising_neon.c
@@ -56,11 +56,13 @@ int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y,
      * increasing the adjustment for each level, level1 adjustment is
      * increased, the deltas stay the same.
      */
-    const uint8x16_t v_level1_adjustment = vdupq_n_u8(
-        (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 : 3);
+    int shift_inc  = (increase_denoising &&
+        motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
+    const uint8x16_t v_level1_adjustment = vmovq_n_u8(
+        (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3);
     const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1);
     const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2);
-    const uint8x16_t v_level1_threshold = vdupq_n_u8(4);
+    const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc);
     const uint8x16_t v_level2_threshold = vdupq_n_u8(8);
     const uint8x16_t v_level3_threshold = vdupq_n_u8(16);
     int64x2_t v_sum_diff_total = vdupq_n_s64(0);
@@ -146,8 +148,10 @@ int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y,
         const int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),
                                       vget_low_s64(v_sum_diff_total));
         const int s0 = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
+        int sum_diff_thresh = SUM_DIFF_THRESHOLD;
 
-        if (s0 > SUM_DIFF_THRESHOLD)
+        if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
+        if (s0 > sum_diff_thresh)
             return COPY_BLOCK;
     }
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 78e54e248..9d0e69cf4 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -18,7 +18,6 @@
 #include <assert.h>
 #include <stdio.h>
 #include <limits.h>
-#include "vp8/common/pragmas.h"
 #include "vpx/vpx_encoder.h"
 #include "vpx_mem/vpx_mem.h"
 #include "bitstream.h"
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 387701c57..d9f39b507 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -15,7 +15,6 @@
 #include <assert.h>
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
-#include "vp8/common/pragmas.h"
 #include "tokenize.h"
 #include "treewriter.h"
 #include "onyx_int.h"
diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt_x86_64.asm
index 5964a85f2..5964a85f2 100644
--- a/vp8/encoder/x86/ssim_opt.asm
+++ b/vp8/encoder/x86/ssim_opt_x86_64.asm
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 8282547ea..0b1ac9eeb 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -9,7 +9,6 @@
 ##
 
 VP8_COMMON_SRCS-yes += vp8_common.mk
-VP8_COMMON_SRCS-yes += common/pragmas.h
 VP8_COMMON_SRCS-yes += common/ppflags.h
 VP8_COMMON_SRCS-yes += common/onyx.h
 VP8_COMMON_SRCS-yes += common/onyxd.h
@@ -115,7 +114,7 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
 endif
 
 ifeq ($(ARCH_X86_64),yes)
-VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2_x86_64.asm
 endif
 
 # common (c)
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 10cbc6a58..d6dfb1643 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -386,8 +386,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
     /* Set these even if already initialized.  The caller may have changed the
      * decrypt config between frames.
      */
-    ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb;
-    ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state;
+    if (ctx->decoder_init) {
+      ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb;
+      ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state;
+    }
 
     if (!res)
     {
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 607382b4c..b7b948add 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -100,7 +100,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
 VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
-VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm
+VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt_x86_64.asm
 
 ifeq ($(CONFIG_REALTIME_ONLY),yes)
 VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h
deleted file mode 100644
index 0efc713ca..000000000
--- a/vp9/common/vp9_pragmas.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_PRAGMAS_H_
-#define VP9_COMMON_VP9_PRAGMAS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __INTEL_COMPILER
-#pragma warning(disable:997 1011 170)
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(disable:4799)
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP9_COMMON_VP9_PRAGMAS_H_
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 182739620..c300cde62 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -356,7 +356,7 @@ specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
 $vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
 
 add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_256_add sse2 neon_asm dspr2/;
+specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
 $vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
 
 add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
@@ -422,6 +422,10 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
 add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
 
+add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get_sse_sum_16x16 sse2/;
+$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
+
 add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
 
@@ -431,6 +435,10 @@ specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";
 add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
 
+add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get_sse_sum_8x8 sse2/;
+$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
+
 add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance8x4/, "$sse2_x86inc";
 
@@ -520,82 +528,82 @@ specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
 add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
 specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
 
-add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
 specialize qw/vp9_sad64x64/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
 specialize qw/vp9_sad32x64/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
 specialize qw/vp9_sad64x32/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
 specialize qw/vp9_sad32x16/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
 specialize qw/vp9_sad16x32/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
 specialize qw/vp9_sad32x32/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
 specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
 specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
 specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
 specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
 specialize qw/vp9_sad8x4/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
 specialize qw/vp9_sad4x8/, "$sse_x86inc";
 
-add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
 specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc";
 
-add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";
 
-add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
 
-add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
 specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
 
 add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 0231726dc..ff9c43221 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -8,12 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include <assert.h>
-#include <emmintrin.h>  // SSE2
-#include "./vpx_config.h"
-#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_idct.h"
+#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
 
 #define RECON_AND_STORE4X4(dest, in_x) \
 {                                                     \
@@ -527,16 +522,6 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
   out7 = _mm_subs_epi16(stp1_0, stp2_7); \
   }
 
-#define RECON_AND_STORE(dest, in_x) \
-  {                                                     \
-     __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
-      d0 = _mm_unpacklo_epi8(d0, zero); \
-      d0 = _mm_add_epi16(in_x, d0); \
-      d0 = _mm_packus_epi16(d0, d0); \
-      _mm_storel_epi64((__m128i *)(dest), d0); \
-      dest += stride; \
-  }
-
 void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
@@ -627,36 +612,6 @@ void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   RECON_AND_STORE(dest, dc_value);
 }
 
-// perform 8x8 transpose
-static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
-  const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
-  const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
-  const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
-  const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
-  const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
-  const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
-  const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
-  const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
-
-  const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
-  const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
-  const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
-  const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
-  const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
-  const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
-  const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
-  const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
-
-  res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
-  res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
-  res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
-  res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
-  res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
-  res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
-  res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
-  res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
-}
-
 static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
   const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
   const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
@@ -1573,23 +1528,6 @@ void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   }
 }
 
-static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
-  __m128i tbuf[8];
-  array_transpose_8x8(res0, res0);
-  array_transpose_8x8(res1, tbuf);
-  array_transpose_8x8(res0 + 8, res1);
-  array_transpose_8x8(res1 + 8, res1 + 8);
-
-  res0[8] = tbuf[0];
-  res0[9] = tbuf[1];
-  res0[10] = tbuf[2];
-  res0[11] = tbuf[3];
-  res0[12] = tbuf[4];
-  res0[13] = tbuf[5];
-  res0[14] = tbuf[6];
-  res0[15] = tbuf[7];
-}
-
 static void iadst16_8col(__m128i *in) {
   // perform 16x16 1-D ADST for 8 columns
   __m128i s[16], x[16], u[32], v[32];
@@ -2416,82 +2354,6 @@ static void iadst16_sse2(__m128i *in0, __m128i *in1) {
   iadst16_8col(in1);
 }
 
-static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
-  in[0]  = _mm_load_si128((const __m128i *)(input + 0 * 16));
-  in[1]  = _mm_load_si128((const __m128i *)(input + 1 * 16));
-  in[2]  = _mm_load_si128((const __m128i *)(input + 2 * 16));
-  in[3]  = _mm_load_si128((const __m128i *)(input + 3 * 16));
-  in[4]  = _mm_load_si128((const __m128i *)(input + 4 * 16));
-  in[5]  = _mm_load_si128((const __m128i *)(input + 5 * 16));
-  in[6]  = _mm_load_si128((const __m128i *)(input + 6 * 16));
-  in[7]  = _mm_load_si128((const __m128i *)(input + 7 * 16));
-
-  in[8]  = _mm_load_si128((const __m128i *)(input + 8 * 16));
-  in[9]  = _mm_load_si128((const __m128i *)(input + 9 * 16));
-  in[10]  = _mm_load_si128((const __m128i *)(input + 10 * 16));
-  in[11]  = _mm_load_si128((const __m128i *)(input + 11 * 16));
-  in[12]  = _mm_load_si128((const __m128i *)(input + 12 * 16));
-  in[13]  = _mm_load_si128((const __m128i *)(input + 13 * 16));
-  in[14]  = _mm_load_si128((const __m128i *)(input + 14 * 16));
-  in[15]  = _mm_load_si128((const __m128i *)(input + 15 * 16));
-}
-
-static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
-  const __m128i final_rounding = _mm_set1_epi16(1<<5);
-  const __m128i zero = _mm_setzero_si128();
-  // Final rounding and shift
-  in[0] = _mm_adds_epi16(in[0], final_rounding);
-  in[1] = _mm_adds_epi16(in[1], final_rounding);
-  in[2] = _mm_adds_epi16(in[2], final_rounding);
-  in[3] = _mm_adds_epi16(in[3], final_rounding);
-  in[4] = _mm_adds_epi16(in[4], final_rounding);
-  in[5] = _mm_adds_epi16(in[5], final_rounding);
-  in[6] = _mm_adds_epi16(in[6], final_rounding);
-  in[7] = _mm_adds_epi16(in[7], final_rounding);
-  in[8] = _mm_adds_epi16(in[8], final_rounding);
-  in[9] = _mm_adds_epi16(in[9], final_rounding);
-  in[10] = _mm_adds_epi16(in[10], final_rounding);
-  in[11] = _mm_adds_epi16(in[11], final_rounding);
-  in[12] = _mm_adds_epi16(in[12], final_rounding);
-  in[13] = _mm_adds_epi16(in[13], final_rounding);
-  in[14] = _mm_adds_epi16(in[14], final_rounding);
-  in[15] = _mm_adds_epi16(in[15], final_rounding);
-
-  in[0] = _mm_srai_epi16(in[0], 6);
-  in[1] = _mm_srai_epi16(in[1], 6);
-  in[2] = _mm_srai_epi16(in[2], 6);
-  in[3] = _mm_srai_epi16(in[3], 6);
-  in[4] = _mm_srai_epi16(in[4], 6);
-  in[5] = _mm_srai_epi16(in[5], 6);
-  in[6] = _mm_srai_epi16(in[6], 6);
-  in[7] = _mm_srai_epi16(in[7], 6);
-  in[8] = _mm_srai_epi16(in[8], 6);
-  in[9] = _mm_srai_epi16(in[9], 6);
-  in[10] = _mm_srai_epi16(in[10], 6);
-  in[11] = _mm_srai_epi16(in[11], 6);
-  in[12] = _mm_srai_epi16(in[12], 6);
-  in[13] = _mm_srai_epi16(in[13], 6);
-  in[14] = _mm_srai_epi16(in[14], 6);
-  in[15] = _mm_srai_epi16(in[15], 6);
-
-  RECON_AND_STORE(dest, in[0]);
-  RECON_AND_STORE(dest, in[1]);
-  RECON_AND_STORE(dest, in[2]);
-  RECON_AND_STORE(dest, in[3]);
-  RECON_AND_STORE(dest, in[4]);
-  RECON_AND_STORE(dest, in[5]);
-  RECON_AND_STORE(dest, in[6]);
-  RECON_AND_STORE(dest, in[7]);
-  RECON_AND_STORE(dest, in[8]);
-  RECON_AND_STORE(dest, in[9]);
-  RECON_AND_STORE(dest, in[10]);
-  RECON_AND_STORE(dest, in[11]);
-  RECON_AND_STORE(dest, in[12]);
-  RECON_AND_STORE(dest, in[13]);
-  RECON_AND_STORE(dest, in[14]);
-  RECON_AND_STORE(dest, in[15]);
-}
-
 void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
                                int tx_type) {
   __m128i in0[16], in1[16];
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.h b/vp9/common/x86/vp9_idct_intrin_sse2.h
new file mode 100644
index 000000000..1c62e3272
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.h
@@ -0,0 +1,169 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_X86_VP9_IDCT_INTRIN_SSE2_H_
+#define VP9_COMMON_X86_VP9_IDCT_INTRIN_SSE2_H_
+
+#include <assert.h>
+#include <emmintrin.h>  // SSE2
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_idct.h"
+
+// Transposes an 8x8 block of 16-bit values held in eight registers (one row
+// per register). All of in[0..7] is read before res[0..7] is written, so in
+// and res may point to the same array.
+static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
+  const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
+  const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+  const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
+  const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
+  const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+  const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+  const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
+  const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
+
+  const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+  const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+  const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+  const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+  const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+  const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+  const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+  const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+
+  res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
+  res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
+  res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
+  res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
+  res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
+  res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
+  res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
+  res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
+}
+
+// Transposes a 16x16 block of 16-bit values stored as two arrays of sixteen
+// registers. Each 8x8 quadrant is transposed; the off-diagonal quadrants
+// (res0[8..15] and res1[0..7]) are additionally exchanged via tbuf.
+static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
+  __m128i tbuf[8];
+  array_transpose_8x8(res0, res0);
+  array_transpose_8x8(res1, tbuf);
+  array_transpose_8x8(res0 + 8, res1);
+  array_transpose_8x8(res1 + 8, res1 + 8);
+
+  res0[8] = tbuf[0];
+  res0[9] = tbuf[1];
+  res0[10] = tbuf[2];
+  res0[11] = tbuf[3];
+  res0[12] = tbuf[4];
+  res0[13] = tbuf[5];
+  res0[14] = tbuf[6];
+  res0[15] = tbuf[7];
+}
+
+// Loads sixteen rows of eight 16-bit coefficients from input, whose rows are
+// 16 coefficients apart. Uses aligned loads, so each input + i * 16 must be
+// 16-byte aligned.
+static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
+  in[0]  = _mm_load_si128((const __m128i *)(input + 0 * 16));
+  in[1]  = _mm_load_si128((const __m128i *)(input + 1 * 16));
+  in[2]  = _mm_load_si128((const __m128i *)(input + 2 * 16));
+  in[3]  = _mm_load_si128((const __m128i *)(input + 3 * 16));
+  in[4]  = _mm_load_si128((const __m128i *)(input + 4 * 16));
+  in[5]  = _mm_load_si128((const __m128i *)(input + 5 * 16));
+  in[6]  = _mm_load_si128((const __m128i *)(input + 6 * 16));
+  in[7]  = _mm_load_si128((const __m128i *)(input + 7 * 16));
+
+  in[8]  = _mm_load_si128((const __m128i *)(input + 8 * 16));
+  in[9]  = _mm_load_si128((const __m128i *)(input + 9 * 16));
+  in[10]  = _mm_load_si128((const __m128i *)(input + 10 * 16));
+  in[11]  = _mm_load_si128((const __m128i *)(input + 11 * 16));
+  in[12]  = _mm_load_si128((const __m128i *)(input + 12 * 16));
+  in[13]  = _mm_load_si128((const __m128i *)(input + 13 * 16));
+  in[14]  = _mm_load_si128((const __m128i *)(input + 14 * 16));
+  in[15]  = _mm_load_si128((const __m128i *)(input + 15 * 16));
+}
+
+// Adds the eight 16-bit residuals in in_x to the eight pixels at dest,
+// saturates the sums to 8 bits, stores them back and advances dest by one
+// row. Expands in the caller's scope: a zeroed __m128i named zero and an
+// integer named stride must be visible, and dest must be a modifiable lvalue.
+#define RECON_AND_STORE(dest, in_x) \
+  {                                                     \
+     __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
+      d0 = _mm_unpacklo_epi8(d0, zero); \
+      d0 = _mm_add_epi16(in_x, d0); \
+      d0 = _mm_packus_epi16(d0, d0); \
+      _mm_storel_epi64((__m128i *)(dest), d0); \
+      dest += stride; \
+  }
+
+// Rounds the sixteen rows in in[] (adds 1 << 5 with saturation, then shifts
+// right arithmetically by 6) and adds them to the 8-pixel-wide, 16-row area at
+// dest. Note that in[] is overwritten with the rounded values.
+static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
+  const __m128i final_rounding = _mm_set1_epi16(1<<5);
+  const __m128i zero = _mm_setzero_si128();
+  // Final rounding and shift
+  in[0] = _mm_adds_epi16(in[0], final_rounding);
+  in[1] = _mm_adds_epi16(in[1], final_rounding);
+  in[2] = _mm_adds_epi16(in[2], final_rounding);
+  in[3] = _mm_adds_epi16(in[3], final_rounding);
+  in[4] = _mm_adds_epi16(in[4], final_rounding);
+  in[5] = _mm_adds_epi16(in[5], final_rounding);
+  in[6] = _mm_adds_epi16(in[6], final_rounding);
+  in[7] = _mm_adds_epi16(in[7], final_rounding);
+  in[8] = _mm_adds_epi16(in[8], final_rounding);
+  in[9] = _mm_adds_epi16(in[9], final_rounding);
+  in[10] = _mm_adds_epi16(in[10], final_rounding);
+  in[11] = _mm_adds_epi16(in[11], final_rounding);
+  in[12] = _mm_adds_epi16(in[12], final_rounding);
+  in[13] = _mm_adds_epi16(in[13], final_rounding);
+  in[14] = _mm_adds_epi16(in[14], final_rounding);
+  in[15] = _mm_adds_epi16(in[15], final_rounding);
+
+  in[0] = _mm_srai_epi16(in[0], 6);
+  in[1] = _mm_srai_epi16(in[1], 6);
+  in[2] = _mm_srai_epi16(in[2], 6);
+  in[3] = _mm_srai_epi16(in[3], 6);
+  in[4] = _mm_srai_epi16(in[4], 6);
+  in[5] = _mm_srai_epi16(in[5], 6);
+  in[6] = _mm_srai_epi16(in[6], 6);
+  in[7] = _mm_srai_epi16(in[7], 6);
+  in[8] = _mm_srai_epi16(in[8], 6);
+  in[9] = _mm_srai_epi16(in[9], 6);
+  in[10] = _mm_srai_epi16(in[10], 6);
+  in[11] = _mm_srai_epi16(in[11], 6);
+  in[12] = _mm_srai_epi16(in[12], 6);
+  in[13] = _mm_srai_epi16(in[13], 6);
+  in[14] = _mm_srai_epi16(in[14], 6);
+  in[15] = _mm_srai_epi16(in[15], 6);
+
+  RECON_AND_STORE(dest, in[0]);
+  RECON_AND_STORE(dest, in[1]);
+  RECON_AND_STORE(dest, in[2]);
+  RECON_AND_STORE(dest, in[3]);
+  RECON_AND_STORE(dest, in[4]);
+  RECON_AND_STORE(dest, in[5]);
+  RECON_AND_STORE(dest, in[6]);
+  RECON_AND_STORE(dest, in[7]);
+  RECON_AND_STORE(dest, in[8]);
+  RECON_AND_STORE(dest, in[9]);
+  RECON_AND_STORE(dest, in[10]);
+  RECON_AND_STORE(dest, in[11]);
+  RECON_AND_STORE(dest, in[12]);
+  RECON_AND_STORE(dest, in[13]);
+  RECON_AND_STORE(dest, in[14]);
+  RECON_AND_STORE(dest, in[15]);
+}
+
+#endif  // VP9_COMMON_X86_VP9_IDCT_INTRIN_SSE2_H_
diff --git a/vp9/common/x86/vp9_idct_intrin_ssse3.c b/vp9/common/x86/vp9_idct_intrin_ssse3.c
new file mode 100644
index 000000000..9a6980e0f
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_intrin_ssse3.c
@@ -0,0 +1,326 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <tmmintrin.h>  // SSSE3
+#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
+
+static void idct16_8col(__m128i *in) {
+  const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+  const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+  const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+  const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+  const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+  const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+  const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+  const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+  const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+  const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
+  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170);
+
+  __m128i v[16], u[16], s[16], t[16];
+
+  // stage 1
+  s[0] = in[0];
+  s[1] = in[8];
+  s[2] = in[4];
+  s[3] = in[12];
+  s[4] = in[2];
+  s[5] = in[10];
+  s[6] = in[6];
+  s[7] = in[14];
+  s[8] = in[1];
+  s[9] = in[9];
+  s[10] = in[5];
+  s[11] = in[13];
+  s[12] = in[3];
+  s[13] = in[11];
+  s[14] = in[7];
+  s[15] = in[15];
+
+  // stage 2
+  u[0] = _mm_unpacklo_epi16(s[8], s[15]);
+  u[1] = _mm_unpackhi_epi16(s[8], s[15]);
+  u[2] = _mm_unpacklo_epi16(s[9], s[14]);
+  u[3] = _mm_unpackhi_epi16(s[9], s[14]);
+  u[4] = _mm_unpacklo_epi16(s[10], s[13]);
+  u[5] = _mm_unpackhi_epi16(s[10], s[13]);
+  u[6] = _mm_unpacklo_epi16(s[11], s[12]);
+  u[7] = _mm_unpackhi_epi16(s[11], s[12]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+  u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+  u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+  u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+  u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+  u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+  u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+  u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+  u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+  u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+  u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+  u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+  u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+  u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+  u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+  u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+  u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+  s[8]  = _mm_packs_epi32(u[0], u[1]);
+  s[15] = _mm_packs_epi32(u[2], u[3]);
+  s[9]  = _mm_packs_epi32(u[4], u[5]);
+  s[14] = _mm_packs_epi32(u[6], u[7]);
+  s[10] = _mm_packs_epi32(u[8], u[9]);
+  s[13] = _mm_packs_epi32(u[10], u[11]);
+  s[11] = _mm_packs_epi32(u[12], u[13]);
+  s[12] = _mm_packs_epi32(u[14], u[15]);
+
+  // stage 3
+  t[0] = s[0];
+  t[1] = s[1];
+  t[2] = s[2];
+  t[3] = s[3];
+  u[0] = _mm_unpacklo_epi16(s[4], s[7]);
+  u[1] = _mm_unpackhi_epi16(s[4], s[7]);
+  u[2] = _mm_unpacklo_epi16(s[5], s[6]);
+  u[3] = _mm_unpackhi_epi16(s[5], s[6]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+  t[4] = _mm_packs_epi32(u[0], u[1]);
+  t[7] = _mm_packs_epi32(u[2], u[3]);
+  t[5] = _mm_packs_epi32(u[4], u[5]);
+  t[6] = _mm_packs_epi32(u[6], u[7]);
+  t[8] = _mm_add_epi16(s[8], s[9]);
+  t[9] = _mm_sub_epi16(s[8], s[9]);
+  t[10] = _mm_sub_epi16(s[11], s[10]);
+  t[11] = _mm_add_epi16(s[10], s[11]);
+  t[12] = _mm_add_epi16(s[12], s[13]);
+  t[13] = _mm_sub_epi16(s[12], s[13]);
+  t[14] = _mm_sub_epi16(s[15], s[14]);
+  t[15] = _mm_add_epi16(s[14], s[15]);
+
+  // stage 4
+  u[0] = _mm_add_epi16(t[0], t[1]);
+  u[1] = _mm_sub_epi16(t[0], t[1]);
+  u[2] = _mm_unpacklo_epi16(t[2], t[3]);
+  u[3] = _mm_unpackhi_epi16(t[2], t[3]);
+  u[4] = _mm_unpacklo_epi16(t[9], t[14]);
+  u[5] = _mm_unpackhi_epi16(t[9], t[14]);
+  u[6] = _mm_unpacklo_epi16(t[10], t[13]);
+  u[7] = _mm_unpackhi_epi16(t[10], t[13]);
+
+  s[0] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
+  s[1] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_m24_m08);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24);
+
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+  u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+  u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+  u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+  u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+  u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+  u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+  u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+  u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+  u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+  u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+  u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+  u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+  u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+  u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+  u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+  u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+  u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+  s[2] = _mm_packs_epi32(u[4], u[5]);
+  s[3] = _mm_packs_epi32(u[6], u[7]);
+  s[4] = _mm_add_epi16(t[4], t[5]);
+  s[5] = _mm_sub_epi16(t[4], t[5]);
+  s[6] = _mm_sub_epi16(t[7], t[6]);
+  s[7] = _mm_add_epi16(t[6], t[7]);
+  s[8] = t[8];
+  s[15] = t[15];
+  s[9]  = _mm_packs_epi32(u[8], u[9]);
+  s[14] = _mm_packs_epi32(u[10], u[11]);
+  s[10] = _mm_packs_epi32(u[12], u[13]);
+  s[13] = _mm_packs_epi32(u[14], u[15]);
+  s[11] = t[11];
+  s[12] = t[12];
+
+  // stage 5
+  t[0] = _mm_add_epi16(s[0], s[3]);
+  t[1] = _mm_add_epi16(s[1], s[2]);
+  t[2] = _mm_sub_epi16(s[1], s[2]);
+  t[3] = _mm_sub_epi16(s[0], s[3]);
+  t[4] = s[4];
+  t[7] = s[7];
+
+  u[0] = _mm_sub_epi16(s[6], s[5]);
+  u[1] = _mm_add_epi16(s[6], s[5]);
+  t[5] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
+  t[6] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
+
+  t[8] = _mm_add_epi16(s[8], s[11]);
+  t[9] = _mm_add_epi16(s[9], s[10]);
+  t[10] = _mm_sub_epi16(s[9], s[10]);
+  t[11] = _mm_sub_epi16(s[8], s[11]);
+  t[12] = _mm_sub_epi16(s[15], s[12]);
+  t[13] = _mm_sub_epi16(s[14], s[13]);
+  t[14] = _mm_add_epi16(s[13], s[14]);
+  t[15] = _mm_add_epi16(s[12], s[15]);
+
+  // stage 6
+  s[0] = _mm_add_epi16(t[0], t[7]);
+  s[1] = _mm_add_epi16(t[1], t[6]);
+  s[2] = _mm_add_epi16(t[2], t[5]);
+  s[3] = _mm_add_epi16(t[3], t[4]);
+  s[4] = _mm_sub_epi16(t[3], t[4]);
+  s[5] = _mm_sub_epi16(t[2], t[5]);
+  s[6] = _mm_sub_epi16(t[1], t[6]);
+  s[7] = _mm_sub_epi16(t[0], t[7]);
+  s[8] = t[8];
+  s[9] = t[9];
+
+  u[0] = _mm_sub_epi16(t[13], t[10]);
+  u[1] = _mm_add_epi16(t[13], t[10]);
+  u[2] = _mm_sub_epi16(t[12], t[11]);
+  u[3] = _mm_add_epi16(t[12], t[11]);
+
+  s[10] = _mm_mulhrs_epi16(u[0], k__cospi_p16_p16_x2);
+  s[13] = _mm_mulhrs_epi16(u[1], k__cospi_p16_p16_x2);
+  s[11] = _mm_mulhrs_epi16(u[2], k__cospi_p16_p16_x2);
+  s[12] = _mm_mulhrs_epi16(u[3], k__cospi_p16_p16_x2);
+  s[14] = t[14];
+  s[15] = t[15];
+
+  // stage 7
+  in[0] = _mm_add_epi16(s[0], s[15]);
+  in[1] = _mm_add_epi16(s[1], s[14]);
+  in[2] = _mm_add_epi16(s[2], s[13]);
+  in[3] = _mm_add_epi16(s[3], s[12]);
+  in[4] = _mm_add_epi16(s[4], s[11]);
+  in[5] = _mm_add_epi16(s[5], s[10]);
+  in[6] = _mm_add_epi16(s[6], s[9]);
+  in[7] = _mm_add_epi16(s[7], s[8]);
+  in[8] = _mm_sub_epi16(s[7], s[8]);
+  in[9] = _mm_sub_epi16(s[6], s[9]);
+  in[10] = _mm_sub_epi16(s[5], s[10]);
+  in[11] = _mm_sub_epi16(s[4], s[11]);
+  in[12] = _mm_sub_epi16(s[3], s[12]);
+  in[13] = _mm_sub_epi16(s[2], s[13]);
+  in[14] = _mm_sub_epi16(s[1], s[14]);
+  in[15] = _mm_sub_epi16(s[0], s[15]);
+}
+
+static void idct16_sse2(__m128i *in0, __m128i *in1) {  // transpose + 2 x 8col
+  array_transpose_16x16(in0, in1);  // presumably in-place transpose; confirm
+  idct16_8col(in0);  // overwrites in0[0..15] with the transformed values
+  idct16_8col(in1);  // same transform applied to the second register set
+}
+
+void vp9_idct16x16_256_add_ssse3(const int16_t *input, uint8_t *dest,
+                                int stride) {
+  __m128i in0[16], in1[16];  // 2 x 16 registers, used as 16-bit lanes
+
+  load_buffer_8x16(input, in0);  // presumably an 8-wide, 16-row load; confirm
+  input += 8;                    // skip 8 int16_t coefficients
+  load_buffer_8x16(input, in1);  // second half, loaded the same way
+
+  idct16_sse2(in0, in1);  // pass 1: transpose, then transform both halves
+  idct16_sse2(in0, in1);  // pass 2: same steps again on the pass-1 output
+
+  write_buffer_8x16(dest, in0, stride);  // presumably adds to dest; confirm
+  dest += 8;                             // move 8 bytes along the dest row
+  write_buffer_8x16(dest, in1, stride);  // second half written the same way
+}
diff --git a/vp9/common/x86/vp9_idct_ssse3.asm b/vp9/common/x86/vp9_idct_ssse3_x86_64.asm
index 2c1060710..2c1060710 100644
--- a/vp9/common/x86/vp9_idct_ssse3.asm
+++ b/vp9/common/x86/vp9_idct_ssse3_x86_64.asm
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
index b84db970e..d109e136a 100644
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -111,21 +111,21 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
 
     // filter the source buffer
     srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
-    srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
+    srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
-    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
 
     // add and saturate the results together
     srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
 
     // filter the source buffer
-    srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
+    srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
     srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
-    srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+    srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
     srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
 
     // add and saturate the results together
@@ -146,21 +146,21 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
 
     // filter the source buffer
     srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
-    srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
+    srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
-    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
+    srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
 
     // add and saturate the results together
     srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
 
     // filter the source buffer
-    srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
+    srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
     srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
-    srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, forthFilters);
+    srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
     srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
 
     // add and saturate the results together
@@ -208,26 +208,26 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
     srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1,
                     _mm256_castsi256_si128(filt1Reg));
     srcRegFilt2 = _mm_shuffle_epi8(srcReg1,
-                  _mm256_castsi256_si128(filt2Reg));
+                  _mm256_castsi256_si128(filt4Reg));
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1,
                     _mm256_castsi256_si128(firstFilters));
     srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
-                  _mm256_castsi256_si128(secondFilters));
+                  _mm256_castsi256_si128(forthFilters));
 
     // add and saturate the results together
     srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
 
     // filter the source buffer
     srcRegFilt3= _mm_shuffle_epi8(srcReg1,
-                 _mm256_castsi256_si128(filt4Reg));
+                 _mm256_castsi256_si128(filt2Reg));
     srcRegFilt2= _mm_shuffle_epi8(srcReg1,
                  _mm256_castsi256_si128(filt3Reg));
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
-                  _mm256_castsi256_si128(forthFilters));
+                  _mm256_castsi256_si128(secondFilters));
     srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
                   _mm256_castsi256_si128(thirdFilters));
 
@@ -247,26 +247,26 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
     srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2,
                     _mm256_castsi256_si128(filt1Reg));
     srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
-                  _mm256_castsi256_si128(filt2Reg));
+                  _mm256_castsi256_si128(filt4Reg));
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1,
                     _mm256_castsi256_si128(firstFilters));
     srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
-                  _mm256_castsi256_si128(secondFilters));
+                  _mm256_castsi256_si128(forthFilters));
 
     // add and saturate the results together
     srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
 
     // filter the source buffer
     srcRegFilt3 = _mm_shuffle_epi8(srcReg2,
-                  _mm256_castsi256_si128(filt4Reg));
+                  _mm256_castsi256_si128(filt2Reg));
     srcRegFilt2 = _mm_shuffle_epi8(srcReg2,
                   _mm256_castsi256_si128(filt3Reg));
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3,
-                  _mm256_castsi256_si128(forthFilters));
+                  _mm256_castsi256_si128(secondFilters));
     srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2,
                   _mm256_castsi256_si128(thirdFilters));
 
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
index cf28d8d2b..c4efa6565 100644
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
@@ -44,7 +44,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
                                          unsigned int output_pitch,
                                          unsigned int output_height,
                                          int16_t *filter) {
-  __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
+  __m128i firstFilters, secondFilters, shuffle1, shuffle2;
   __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
   __m128i addFilterReg64, filtersReg, srcReg, minReg;
   unsigned int i;
@@ -61,20 +61,22 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
   // duplicate only the third 16 bit in the filter into the first lane
   secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
   // duplicate only the seconds 16 bits in the filter into the second lane
+  // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3
   firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
   // duplicate only the forth 16 bits in the filter into the second lane
+  // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7
   secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);
 
   // loading the local filters
-  thirdFilters =_mm_load_si128((__m128i const *)filt1_4_h8);
-  forthFilters = _mm_load_si128((__m128i const *)filt2_4_h8);
+  shuffle1 =_mm_load_si128((__m128i const *)filt1_4_h8);
+  shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8);
 
   for (i = 0; i < output_height; i++) {
     srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
 
     // filter the source buffer
-    srcRegFilt1= _mm_shuffle_epi8(srcReg, thirdFilters);
-    srcRegFilt2= _mm_shuffle_epi8(srcReg, forthFilters);
+    srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1);
+    srcRegFilt2= _mm_shuffle_epi8(srcReg, shuffle2);
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
@@ -164,12 +166,12 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
     srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters);
 
     // add and saturate all the results together
-    minReg = _mm_min_epi16(srcRegFilt4, srcRegFilt3);
-    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
+    minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
+    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
 
-    srcRegFilt4= _mm_max_epi16(srcRegFilt4, srcRegFilt3);
+    srcRegFilt2= _mm_max_epi16(srcRegFilt2, srcRegFilt3);
     srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
-    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
+    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
     srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
 
     // shift by 7 bit each 16 bits
@@ -229,21 +231,21 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
 
     // filter the source buffer
     srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg);
-    srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt2Reg);
+    srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt4Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters);
-    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters);
 
     // add and saturate the results together
     srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
 
     // filter the source buffer
-    srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt4Reg);
+    srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt2Reg);
     srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
-    srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
+    srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
     srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
 
     // add and saturate the results together
@@ -260,21 +262,21 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
 
     // filter the source buffer
     srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg);
-    srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt2Reg);
+    srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt4Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
     srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters);
-    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
+    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters);
 
     // add and saturate the results together
     srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
 
     // filter the source buffer
-    srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt4Reg);
+    srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt2Reg);
     srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg);
 
     // multiply 2 adjacent elements with the filter and add the result
-    srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
+    srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
     srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
 
     // add and saturate the results together
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index 634fa7746..fd781d4bc 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -272,22 +272,23 @@
     punpcklbw   xmm2, xmm3                  ;C D
     punpcklbw   xmm4, xmm5                  ;E F
 
-
     movq        xmm6, [rsi + rbx + 8]       ;G
     movq        xmm7, [rax + rbx + 8]       ;H
     punpcklbw   xmm6, xmm7                  ;G H
 
-
     pmaddubsw   xmm0, k0k1
     pmaddubsw   xmm2, k2k3
     pmaddubsw   xmm4, k4k5
     pmaddubsw   xmm6, k6k7
 
     paddsw      xmm0, xmm6
-    paddsw      xmm0, xmm2
+    movdqa      xmm1, xmm2
+    pmaxsw      xmm2, xmm4
+    pminsw      xmm4, xmm1
     paddsw      xmm0, xmm4
-    paddsw      xmm0, krd
+    paddsw      xmm0, xmm2
 
+    paddsw      xmm0, krd
     psraw       xmm0, 7
     packuswb    xmm0, xmm0
 
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index d6482761b..121b1f2cd 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1317,16 +1317,15 @@ static struct vp9_read_bit_buffer* init_read_bit_buffer(
   return rb;
 }
 
-int vp9_decode_frame(VP9Decoder *pbi,
-                     const uint8_t *data, const uint8_t *data_end,
-                     const uint8_t **p_data_end) {
+void vp9_decode_frame(VP9Decoder *pbi,
+                      const uint8_t *data, const uint8_t *data_end,
+                      const uint8_t **p_data_end) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
   struct vp9_read_bit_buffer rb = { 0 };
   uint8_t clear_data[MAX_VP9_HEADER_SIZE];
   const size_t first_partition_size = read_uncompressed_header(pbi,
       init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
-  const int keyframe = cm->frame_type == KEY_FRAME;
   const int tile_rows = 1 << cm->log2_tile_rows;
   const int tile_cols = 1 << cm->log2_tile_cols;
   YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
@@ -1335,12 +1334,9 @@ int vp9_decode_frame(VP9Decoder *pbi,
   if (!first_partition_size) {
     // showing a frame directly
     *p_data_end = data + 1;
-    return 0;
+    return;
   }
 
-  if (!pbi->decoded_key_frame && !keyframe)
-    return -1;
-
   data += vp9_rb_bytes_read(&rb);
   if (!read_is_valid(data, first_partition_size, data_end))
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
@@ -1377,14 +1373,6 @@ int vp9_decode_frame(VP9Decoder *pbi,
 
   new_fb->corrupted |= xd->corrupted;
 
-  if (!pbi->decoded_key_frame) {
-    if (keyframe && !new_fb->corrupted)
-      pbi->decoded_key_frame = 1;
-    else
-      vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
-                         "A stream must start with a complete key frame");
-  }
-
   if (!new_fb->corrupted) {
     if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
       vp9_adapt_coef_probs(cm);
@@ -1400,6 +1388,4 @@ int vp9_decode_frame(VP9Decoder *pbi,
 
   if (cm->refresh_frame_context)
     cm->frame_contexts[cm->frame_context_idx] = cm->fc;
-
-  return 0;
 }
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index 8a19dafc5..fb15645a9 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -21,9 +21,9 @@ struct VP9Decoder;
 
 void vp9_init_dequantizer(struct VP9Common *cm);
 
-int vp9_decode_frame(struct VP9Decoder *pbi,
-                     const uint8_t *data, const uint8_t *data_end,
-                     const uint8_t **p_data_end);
+void vp9_decode_frame(struct VP9Decoder *pbi,
+                      const uint8_t *data, const uint8_t *data_end,
+                      const uint8_t **p_data_end);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 8902f179d..13d79ff44 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -67,7 +67,6 @@ VP9Decoder *vp9_decoder_create() {
 
   cm->current_video_frame = 0;
   pbi->ready_for_new_data = 1;
-  pbi->decoded_key_frame = 0;
 
   // vp9_init_dequantizer() is first called here. Add check in
   // frame_init_dequantizer() to avoid unnecessary calling of
@@ -267,15 +266,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
 
   cm->error.setjmp = 1;
 
-  retcode = vp9_decode_frame(pbi, source, source + size, psource);
-
-  if (retcode < 0) {
-    cm->error.error_code = VPX_CODEC_ERROR;
-    cm->error.setjmp = 0;
-    if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
-      cm->frame_bufs[cm->new_fb_idx].ref_count--;
-    return retcode;
-  }
+  vp9_decode_frame(pbi, source, source + size, psource);
 
   swap_frame_buffers(pbi);
 
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 1a5576e5a..a6edf0cbd 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -43,8 +43,6 @@ typedef struct VP9Decoder {
 
   int refresh_frame_flags;
 
-  int decoded_key_frame;
-
   VP9Worker lf_worker;
   VP9Worker *tile_workers;
   int num_tile_workers;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 8ef2b2eed..76f5e7bbe 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -20,7 +20,6 @@
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/common/vp9_mvref_common.h"
-#include "vp9/common/vp9_pragmas.h"
 #include "vp9/common/vp9_pred_common.h"
 #include "vp9/common/vp9_seg_common.h"
 #include "vp9/common/vp9_systemdependent.h"
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ef33fcaf1..6cbc38d79 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -76,18 +76,6 @@ typedef struct {
   unsigned int var;
 } diff;
 
-static void get_sse_sum_8x8(const uint8_t *src, int src_stride,
-                            const uint8_t *ref, int ref_stride,
-                            unsigned int *sse, int *sum) {
-  variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum);
-}
-
-static void get_sse_sum_16x16(const uint8_t *src, int src_stride,
-                              const uint8_t *ref, int ref_stride,
-                              unsigned int *sse, int *sum) {
-  variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum);
-}
-
 static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
                                               const struct buf_2d *ref,
                                               BLOCK_SIZE bs) {
@@ -490,8 +478,8 @@ static void choose_partitioning(VP9_COMP *cpi,
         unsigned int sse = 0;
         int sum = 0;
         if (x_idx < pixels_wide && y_idx < pixels_high)
-          get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
-                          d + y_idx * dp + x_idx, dp, &sse, &sum);
+          vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
+                              d + y_idx * dp + x_idx, dp, &sse, &sum);
         fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
       }
     }
@@ -1226,9 +1214,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
         int b_offset = b_mi_row * MI_SIZE * src_stride +
                        b_mi_col * MI_SIZE;
 
-        get_sse_sum_16x16(src + b_offset, src_stride,
-                          pre_src + b_offset, pre_stride,
-                          &d16[j].sse, &d16[j].sum);
+        vp9_get_sse_sum_16x16(src + b_offset, src_stride,
+                              pre_src + b_offset, pre_stride,
+                              &d16[j].sse, &d16[j].sum);
 
         d16[j].var = d16[j].sse -
             (((uint32_t)d16[j].sum * d16[j].sum) >> 8);
@@ -1303,14 +1291,14 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
   if (row8x8_remaining >= MI_BLOCK_SIZE &&
       col8x8_remaining >= MI_BLOCK_SIZE) {
     this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride,
-                                            pre, pre_stride, 0x7fffffff);
+                                            pre, pre_stride);
     threshold = (1 << 12);
   } else {
     int r, c;
     for (r = 0; r < row8x8_remaining; r += 2)
       for (c = 0; c < col8x8_remaining; c += 2)
-        this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, pre,
-                                                 pre_stride, 0x7fffffff);
+        this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride,
+                                                 pre, pre_stride);
     threshold = (row8x8_remaining * col8x8_remaining) << 6;
   }
 
@@ -1318,7 +1306,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
   return x->in_static_area;
 }
 
-static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
+static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8,
+                         const int motion_thresh) {
   const int mis = cm->mi_stride;
   int block_row, block_col;
 
@@ -1327,8 +1316,8 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
       for (block_col = 0; block_col < 8; ++block_col) {
         const MODE_INFO *prev_mi = prev_mi_8x8[block_row * mis + block_col];
         if (prev_mi) {
-          if (abs(prev_mi->mbmi.mv[0].as_mv.row) >= 8 ||
-              abs(prev_mi->mbmi.mv[0].as_mv.col) >= 8)
+          if (abs(prev_mi->mbmi.mv[0].as_mv.row) > motion_thresh ||
+              abs(prev_mi->mbmi.mv[0].as_mv.col) > motion_thresh)
             return 1;
         }
       }
@@ -2324,7 +2313,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
             || cpi->rc.is_src_frame_alt_ref
             || ((sf->use_lastframe_partitioning ==
                  LAST_FRAME_PARTITION_LOW_MOTION) &&
-                 sb_has_motion(cm, prev_mi))) {
+                 sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
           // If required set upper and lower partition size limits
           if (sf->auto_min_max_partition_size) {
             set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -2337,7 +2326,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                             cpi->pc_root);
         } else {
           if (sf->constrain_copy_partition &&
-              sb_has_motion(cm, prev_mi))
+              sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
             constrain_copy_partitioning(cpi, tile, mi, prev_mi,
                                         mi_row, mi_col, BLOCK_16X16);
           else
@@ -2869,7 +2858,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
       if (mi_row + hbs < cm->mi_rows) {
         nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
                             &rate, &dist, subsize);
-        pc_tree->horizontal[1].mic.mbmi = mi[0]->mbmi;
+        pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
           *totrate += rate;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 1f68f03c4..03bf4b0b6 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -632,9 +632,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
     cpi->oxcf.maximum_buffer_size     = 240000;
   }
 
-  // Convert target bandwidth from Kbit/s to Bit/s
-  cpi->oxcf.target_bandwidth       *= 1000;
-
   cpi->oxcf.starting_buffer_level =
       vp9_rescale(cpi->oxcf.starting_buffer_level,
                   cpi->oxcf.target_bandwidth, 1000);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 9929ae101..0b9a5ac7a 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -64,8 +64,7 @@
 #define MIN_GF_INTERVAL             4
 #endif
 
-
-// #define LONG_TERM_VBR_CORRECTION
+#define LONG_TERM_VBR_CORRECTION
 
 static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
   YV12_BUFFER_CONFIG temp = *a;
@@ -259,25 +258,14 @@ static void avg_stats(FIRSTPASS_STATS *section) {
 
 // Calculate a modified Error used in distributing bits between easier and
 // harder frames.
-static double calculate_modified_err(const VP9_COMP *cpi,
+static double calculate_modified_err(const TWO_PASS *twopass,
+                                     const VP9EncoderConfig *oxcf,
                                      const FIRSTPASS_STATS *this_frame) {
-  const TWO_PASS *twopass = &cpi->twopass;
-  const SVC *const svc = &cpi->svc;
-  const FIRSTPASS_STATS *stats;
-  double av_err;
-  double modified_error;
-
-  if (svc->number_spatial_layers > 1 &&
-      svc->number_temporal_layers == 1) {
-    twopass = &svc->layer_context[svc->spatial_layer_id].twopass;
-  }
-
-  stats = &twopass->total_stats;
-  av_err = stats->ssim_weighted_pred_err / stats->count;
-  modified_error = av_err * pow(this_frame->ssim_weighted_pred_err /
-                   DOUBLE_DIVIDE_CHECK(av_err),
-                   cpi->oxcf.two_pass_vbrbias / 100.0);
-
+  const FIRSTPASS_STATS *const stats = &twopass->total_stats;
+  const double av_err = stats->ssim_weighted_pred_err / stats->count;
+  const double modified_error = av_err *
+      pow(this_frame->ssim_weighted_pred_err / DOUBLE_DIVIDE_CHECK(av_err),
+          oxcf->two_pass_vbrbias / 100.0);
   return fclamp(modified_error,
                 twopass->modified_error_min, twopass->modified_error_max);
 }
@@ -1014,25 +1002,19 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
   // Scan the first pass file and calculate a modified total error based upon
   // the bias/power function used to allocate bits.
   {
-    const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
-    FIRSTPASS_STATS this_frame;
-    const double av_error = stats->ssim_weighted_pred_err /
-                                DOUBLE_DIVIDE_CHECK(stats->count);
-
-
-    twopass->modified_error_total = 0.0;
-    twopass->modified_error_min =
-        (av_error * oxcf->two_pass_vbrmin_section) / 100;
-    twopass->modified_error_max =
-        (av_error * oxcf->two_pass_vbrmax_section) / 100;
-
-    while (input_stats(twopass, &this_frame) != EOF) {
-      twopass->modified_error_total +=
-          calculate_modified_err(cpi, &this_frame);
+    const double avg_error = stats->ssim_weighted_pred_err /
+                                 DOUBLE_DIVIDE_CHECK(stats->count);
+    const FIRSTPASS_STATS *s = twopass->stats_in;
+    double modified_error_total = 0.0;
+    twopass->modified_error_min = (avg_error *
+                                      oxcf->two_pass_vbrmin_section) / 100;
+    twopass->modified_error_max = (avg_error *
+                                      oxcf->two_pass_vbrmax_section) / 100;
+    while (s < twopass->stats_in_end) {
+      modified_error_total += calculate_modified_err(twopass, oxcf, s);
+      ++s;
     }
-    twopass->modified_error_left = twopass->modified_error_total;
-
-    reset_fpf_position(twopass, start_pos);
+    twopass->modified_error_left = modified_error_total;
   }
 
   // Reset the vbr bits off target counter
@@ -1153,16 +1135,17 @@ static void accumulate_frame_motion_stats(
 }
 
 // Calculate a baseline boost number for the current frame.
-static double calc_frame_boost(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame,
+static double calc_frame_boost(const TWO_PASS *twopass,
+                               const FIRSTPASS_STATS *this_frame,
                                double this_frame_mv_in_out) {
   double frame_boost;
 
   // Underlying boost factor is based on inter intra error ratio.
-  if (this_frame->intra_error > cpi->twopass.gf_intra_err_min)
+  if (this_frame->intra_error > twopass->gf_intra_err_min)
     frame_boost = (IIFACTOR * this_frame->intra_error /
                    DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
   else
-    frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min /
+    frame_boost = (IIFACTOR * twopass->gf_intra_err_min /
                    DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
 
   // Increase boost for frames where new data coming into frame (e.g. zoom out).
@@ -1215,8 +1198,8 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
                           ? MIN_DECAY_FACTOR : decay_accumulator;
     }
 
-    boost_score += (decay_accumulator *
-                    calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));
+    boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+                                                        this_frame_mv_in_out);
   }
 
   *f_boost = (int)boost_score;
@@ -1252,8 +1235,8 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
                               ? MIN_DECAY_FACTOR : decay_accumulator;
     }
 
-    boost_score += (decay_accumulator *
-                    calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));
+    boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+                                                        this_frame_mv_in_out);
   }
   *b_boost = (int)boost_score;
 
@@ -1471,6 +1454,66 @@ static int calculate_boost_bits(int frame_count,
   return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0);
 }
 
+static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
+                                   double group_error, int gf_arf_bits) {
+  RATE_CONTROL *const rc = &cpi->rc;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  TWO_PASS *twopass = &cpi->twopass;
+  FIRSTPASS_STATS frame_stats;
+  int i;
+  int group_frame_index = 1;
+  int target_frame_size;
+  int key_frame;
+  const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
+  int64_t total_group_bits = gf_group_bits;
+  double modified_err = 0.0;
+  double err_fraction;
+
+  key_frame = cpi->common.frame_type == KEY_FRAME ||
+              vp9_is_upper_layer_key_frame(cpi);
+
+  // For key frames the frame target rate is already set and the key frame
+  // is also the golden frame, so slot 0 is only written for non-key frames.
+  // NOTE: We don't bother to check for the special case of ARF overlay
+  // frames here, as there is clamping code for this in the function
+  // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass
+  // encodes.
+  if (!key_frame) {
+    twopass->gf_group_bit_allocation[0] = gf_arf_bits;
+
+    // Step over the golden / overlay frame's stats; bail out at end of stats.
+    if (EOF == input_stats(twopass, &frame_stats))
+      return;
+  }
+
+  // Store the bits to spend on the ARF if there is one.
+  if (rc->source_alt_ref_pending) {
+    twopass->gf_group_bit_allocation[group_frame_index++] = gf_arf_bits;
+  }
+
+  // Deduct the boost bits for the ARF, or for the GF if it is not a key frame.
+  if (rc->source_alt_ref_pending || !key_frame)
+    total_group_bits -= gf_arf_bits;
+
+  // Share the remaining bits among the other frames in proportion to error.
+  for (i = 0; i < rc->baseline_gf_interval - 1; ++i) {
+    if (EOF == input_stats(twopass, &frame_stats))
+      break;
+
+    modified_err = calculate_modified_err(twopass, oxcf, &frame_stats);
+
+    if (group_error > 0)
+      err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error);
+    else
+      err_fraction = 0.0;
+
+    target_frame_size = (int)((double)total_group_bits * err_fraction);
+    target_frame_size = clamp(target_frame_size, 0,
+                              MIN(max_bits, (int)total_group_bits));
+
+    twopass->gf_group_bit_allocation[group_frame_index++] = target_frame_size;
+  }
+}
 
 // Analyse and define a gf/arf group.
 static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
@@ -1503,14 +1546,24 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   int b_boost = 0;
   int flash_detected;
   int active_max_gf_interval;
+  int64_t gf_group_bits;
+  double gf_group_error_left;
+  int gf_arf_bits;
+
+  // Reset the GF group data structures unless this is a key
+  // frame in which case it will already have been done.
+  if (cpi->common.frame_type != KEY_FRAME) {
+    twopass->gf_group_index = 0;
+    vp9_zero(twopass->gf_group_bit_allocation);
+  }
 
   vp9_clear_system_state();
   vp9_zero(next_frame);
 
-  twopass->gf_group_bits = 0;
+  gf_group_bits = 0;
 
   // Load stats for the current frame.
-  mod_frame_err = calculate_modified_err(cpi, this_frame);
+  mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
 
   // Note the error of the frame at the start of the group. This will be
   // the GF frame error if we code a normal gf.
@@ -1542,7 +1595,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     ++i;
 
     // Accumulate error score of frames in this gf group.
-    mod_frame_err = calculate_modified_err(cpi, this_frame);
+    mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
     gf_group_err += mod_frame_err;
 
     if (EOF == input_stats(twopass, &next_frame))
@@ -1581,8 +1634,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     }
 
     // Calculate a boost number for this frame.
-    boost_score += (decay_accumulator *
-       calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out));
+    boost_score += decay_accumulator * calc_frame_boost(twopass, &next_frame,
+                                                        this_frame_mv_in_out);
 
     // Break out conditions.
     if (
@@ -1617,7 +1670,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
         break;
 
       if (i < rc->frames_to_key) {
-        mod_frame_err = calculate_modified_err(cpi, this_frame);
+        mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
         gf_group_err += mod_frame_err;
       }
     }
@@ -1713,7 +1766,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   reset_fpf_position(twopass, start_pos);
 
   // Calculate the bits to be allocated to the gf/arf group as a whole
-  twopass->gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
+  gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
 
   // Calculate the extra bits to be used for boosted frame(s)
   {
@@ -1724,19 +1777,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200);
 
     // Calculate the extra bits to be used for boosted frame(s)
-    twopass->gf_bits = calculate_boost_bits(rc->baseline_gf_interval,
-                                            boost, twopass->gf_group_bits);
-
-
-    // For key frames the frame target rate is set already.
-    // NOTE: We dont bother to check for the special case of ARF overlay
-    // frames here, as there is clamping code for this in the function
-    // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass
-    // encodes.
-    if (cpi->common.frame_type != KEY_FRAME &&
-        !vp9_is_upper_layer_key_frame(cpi)) {
-      vp9_rc_set_frame_target(cpi, twopass->gf_bits);
-    }
+    gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
+                                       boost, gf_group_bits);
   }
 
   // Adjust KF group bits and error remaining.
@@ -1749,14 +1791,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   // For normal GFs remove the score for the GF itself unless this is
   // also a key frame in which case it has already been accounted for.
   if (rc->source_alt_ref_pending) {
-    twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err);
+    gf_group_error_left = gf_group_err - mod_frame_err;
   } else if (cpi->common.frame_type != KEY_FRAME) {
-    twopass->gf_group_error_left = (int64_t)(gf_group_err
-                                                 - gf_first_frame_err);
+    gf_group_error_left = gf_group_err - gf_first_frame_err;
   } else {
-    twopass->gf_group_error_left = (int64_t)gf_group_err;
+    gf_group_error_left = gf_group_err;
   }
 
+  // Allocate bits to each of the frames in the GF group.
+  allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits);
+
+  // Reset the file position.
+  reset_fpf_position(twopass, start_pos);
+
   // Calculate a section intra ratio used in setting max loop filter.
   if (cpi->common.frame_type != KEY_FRAME) {
     twopass->section_intra_rating =
@@ -1765,37 +1812,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   }
 }
 
-// Allocate bits to a normal frame that is neither a gf an arf or a key frame.
-static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
-  TWO_PASS *twopass = &cpi->twopass;
-  // For a single frame.
-  const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
-  // Calculate modified prediction error used in bit allocation.
-  const double modified_err = calculate_modified_err(cpi, this_frame);
-  int target_frame_size;
-  double err_fraction;
-
-  if (twopass->gf_group_error_left > 0)
-    // What portion of the remaining GF group error is used by this frame.
-    err_fraction = modified_err / twopass->gf_group_error_left;
-  else
-    err_fraction = 0.0;
-
-  // How many of those bits available for allocation should we give it?
-  target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction);
-
-  // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at
-  // the top end.
-  target_frame_size = clamp(target_frame_size, 0,
-                            MIN(max_bits, (int)twopass->gf_group_bits));
-
-  // Adjust error and bits remaining.
-  twopass->gf_group_error_left -= (int64_t)modified_err;
-
-  // Per frame bit target for this frame.
-  vp9_rc_set_frame_target(cpi, target_frame_size);
-}
-
 static int test_candidate_kf(TWO_PASS *twopass,
                              const FIRSTPASS_STATS *last_frame,
                              const FIRSTPASS_STATS *this_frame,
@@ -1877,10 +1893,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   int i, j;
   RATE_CONTROL *const rc = &cpi->rc;
   TWO_PASS *const twopass = &cpi->twopass;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   const FIRSTPASS_STATS first_frame = *this_frame;
   const FIRSTPASS_STATS *const start_position = twopass->stats_in;
   FIRSTPASS_STATS next_frame;
   FIRSTPASS_STATS last_frame;
+  int kf_bits = 0;
   double decay_accumulator = 1.0;
   double zero_motion_accumulator = 1.0;
   double boost_score = 0.0;
@@ -1892,6 +1910,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
   cpi->common.frame_type = KEY_FRAME;
 
+  // Reset the GF group data structures.
+  twopass->gf_group_index = 0;
+  vp9_zero(twopass->gf_group_bit_allocation);
+
   // Is this a forced key frame by interval.
   rc->this_key_frame_forced = rc->next_key_frame_forced;
 
@@ -1906,14 +1928,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   twopass->kf_group_bits = 0;        // Total bits available to kf group
   twopass->kf_group_error_left = 0;  // Group modified error score.
 
-  kf_mod_err = calculate_modified_err(cpi, this_frame);
+  kf_mod_err = calculate_modified_err(twopass, oxcf, this_frame);
 
   // Find the next keyframe.
   i = 0;
   while (twopass->stats_in < twopass->stats_in_end &&
          rc->frames_to_key < cpi->oxcf.key_freq) {
     // Accumulate kf group error.
-    kf_group_err += calculate_modified_err(cpi, this_frame);
+    kf_group_err += calculate_modified_err(twopass, oxcf, this_frame);
 
     // Load the next frame's stats.
     last_frame = *this_frame;
@@ -1975,7 +1997,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
     // Rescan to get the correct error data for the forced kf group.
     for (i = 0; i < rc->frames_to_key; ++i) {
-      kf_group_err += calculate_modified_err(cpi, &tmp_frame);
+      kf_group_err += calculate_modified_err(twopass, oxcf, &tmp_frame);
       input_stats(twopass, &tmp_frame);
     }
     rc->next_key_frame_forced = 1;
@@ -1989,7 +2011,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   // Special case for the last key frame of the file.
   if (twopass->stats_in >= twopass->stats_in_end) {
     // Accumulate kf group error.
-    kf_group_err += calculate_modified_err(cpi, this_frame);
+    kf_group_err += calculate_modified_err(twopass, oxcf, this_frame);
   }
 
   // Calculate the number of bits that should be assigned to the kf group.
@@ -2057,6 +2079,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     }
   }
 
+  reset_fpf_position(twopass, start_position);
+
   // Store the zero motion percentage
   twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
 
@@ -2073,13 +2097,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   if (rc->kf_boost   < MIN_KF_BOOST)
     rc->kf_boost = MIN_KF_BOOST;
 
-  twopass->kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
-                                          rc->kf_boost, twopass->kf_group_bits);
+  kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
+                                  rc->kf_boost, twopass->kf_group_bits);
 
-  twopass->kf_group_bits -= twopass->kf_bits;
+  twopass->kf_group_bits -= kf_bits;
 
-  // Per frame bit target for this frame.
-  vp9_rc_set_frame_target(cpi, twopass->kf_bits);
+  // Save the bits to spend on the key frame.
+  twopass->gf_group_bit_allocation[0] = kf_bits;
 
   // Note the total error score of the kf group minus the key frame itself.
   twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
@@ -2128,7 +2152,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   FIRSTPASS_STATS this_frame;
   FIRSTPASS_STATS this_frame_copy;
 
-  int target;
+  int target_rate;
   LAYER_CONTEXT *lc = NULL;
   const int is_spatial_svc = (cpi->use_svc &&
                               cpi->svc.number_temporal_layers == 1);
@@ -2144,16 +2168,23 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   if (!twopass->stats_in)
     return;
 
+  // Increment the gf group index.
+  ++twopass->gf_group_index;
+
+  // If this is an ARF frame then we don't want to read the stats file or
+  // advance the input pointer as we already have what we need.
   if (cpi->refresh_alt_ref_frame) {
-    int modified_target = twopass->gf_bits;
-    rc->base_frame_target = twopass->gf_bits;
-    cm->frame_type = INTER_FRAME;
+    int target_rate;
+    target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
+    target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
+    rc->base_frame_target = target_rate;
 #ifdef LONG_TERM_VBR_CORRECTION
     // Correction to rate target based on prior over or under shoot.
     if (cpi->oxcf.rc_mode == RC_MODE_VBR)
-      vbr_rate_correction(&modified_target, rc->vbr_bits_off_target);
+      vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
 #endif
-    vp9_rc_set_frame_target(cpi, modified_target);
+    vp9_rc_set_frame_target(cpi, target_rate);
+    cm->frame_type = INTER_FRAME;
     return;
   }
 
@@ -2181,11 +2212,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   if (EOF == input_stats(twopass, &this_frame))
     return;
 
+  // Local copy of the current frame's first pass stats.
+  this_frame_copy = this_frame;
+
   // Keyframe and section processing.
   if (rc->frames_to_key == 0 ||
       (cpi->frame_flags & FRAMEFLAGS_KEY)) {
     // Define next KF group and assign bits to it.
-    this_frame_copy = this_frame;
     find_next_key_frame(cpi, &this_frame_copy);
   } else {
     cm->frame_type = INTER_FRAME;
@@ -2204,11 +2237,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     }
   }
 
-  // Is this frame a GF / ARF? (Note: a key frame is always also a GF).
+  // Define a new GF/ARF group. (Should always enter here for key frames).
   if (rc->frames_till_gf_update_due == 0) {
-    // Define next gf group and assign bits to it.
-    this_frame_copy = this_frame;
-
 #if CONFIG_MULTIPLE_ARF
     if (cpi->multi_arf_enabled) {
       define_fixed_arf_period(cpi);
@@ -2231,11 +2261,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
     cpi->refresh_golden_frame = 1;
-  } else {
-    // Otherwise this is an ordinary frame.
-    // Assign bits from those allocated to the GF group.
-    this_frame_copy =  this_frame;
-    assign_std_frame_bits(cpi, &this_frame_copy);
   }
 
   {
@@ -2246,18 +2271,19 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     }
   }
 
+  target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
   if (cpi->common.frame_type == KEY_FRAME)
-    target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target);
+    target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
   else
-    target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target);
+    target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
 
-  rc->base_frame_target = target;
+  rc->base_frame_target = target_rate;
 #ifdef LONG_TERM_VBR_CORRECTION
   // Correction to rate target based on prior over or under shoot.
   if (cpi->oxcf.rc_mode == RC_MODE_VBR)
-    vbr_rate_correction(&target, rc->vbr_bits_off_target);
+    vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
 #endif
-  vp9_rc_set_frame_target(cpi, target);
+  vp9_rc_set_frame_target(cpi, target_rate);
 
   // Update the total stats remaining structure.
   subtract_stats(&twopass->total_left_stats, &this_frame);
@@ -2306,8 +2332,6 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
   } else {
 #endif
     twopass->kf_group_bits -= bits_used;
-    twopass->gf_group_bits -= bits_used;
-    twopass->gf_group_bits = MAX(twopass->gf_group_bits, 0);
   }
   twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0);
 }
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index d84793e28..309638c1e 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -11,6 +11,8 @@
 #ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
 #define VP9_ENCODER_VP9_FIRSTPASS_H_
 
+#include "vp9/encoder/vp9_lookahead.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -51,31 +53,24 @@ typedef struct {
   int64_t bits_left;
   double modified_error_min;
   double modified_error_max;
-  double modified_error_total;
   double modified_error_left;
   double kf_intra_err_min;
   double gf_intra_err_min;
-  int kf_bits;
-  // Remaining error from uncoded frames in a gf group. Two pass use only
-  int64_t gf_group_error_left;
 
   // Projected total bits available for a key frame group of frames
   int64_t kf_group_bits;
 
   // Error score of frames still to be coded in kf group
   int64_t kf_group_error_left;
-
-  // Projected Bits available for a group of frames including 1 GF or ARF
-  int64_t gf_group_bits;
-  // Bits for the golden frame or ARF - 2 pass only
-  int gf_bits;
-
   int sr_update_lag;
 
   int kf_zeromotion_pct;
   int gf_zeromotion_pct;
 
   int active_worst_quality;
+
+  int gf_group_index;
+  int gf_group_bit_allocation[MAX_LAG_BUFFERS * 2];
 } TWO_PASS;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 041e583fd..842bc5b9d 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -72,8 +72,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
   x->mv_row_max = tmp_row_max;
 
   return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-          xd->plane[0].dst.buf, xd->plane[0].dst.stride,
-          INT_MAX);
+          xd->plane[0].dst.buf, xd->plane[0].dst.stride);
 }
 
 static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
@@ -86,8 +85,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
   // Try zero MV first
   // FIXME should really use something like near/nearest MV and/or MV prediction
   err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
-                     INT_MAX);
+                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
   dst_mv->as_int = 0;
 
   // Test last reference frame using the previous best mv as the
@@ -123,8 +121,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) {
   // Try zero MV first
   // FIXME should really use something like near/nearest MV and/or MV prediction
   err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
-                     INT_MAX);
+                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
 
   dst_mv->as_int = 0;
 
@@ -147,7 +144,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) {
                             xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                             0, 0, 0);
     err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                       xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err);
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride);
 
     // find best
     if (err < best_err) {
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 4f7d6f17c..9d2b2a497 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -524,9 +524,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
 
   // Work out the start point for the search
   bestsad = vfp->sdf(what->buf, what->stride,
-                     get_buf_from_mv(in_what, ref_mv), in_what->stride,
-                     0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv,
-                                                  sad_per_bit);
+                     get_buf_from_mv(in_what, ref_mv), in_what->stride);
 
   // Search all possible scales upto the search param around the center point
   // pick the scale of the point that is best as the starting scale of
@@ -542,7 +540,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
                               bc + candidates[t][i].col};
           thissad = vfp->sdf(what->buf, what->stride,
                              get_buf_from_mv(in_what, &this_mv),
-                             in_what->stride, bestsad);
+                             in_what->stride);
           CHECK_BETTER
         }
       } else {
@@ -553,7 +551,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
             continue;
           thissad = vfp->sdf(what->buf, what->stride,
                              get_buf_from_mv(in_what, &this_mv),
-                             in_what->stride, bestsad);
+                             in_what->stride);
           CHECK_BETTER
         }
       }
@@ -585,7 +583,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
                                 bc + candidates[s][i].col};
             thissad = vfp->sdf(what->buf, what->stride,
                                get_buf_from_mv(in_what, &this_mv),
-                               in_what->stride, bestsad);
+                               in_what->stride);
             CHECK_BETTER
           }
         } else {
@@ -596,7 +594,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
               continue;
             thissad = vfp->sdf(what->buf, what->stride,
                                get_buf_from_mv(in_what, &this_mv),
-                               in_what->stride, bestsad);
+                               in_what->stride);
             CHECK_BETTER
           }
         }
@@ -623,7 +621,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
                                 bc + candidates[s][next_chkpts_indices[i]].col};
             thissad = vfp->sdf(what->buf, what->stride,
                                get_buf_from_mv(in_what, &this_mv),
-                               in_what->stride, bestsad);
+                               in_what->stride);
             CHECK_BETTER
           }
         } else {
@@ -634,7 +632,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
               continue;
             thissad = vfp->sdf(what->buf, what->stride,
                                get_buf_from_mv(in_what, &this_mv),
-                               in_what->stride, bestsad);
+                               in_what->stride);
             CHECK_BETTER
           }
         }
@@ -661,7 +659,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
                               bc + neighbors[i].col};
           thissad = vfp->sdf(what->buf, what->stride,
                              get_buf_from_mv(in_what, &this_mv),
-                             in_what->stride, bestsad);
+                             in_what->stride);
           CHECK_BETTER
         }
       } else {
@@ -672,7 +670,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
             continue;
           thissad = vfp->sdf(what->buf, what->stride,
                              get_buf_from_mv(in_what, &this_mv),
-                             in_what->stride, bestsad);
+                             in_what->stride);
           CHECK_BETTER
         }
       }
@@ -894,8 +892,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x,
   *best_mv = *ref_mv;
   *num00 = 11;
   best_sad = fn_ptr->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, ref_mv), in_what->stride,
-                         0x7fffffff) +
+                         get_buf_from_mv(in_what, ref_mv), in_what->stride) +
                  mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   start_row = MAX(-range, x->mv_row_min - ref_mv->row);
   start_col = MAX(-range, x->mv_col_min - ref_mv->col);
@@ -929,7 +926,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x,
         for (i = 0; i < end_col - c; ++i) {
           const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
           unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
-              get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+              get_buf_from_mv(in_what, &mv), in_what->stride);
           if (sad < best_sad) {
             sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
             if (sad < best_sad) {
@@ -975,7 +972,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
 
   // Check the starting position
   best_sad = fn_ptr->sdf(what->buf, what->stride,
-                         best_address, in_what->stride, 0x7fffffff) +
+                         best_address, in_what->stride) +
       mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   i = 1;
@@ -986,8 +983,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
                      best_mv->col + ss[i].mv.col};
       if (is_mv_in(x, &mv)) {
        int sad = fn_ptr->sdf(what->buf, what->stride,
-                             best_address + ss[i].offset, in_what->stride,
-                             best_sad);
+                             best_address + ss[i].offset, in_what->stride);
         if (sad < best_sad) {
           sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
           if (sad < best_sad) {
@@ -1012,7 +1008,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
         if (is_mv_in(x, &this_mv)) {
           int sad = fn_ptr->sdf(what->buf, what->stride,
                                 best_address + ss[best_site].offset,
-                                in_what->stride, best_sad);
+                                in_what->stride);
           if (sad < best_sad) {
             sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (sad < best_sad) {
@@ -1077,7 +1073,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
   best_address = in_what;
 
   // Check the starting position
-  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
                 + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   i = 1;
@@ -1129,7 +1125,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
         if (is_mv_in(x, &this_mv)) {
           const uint8_t *const check_here = ss[i].offset + best_address;
           unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
-                                             in_what_stride, bestsad);
+                                             in_what_stride);
 
           if (thissad < bestsad) {
             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
@@ -1154,7 +1150,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
         if (is_mv_in(x, &this_mv)) {
           const uint8_t *const check_here = ss[best_site].offset + best_address;
           unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
-                                             in_what_stride, bestsad);
+                                             in_what_stride);
           if (thissad < bestsad) {
             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                       mvjsadcost, mvsadcost, sad_per_bit);
@@ -1253,7 +1249,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
   const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   int best_sad = fn_ptr->sdf(what->buf, what->stride,
-      get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   *best_mv = *ref_mv;
 
@@ -1261,7 +1257,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
     for (c = col_min; c < col_max; ++c) {
       const MV mv = {r, c};
       const int sad = fn_ptr->sdf(what->buf, what->stride,
-          get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
+          get_buf_from_mv(in_what, &mv), in_what->stride) +
               mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
       if (sad < best_sad) {
         best_sad = sad;
@@ -1286,7 +1282,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
   const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
-      get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   *best_mv = *ref_mv;
 
@@ -1320,7 +1316,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
 
     while (c < col_max) {
       unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
-                                     check_here, in_what->stride, best_sad);
+                                     check_here, in_what->stride);
       if (sad < best_sad) {
         const MV mv = {r, c};
         sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
@@ -1351,7 +1347,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
   const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
-      get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
       mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   *best_mv = *ref_mv;
 
@@ -1409,7 +1405,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
 
     while (c < col_max) {
       unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
-                                     check_here, in_what->stride, best_sad);
+                                     check_here, in_what->stride);
       if (sad < best_sad) {
         const MV mv = {r, c};
         sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
@@ -1438,7 +1434,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
                                      get_buf_from_mv(in_what, ref_mv),
-                                     in_what->stride, 0x7fffffff) +
+                                     in_what->stride) +
       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
@@ -1450,7 +1446,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
                      ref_mv->col + neighbors[j].col};
       if (is_mv_in(x, &mv)) {
         unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
-            get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
+            get_buf_from_mv(in_what, &mv), in_what->stride);
         if (sad < best_sad) {
           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sad < best_sad) {
@@ -1483,7 +1479,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
-                                    in_what->stride, 0x7fffffff) +
+                                    in_what->stride) +
       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
@@ -1524,7 +1520,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
         if (is_mv_in(x, &mv)) {
           unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
                                          get_buf_from_mv(in_what, &mv),
-                                         in_what->stride, best_sad);
+                                         in_what->stride);
           if (sad < best_sad) {
             sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
             if (sad < best_sad) {
@@ -1563,8 +1559,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
-      get_buf_from_mv(in_what, ref_mv), in_what->stride,
-      second_pred, 0x7fffffff) +
+      get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) +
       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
@@ -1577,8 +1572,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
 
       if (is_mv_in(x, &mv)) {
         unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
-            get_buf_from_mv(in_what, &mv), in_what->stride,
-            second_pred, best_sad);
+            get_buf_from_mv(in_what, &mv), in_what->stride, second_pred);
         if (sad < best_sad) {
           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sad < best_sad) {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 437b68078..11633a73d 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -284,9 +284,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
           (1 << INTER_OFFSET(this_mode)))
         continue;
 
-      if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] *
-          rd_thresh_freq_fact[this_mode] >> 5) ||
-          rd_threshes[mode_idx[this_mode]] == INT_MAX)
+      if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]],
+                              rd_thresh_freq_fact[this_mode]))
         continue;
 
       if (this_mode == NEWMV) {
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index a04622c8c..9ad851754 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -213,6 +213,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
   rc->long_rolling_actual_bits = rc->avg_frame_bandwidth;
 
   rc->total_actual_bits = 0;
+  rc->total_target_bits = 0;
   rc->total_target_vs_actual = 0;
 
   rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
@@ -607,13 +608,27 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
   return q;
 }
 
+static int get_active_cq_level(const RATE_CONTROL *rc,
+                               const VP9EncoderConfig *const oxcf) {
+  static const double cq_adjust_threshold = 0.5;  // actual/target bits ratio
+  int active_cq_level = oxcf->cq_level;  // default: configured cq_level
+  if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY &&
+      rc->total_target_bits > 0) {  // > 0 keeps the ratio well defined
+    const double x = (double)rc->total_actual_bits / rc->total_target_bits;
+    if (x < cq_adjust_threshold) {  // scale level down linearly with x
+      active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold);
+    }
+  }
+  return active_cq_level;  // unchanged cq_level unless scaled above
+}
+
 static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
                                              int *bottom_index,
                                              int *top_index) {
   const VP9_COMMON *const cm = &cpi->common;
   const RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-  const int cq_level = oxcf->cq_level;
+  const int cq_level = get_active_cq_level(rc, oxcf);
   int active_best_quality;
   int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
   int q;
@@ -791,7 +806,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
   const VP9_COMMON *const cm = &cpi->common;
   const RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-  const int cq_level = oxcf->cq_level;
+  const int cq_level = get_active_cq_level(rc, oxcf);
   int active_best_quality;
   int active_worst_quality = cpi->twopass.active_worst_quality;
   int q;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 601e64d39..832372479 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -9,7 +9,6 @@
  */
 
 #include <assert.h>
-#include <limits.h>
 #include <math.h>
 #include <stdio.h>
 
@@ -22,7 +21,6 @@
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_idct.h"
 #include "vp9/common/vp9_mvref_common.h"
-#include "vp9/common/vp9_pragmas.h"
 #include "vp9/common/vp9_pred_common.h"
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_reconinter.h"
@@ -2131,8 +2129,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
 
     // Find sad for current vector.
     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
-                                           ref_y_ptr, ref_y_stride,
-                                           0x7fffffff);
+                                           ref_y_ptr, ref_y_stride);
 
     // Note if it is the best so far.
     if (this_sad < best_sad) {
@@ -2991,11 +2988,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   ctx->mic = *xd->mi[0];
 }
 
-static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
-                                      int thresh_fact) {
-    return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
-}
-
 // Updating rd_thresh_freq_fact[] here means that the different
 // partition/block sizes are handled independently based on the best
 // choice for the current partition. It may well be better to keep a scaled
@@ -3228,7 +3220,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
     // Test best rd so far against threshold for trying this mode.
     if (rd_less_than_thresh(best_rd, rd_threshes[mode_index],
-        rd_thresh_freq_fact[mode_index]))
+                            rd_thresh_freq_fact[mode_index]))
       continue;
 
     this_mode = vp9_mode_order[mode_index].mode;
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index b6b51e553..5ea09a8a7 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -11,6 +11,8 @@
 #ifndef VP9_ENCODER_VP9_RDOPT_H_
 #define VP9_ENCODER_VP9_RDOPT_H_
 
+#include <limits.h>
+
 #include "vp9/encoder/vp9_encoder.h"
 
 #ifdef __cplusplus
@@ -87,6 +89,11 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi);
 
 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi);
 
+static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
+                                      int thresh_fact) {  // fact is in 1/32nds
+    return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
+}  // callers skip the mode when this is nonzero
+
 static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
                                     BLOCK_SIZE bsize, MV *mvp_full,
                                     int step_param, int error_per_bit,
diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c
index 892e90551..d06263676 100644
--- a/vp9/encoder/vp9_sad.c
+++ b/vp9/encoder/vp9_sad.c
@@ -35,14 +35,12 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride,
 
 #define sadMxN(m, n) \
 unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
-                                  const uint8_t *ref, int ref_stride, \
-                                  unsigned int max_sad) { \
+                                  const uint8_t *ref, int ref_stride) { \
   return sad(src, src_stride, ref, ref_stride, m, n); \
 } \
 unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
-                                      const uint8_t *second_pred, \
-                                      unsigned int max_sad) { \
+                                      const uint8_t *second_pred) { \
   uint8_t comp_pred[m * n]; \
   vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
   return sad(src, src_stride, comp_pred, m, m, n); \
@@ -54,8 +52,7 @@ void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
                                 unsigned int *sads) { \
   int i; \
   for (i = 0; i < k; ++i) \
-    sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride, \
-                                   0x7fffffff); \
+    sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \
 }
 
 #define sadMxNx4D(m, n) \
@@ -64,8 +61,7 @@ void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                              unsigned int *sads) { \
   int i; \
   for (i = 0; i < 4; ++i) \
-    sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride, \
-                                   0x7fffffff); \
+    sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \
 }
 
 // 64x64
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 7b2d1e2f0..7c3abd5d7 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -80,12 +80,16 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   }
 
   if (speed >= 2) {
-    if (MIN(cm->width, cm->height) >= 720)
+    if (MIN(cm->width, cm->height) >= 720) {
+      sf->lf_motion_threshold = LOW_MOITION_THRESHOLD;
+      sf->last_partitioning_redo_frequency = 3;
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
                                               : DISABLE_ALL_INTER_SPLIT;
-    else
+    } else {
       sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
-
+      sf->last_partitioning_redo_frequency = 2;
+      sf->lf_motion_threshold = NO_MOITION_THRESHOLD;
+    }
     sf->adaptive_pred_interp_filter = 2;
     sf->reference_masking = 1;
     sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
@@ -97,7 +101,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
     sf->adjust_partitioning_from_last_frame = 1;
-    sf->last_partitioning_redo_frequency = 3;
   }
 
   if (speed >= 3) {
@@ -108,6 +111,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     else
       sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
 
+    sf->lf_motion_threshold = LOW_MOITION_THRESHOLD;
+    sf->last_partitioning_redo_frequency = 3;
     sf->recode_loop = ALLOW_RECODE_KFMAXBW;
     sf->adaptive_rd_thresh = 3;
     sf->mode_skip_start = 6;
@@ -142,8 +147,11 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   }
 }
 
-static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
+static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
                                  int speed) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int frames_since_key =
+      cm->frame_type == KEY_FRAME ? 0 : cpi->rc.frames_since_key;
   sf->static_segmentation = 0;
   sf->adaptive_rd_thresh = 1;
   sf->use_fast_coef_costing = 1;
@@ -196,6 +204,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
 
     sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
+    sf->lf_motion_threshold = LOW_MOITION_THRESHOLD;
     sf->adjust_partitioning_from_last_frame = 1;
     sf->last_partitioning_redo_frequency = 3;
 
@@ -233,7 +242,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
     sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
     sf->adjust_partitioning_from_last_frame =
         cm->last_frame_type != cm->frame_type || (0 ==
-        (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency);
+        (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
     sf->subpel_force_stop = 1;
     for (i = 0; i < TX_SIZES; i++) {
       sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
@@ -254,9 +263,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
     sf->max_partition_size = BLOCK_32X32;
     sf->min_partition_size = BLOCK_8X8;
     sf->partition_check =
-        (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1);
+        (frames_since_key % sf->last_partitioning_redo_frequency == 1);
     sf->force_frame_boost = cm->frame_type == KEY_FRAME ||
-        (cm->current_video_frame %
+        (frames_since_key %
             (sf->last_partitioning_redo_frequency << 1) == 1);
     sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15;
     sf->partition_search_type = REFERENCE_PARTITION;
@@ -356,7 +365,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
       set_good_speed_feature(cpi, cm, sf, oxcf->speed);
       break;
     case REALTIME:
-      set_rt_speed_feature(cm, sf, oxcf->speed);
+      set_rt_speed_feature(cpi, sf, oxcf->speed);
       break;
   }
 
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index d8c1a8be2..a54599e6a 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -44,6 +44,11 @@ typedef enum {
 } SUBPEL_SEARCH_METHODS;
 
 typedef enum {
+  NO_MOITION_THRESHOLD = 0,  // used for sf->lf_motion_threshold
+  LOW_MOITION_THRESHOLD = 7
+} MOTION_THRESHOLD;  // NOTE(review): enumerators misspell MOTION as MOITION
+
+typedef enum {
   LAST_FRAME_PARTITION_OFF = 0,
   LAST_FRAME_PARTITION_LOW_MOTION = 1,
   LAST_FRAME_PARTITION_ALL = 2
@@ -200,6 +205,10 @@ typedef struct SPEED_FEATURES {
   // partitioning.
   LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning;
 
+  // The threshold is to determine how slow the motion is; it is used when
+  // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION.
+  MOTION_THRESHOLD lf_motion_threshold;
+
   // Determine which method we use to determine transform size. We can choose
   // between options like full rd, largest for prediction size, largest
   // for intra and model coefs for the rest.
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index dd28496be..48aa64c13 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -46,10 +46,10 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
     lrc->key_frame_rate_correction_factor = 1.0;
 
     if (svc->number_temporal_layers > 1) {
-      lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+      lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
       lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
     } else {
-      lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000;
+      lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
       lrc->last_q[0] = oxcf->best_allowed_q;
       lrc->last_q[1] = oxcf->best_allowed_q;
       lrc->last_q[2] = oxcf->best_allowed_q;
@@ -82,9 +82,9 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
     RATE_CONTROL *const lrc = &lc->rc;
 
     if (svc->number_temporal_layers > 1) {
-      lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000;
+      lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
     } else {
-      lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000;
+      lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
     }
     bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
     // Update buffer-related quantities.
@@ -132,8 +132,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
   } else {
     const double prev_layer_framerate =
         oxcf->framerate / oxcf->ts_rate_decimator[layer - 1];
-    const int prev_layer_target_bandwidth =
-        oxcf->ts_target_bitrate[layer - 1] * 1000;
+    const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1];
     lc->avg_frame_size =
         (int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
               (lc->framerate - prev_layer_framerate));
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 91d8ea4dc..02bed8988 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -156,6 +156,19 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
   return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
 }
 
+// Calls variance() on a fixed 16x16 block; sse and sum are forwarded as-is.
+void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
+                             const uint8_t *ref_ptr, int ref_stride,
+                             unsigned int *sse, int *sum) {
+  variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
+}
+// Calls variance() on a fixed 8x8 block; sse and sum are forwarded as-is.
+void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
+                           const uint8_t *ref_ptr, int ref_stride,
+                           unsigned int *sse, int *sum) {
+  variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
+}
+
 unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
                             const uint8_t *ref, int ref_stride,
                             unsigned int *sse) {
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index c47fe1335..4a194b72c 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -25,15 +25,13 @@ void variance(const uint8_t *a, int a_stride,
 typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
                                     int source_stride,
                                     const uint8_t *ref_ptr,
-                                    int ref_stride,
-                                    unsigned int max_sad);
+                                    int ref_stride);
 
 typedef unsigned int(*vp9_sad_avg_fn_t)(const uint8_t *src_ptr,
                                         int source_stride,
                                         const uint8_t *ref_ptr,
                                         int ref_stride,
-                                        const uint8_t *second_pred,
-                                        unsigned int max_sad);
+                                        const uint8_t *second_pred);
 
 typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr,
                                    int source_stride,
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
index 8723a7114..8723a7114 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.asm
+++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 48ccef8cc..48ccef8cc 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
diff --git a/vp9/encoder/x86/vp9_ssim_opt.asm b/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
index 455d10d2c..455d10d2c 100644
--- a/vp9/encoder/x86/vp9_ssim_opt.asm
+++ b/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
diff --git a/vp9/encoder/x86/vp9_variance_avx2.c b/vp9/encoder/x86/vp9_variance_avx2.c
index 835c51957..7f81f46b8 100644
--- a/vp9/encoder/x86/vp9_variance_avx2.c
+++ b/vp9/encoder/x86/vp9_variance_avx2.c
@@ -10,7 +10,6 @@
 #include "./vpx_config.h"
 
 #include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_pragmas.h"
 #include "vpx_ports/mem.h"
 
 typedef void (*get_var_avx2) (
diff --git a/vp9/encoder/x86/vp9_variance_mmx.c b/vp9/encoder/x86/vp9_variance_mmx.c
index c4d17fc0f..ae2f976af 100644
--- a/vp9/encoder/x86/vp9_variance_mmx.c
+++ b/vp9/encoder/x86/vp9_variance_mmx.c
@@ -10,7 +10,6 @@
 
 #include "./vpx_config.h"
 #include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_pragmas.h"
 #include "vpx_ports/mem.h"
 
 extern unsigned int vp9_get8x8var_mmx
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 41f225922..6a82038ab 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -11,7 +11,6 @@
 #include "./vpx_config.h"
 
 #include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_pragmas.h"
 #include "vpx_ports/mem.h"
 
 extern unsigned int vp9_get4x4var_mmx
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 3b4d6b901..8c1f34567 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -10,7 +10,6 @@
 
 VP9_COMMON_SRCS-yes += vp9_common.mk
 VP9_COMMON_SRCS-yes += vp9_iface_common.h
-VP9_COMMON_SRCS-yes += common/vp9_pragmas.h
 VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
 VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
 VP9_COMMON_SRCS-yes += common/vp9_blockd.c
@@ -119,9 +118,10 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_mblpf_horiz_loopfilter_d
 VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
 
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
-
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_intrin_ssse3.c
 ifeq ($(ARCH_X86_64), yes)
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm
 endif
 
 VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve_neon.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 5a8a4f4fe..ce7c71ef7 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -327,7 +327,8 @@ static vpx_codec_err_t set_encoder_config(
   else if (cfg->rc_end_usage == VPX_CBR)
     oxcf->rc_mode = RC_MODE_CBR;
 
-  oxcf->target_bandwidth         = cfg->rc_target_bitrate;
+  // Convert target bandwidth from Kbit/s to Bit/s
+  oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;
   oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
 
   oxcf->best_allowed_q  = vp9_quantizer_to_qindex(cfg->rc_min_quantizer);
@@ -387,7 +388,9 @@ static vpx_codec_err_t set_encoder_config(
   oxcf->ss_number_layers = cfg->ss_number_layers;
 
   if (oxcf->ss_number_layers > 1) {
-    vp9_copy(oxcf->ss_target_bitrate, cfg->ss_target_bitrate);
+    int i;
+    for (i = 0; i < VPX_SS_MAX_LAYERS; ++i)
+      oxcf->ss_target_bitrate[i] =  1000 * cfg->ss_target_bitrate[i];
   } else if (oxcf->ss_number_layers == 1) {
     oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;
   }
@@ -395,8 +398,11 @@ static vpx_codec_err_t set_encoder_config(
   oxcf->ts_number_layers = cfg->ts_number_layers;
 
   if (oxcf->ts_number_layers > 1) {
-    vp9_copy(oxcf->ts_target_bitrate, cfg->ts_target_bitrate);
-    vp9_copy(oxcf->ts_rate_decimator, cfg->ts_rate_decimator);
+    int i;
+    for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) {
+      oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i];
+      oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i];
+    }
   } else if (oxcf->ts_number_layers == 1) {
     oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth;
     oxcf->ts_rate_decimator[0] = 1;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 328b98fee..9cf1735cb 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -260,6 +260,9 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
                                  ctx->decrypt_state);
     if (res != VPX_CODEC_OK)
       return res;
+
+    if (!ctx->si.is_kf)
+      return VPX_CODEC_ERROR;
   }
 
   // Initialize the decoder instance on the first frame
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 6e5c521ab..a44ffc1ca 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -112,12 +112,12 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
 endif
 
 ifeq ($(ARCH_X86_64),yes)
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
 endif
 VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
 VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
-VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm
+VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
 
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2.c
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 4009a8a42..17e165bfb 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -580,7 +580,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
   // TODO(ivanmaltz): determine if these values need to be set explicitly for
   // svc, or if the normal default/override mechanism can be used
   enc_cfg->rc_dropframe_thresh = 0;
-  enc_cfg->rc_end_usage = VPX_CBR;
   enc_cfg->rc_resize_allowed = 0;
 
   if (enc_cfg->g_pass == VPX_RC_ONE_PASS) {
@@ -604,6 +603,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
 
   vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1);
   vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1);
+  vpx_codec_control(codec_ctx, VP8E_SET_ENABLEAUTOALTREF, 0);
 
   return VPX_CODEC_OK;
 }