18 files changed, 413 insertions, 341 deletions
diff --git a/examples.mk b/examples.mk
index c17fac9ca..7b47ade86 100644
--- a/examples.mk
+++ b/examples.mk
@@ -49,9 +49,9 @@ vpxenc.DESCRIPTION           = Full featured encoder
 UTILS-$(CONFIG_VP8_ENCODER)    += vp8_scalable_patterns.c
 vp8_scalable_patterns.GUID   = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
 vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
-UTILS-$(CONFIG_VP8_ENCODER)    += vp9_spatial_scalable_encoder.c
-vp8_scalable_patterns.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
-vp8_scalable_patterns.DESCRIPTION = Spatial Scalable Encoder
+UTILS-$(CONFIG_VP9_ENCODER)    += vp9_spatial_scalable_encoder.c
+vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
+vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
 
 # Clean up old ivfenc, ivfdec binaries.
 ifeq ($(CONFIG_MSVS),yes)
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c
index 5fc5ed2a3..27cfe861c 100644
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -105,7 +105,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
 void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
   int size;
   for (size = 4; size > 1; size--) {
-    if (bin & 0x000000ff << ((size - 1) * 8))
+    if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
       break;
   }
   Ebml_WriteID(glob, class_id);
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 7d49c12d3..0d19aa05b 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -258,7 +258,7 @@ void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
 }
 
 typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
-typedef void (*idct_t)(int16_t *in, uint8_t *out, int stride);
+typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
 typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
 typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
 
@@ -509,7 +509,8 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans16x16DCT,
     ::testing::Values(
-        make_tuple(&vp9_short_fdct16x16_sse2, &vp9_short_idct16x16_add_c, 0)));
+        make_tuple(&vp9_short_fdct16x16_sse2,
+                   &vp9_short_idct16x16_add_sse2, 0)));
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans16x16HT,
     ::testing::Values(
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index ee6c9f6b3..7edb4d042 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,242 +13,309 @@
 #include <string.h>
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
-#include "vpx_ports/mem.h"
+#include "test/util.h"
 
 extern "C" {
+#include "vp9/common/vp9_entropy.h"
 #include "./vp9_rtcd.h"
 void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
 }
-
-#include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
 
 using libvpx_test::ACMRandom;
 
 namespace {
-void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
-             int stride, int /*tx_type*/) {
+typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
+typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
+typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
+typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
+
+void fdct8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
   vp9_short_fdct8x8_c(in, out, stride);
 }
-void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
-                 int stride, int /*tx_type*/) {
-  vp9_short_idct8x8_add_c(out, dst, stride >> 1);
-}
-void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
-            int stride, int tx_type) {
-  // TODO(jingning): need to refactor this to test both _c and _sse2 functions,
-  // when we have all inverse dct functions done sse2.
-#if HAVE_SSE2
-  vp9_short_fht8x8_sse2(in, out, stride >> 1, tx_type);
-#else
-  vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
-#endif
-}
-void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
-                int stride, int tx_type) {
-  vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
+
+void fht8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
+  vp9_short_fht8x8_c(in, out, stride, tx_type);
 }
 
-class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
+class FwdTrans8x8TestBase {
  public:
-  virtual ~FwdTrans8x8Test() {}
-  virtual void SetUp() {
-    tx_type_ = GetParam();
-    if (tx_type_ == 0) {
-      fwd_txfm = fdct8x8;
-      inv_txfm = idct8x8_add;
-    } else {
-      fwd_txfm = fht8x8;
-      inv_txfm = iht8x8_add;
-    }
-  }
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+  virtual ~FwdTrans8x8TestBase() {}
 
  protected:
-  void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
-                  int stride, int tx_type) {
-    (*fwd_txfm)(in, out, dst, stride, tx_type);
-  }
-  void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
-                  int stride, int tx_type) {
-    (*inv_txfm)(in, out, dst, stride, tx_type);
-  }
+  virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
+  virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
 
-  int tx_type_;
-  void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
-  void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
-};
+  void RunSignBiasCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
+    int count_sign_block[64][2];
+    const int count_test_block = 100000;
 
-TEST_P(FwdTrans8x8Test, SignBiasCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
-  const int pitch = 16;
-  int count_sign_block[64][2];
-  const int count_test_block = 100000;
+    memset(count_sign_block, 0, sizeof(count_sign_block));
 
-  memset(count_sign_block, 0, sizeof(count_sign_block));
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-255, 255].
+      for (int j = 0; j < 64; ++j)
+        test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+      REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_output_block, pitch_));
 
-  for (int i = 0; i < count_test_block; ++i) {
-    // Initialize a test block with input range [-255, 255].
-    for (int j = 0; j < 64; ++j)
-      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
-    REGISTER_STATE_CHECK(
-        RunFwdTxfm(test_input_block, test_output_block,
-                   NULL, pitch, tx_type_));
+      for (int j = 0; j < 64; ++j) {
+        if (test_output_block[j] < 0)
+          ++count_sign_block[j][0];
+        else if (test_output_block[j] > 0)
+          ++count_sign_block[j][1];
+      }
+    }
 
     for (int j = 0; j < 64; ++j) {
-      if (test_output_block[j] < 0)
-        ++count_sign_block[j][0];
-      else if (test_output_block[j] > 0)
-        ++count_sign_block[j][1];
+      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+      const int max_diff = 1125;
+      EXPECT_LT(diff, max_diff)
+          << "Error: 8x8 FDCT/FHT has a sign bias > "
+          << 1. * max_diff / count_test_block * 100 << "%"
+          << " for input range [-255, 255] at index " << j
+          << " count0: " << count_sign_block[j][0]
+          << " count1: " << count_sign_block[j][1]
+          << " diff: " << diff;
     }
-  }
 
-  for (int j = 0; j < 64; ++j) {
-    const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-    const int max_diff = 1125;
-    EXPECT_LT(diff, max_diff)
-        << "Error: 8x8 FDCT/FHT has a sign bias > "
-        << 1. * max_diff / count_test_block * 100 << "%"
-        << " for input range [-255, 255] at index " << j
-        << " count0: " << count_sign_block[j][0]
-        << " count1: " << count_sign_block[j][1]
-        << " diff: " << diff;
-  }
+    memset(count_sign_block, 0, sizeof(count_sign_block));
 
-  memset(count_sign_block, 0, sizeof(count_sign_block));
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-15, 15].
+      for (int j = 0; j < 64; ++j)
+        test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
+      REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_output_block, pitch_));
 
-  for (int i = 0; i < count_test_block; ++i) {
-    // Initialize a test block with input range [-15, 15].
-    for (int j = 0; j < 64; ++j)
-      test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
-    REGISTER_STATE_CHECK(
-        RunFwdTxfm(test_input_block, test_output_block,
-                   NULL, pitch, tx_type_));
+      for (int j = 0; j < 64; ++j) {
+        if (test_output_block[j] < 0)
+          ++count_sign_block[j][0];
+        else if (test_output_block[j] > 0)
+          ++count_sign_block[j][1];
+      }
+    }
 
     for (int j = 0; j < 64; ++j) {
-      if (test_output_block[j] < 0)
-        ++count_sign_block[j][0];
-      else if (test_output_block[j] > 0)
-        ++count_sign_block[j][1];
+      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+      const int max_diff = 10000;
+      EXPECT_LT(diff, max_diff)
+          << "Error: 8x8 FDCT/FHT has a sign bias > "
+          << 1. * max_diff / count_test_block * 100 << "%"
+          << " for input range [-15, 15] at index " << j
+          << " count0: " << count_sign_block[j][0]
+          << " count1: " << count_sign_block[j][1]
+          << " diff: " << diff;
     }
   }
 
-  for (int j = 0; j < 64; ++j) {
-    const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-    const int max_diff = 10000;
-    EXPECT_LT(diff, max_diff)
-        << "Error: 4x4 FDCT/FHT has a sign bias > "
-        << 1. * max_diff / count_test_block * 100 << "%"
-        << " for input range [-15, 15] at index " << j
-        << " count0: " << count_sign_block[j][0]
-        << " count1: " << count_sign_block[j][1]
-        << " diff: " << diff;
-  }
-}
-
-TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int max_error = 0;
-  int total_error = 0;
-  const int count_test_block = 100000;
-  for (int i = 0; i < count_test_block; ++i) {
+  void RunRoundTripErrorCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    int max_error = 0;
+    int total_error = 0;
+    const int count_test_block = 100000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
     DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
     DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
     DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
 
-    for (int j = 0; j < 64; ++j) {
-      src[j] = rnd.Rand8();
-      dst[j] = rnd.Rand8();
-    }
-    // Initialize a test block with input range [-255, 255].
-    for (int j = 0; j < 64; ++j)
-      test_input_block[j] = src[j] - dst[j];
-
-    const int pitch = 16;
-    REGISTER_STATE_CHECK(
-        RunFwdTxfm(test_input_block, test_temp_block,
-                   dst, pitch, tx_type_));
-    for (int j = 0; j < 64; ++j) {
-        if (test_temp_block[j] > 0) {
-          test_temp_block[j] += 2;
-          test_temp_block[j] /= 4;
-          test_temp_block[j] *= 4;
-        } else {
-          test_temp_block[j] -= 2;
-          test_temp_block[j] /= 4;
-          test_temp_block[j] *= 4;
-        }
-    }
-    REGISTER_STATE_CHECK(
-        RunInvTxfm(test_input_block, test_temp_block,
-                   dst, pitch, tx_type_));
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-255, 255].
+      for (int j = 0; j < 64; ++j) {
+        src[j] = rnd.Rand8();
+        dst[j] = rnd.Rand8();
+        test_input_block[j] = src[j] - dst[j];
+      }
 
-    for (int j = 0; j < 64; ++j) {
-      const int diff = dst[j] - src[j];
-      const int error = diff * diff;
-      if (max_error < error)
-        max_error = error;
-      total_error += error;
+      REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      for (int j = 0; j < 64; ++j) {
+          if (test_temp_block[j] > 0) {
+            test_temp_block[j] += 2;
+            test_temp_block[j] /= 4;
+            test_temp_block[j] *= 4;
+          } else {
+            test_temp_block[j] -= 2;
+            test_temp_block[j] /= 4;
+            test_temp_block[j] *= 4;
+          }
+      }
+      REGISTER_STATE_CHECK(
+          RunInvTxfm(test_temp_block, dst, pitch_));
+
+      for (int j = 0; j < 64; ++j) {
+        const int diff = dst[j] - src[j];
+        const int error = diff * diff;
+        if (max_error < error)
+          max_error = error;
+        total_error += error;
+      }
     }
-  }
 
-  EXPECT_GE(1, max_error)
-    << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
+    EXPECT_GE(1, max_error)
+      << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
+      << " roundtrip error > 1";
 
-  EXPECT_GE(count_test_block/5, total_error)
-    << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
-        "error > 1/5 per block";
-}
+    EXPECT_GE(count_test_block/5, total_error)
+      << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
+      << "error > 1/5 per block";
+  }
 
-TEST_P(FwdTrans8x8Test, ExtremalCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int max_error = 0;
-  int total_error = 0;
-  const int count_test_block = 100000;
-  for (int i = 0; i < count_test_block; ++i) {
+  void RunExtremalCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    int max_error = 0;
+    int total_error = 0;
+    const int count_test_block = 100000;
     DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
     DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
     DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
     DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
 
-    for (int j = 0; j < 64; ++j) {
-      src[j] = rnd.Rand8() % 2 ? 255 : 0;
-      dst[j] = src[j] > 0 ? 0 : 255;
-    }
-    // Initialize a test block with input range [-255, 255].
-    for (int j = 0; j < 64; ++j)
-      test_input_block[j] = src[j] - dst[j];
-
-    const int pitch = 16;
-    REGISTER_STATE_CHECK(
-        RunFwdTxfm(test_input_block, test_temp_block,
-                   dst, pitch, tx_type_));
-    REGISTER_STATE_CHECK(
-        RunInvTxfm(test_input_block, test_temp_block,
-                   dst, pitch, tx_type_));
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-255, 255].
+      for (int j = 0; j < 64; ++j) {
+        src[j] = rnd.Rand8() % 2 ? 255 : 0;
+        dst[j] = src[j] > 0 ? 0 : 255;
+        test_input_block[j] = src[j] - dst[j];
+      }
 
-    for (int j = 0; j < 64; ++j) {
-      const int diff = dst[j] - src[j];
-      const int error = diff * diff;
-      if (max_error < error)
-        max_error = error;
-      total_error += error;
+      REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      REGISTER_STATE_CHECK(
+          RunInvTxfm(test_temp_block, dst, pitch_));
+
+      for (int j = 0; j < 64; ++j) {
+        const int diff = dst[j] - src[j];
+        const int error = diff * diff;
+        if (max_error < error)
+          max_error = error;
+        total_error += error;
+      }
+
+      EXPECT_GE(1, max_error)
+          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
+          << " an individual roundtrip error > 1";
+
+      EXPECT_GE(count_test_block/5, total_error)
+          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
+          << " roundtrip error > 1/5 per block";
     }
+  }
 
-    EXPECT_GE(1, max_error)
-        << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has an"
-        << " individual roundtrip error > 1";
+  int pitch_;
+  int tx_type_;
+  fht_t fwd_txfm_ref;
+};
 
-    EXPECT_GE(count_test_block/5, total_error)
-        << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
-        << " roundtrip error > 1/5 per block";
+class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
+                       public PARAMS(fdct_t, idct_t, int) {
+ public:
+  virtual ~FwdTrans8x8DCT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_  = GET_PARAM(2);
+    pitch_    = 16;
+    fwd_txfm_ref = fdct8x8_ref;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+    fwd_txfm_(in, out, stride);
+  }
+  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride >> 1);
   }
+
+  fdct_t fwd_txfm_;
+  idct_t inv_txfm_;
+};
+
+TEST_P(FwdTrans8x8DCT, SignBiasCheck) {
+  RunSignBiasCheck();
 }
 
-INSTANTIATE_TEST_CASE_P(VP9, FwdTrans8x8Test, ::testing::Range(0, 4));
+TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) {
+  RunRoundTripErrorCheck();
+}
+
+TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
+  RunExtremalCheck();
+}
+
+class FwdTrans8x8HT : public FwdTrans8x8TestBase,
+                      public PARAMS(fht_t, iht_t, int) {
+ public:
+  virtual ~FwdTrans8x8HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_  = GET_PARAM(2);
+    pitch_    = 8;
+    fwd_txfm_ref = fht8x8_ref;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  fht_t fwd_txfm_;
+  iht_t inv_txfm_;
+};
+
+TEST_P(FwdTrans8x8HT, SignBiasCheck) {
+  RunSignBiasCheck();
+}
+
+TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) {
+  RunRoundTripErrorCheck();
+}
+
+TEST_P(FwdTrans8x8HT, ExtremalCheck) {
+  RunExtremalCheck();
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, FwdTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&vp9_short_fdct8x8_c, &vp9_short_idct8x8_add_c, 0)));
+INSTANTIATE_TEST_CASE_P(
+    C, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 0),
+        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 1),
+        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 2),
+        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 3)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, FwdTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&vp9_short_fdct8x8_sse2, &vp9_short_idct8x8_add_sse2, 0)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 0),
+        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 1),
+        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 2),
+        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 3)));
+#endif
 }  // namespace
diff --git a/test/variance_test.cc b/test/variance_test.cc
index ca53ffb21..3f5560170 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -78,34 +78,6 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
   return sse - (((int64_t) se * se) >> (l2w + l2h));
 }
 
-static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
-                                            const uint8_t *src,
-                                            const uint8_t *second_pred,
-                                            int l2w, int l2h,
-                                            int xoff, int yoff,
-                                            unsigned int *sse_ptr) {
-  int se = 0;
-  unsigned int sse = 0;
-  const int w = 1 << l2w, h = 1 << l2h;
-  for (int y = 0; y < h; y++) {
-    for (int x = 0; x < w; x++) {
-      // bilinear interpolation at a 16th pel step
-      const int a1 = ref[(w + 1) * (y + 0) + x + 0];
-      const int a2 = ref[(w + 1) * (y + 0) + x + 1];
-      const int b1 = ref[(w + 1) * (y + 1) + x + 0];
-      const int b2 = ref[(w + 1) * (y + 1) + x + 1];
-      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
-      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
-      const int r = a + (((b - a) * yoff + 8) >> 4);
-      int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
-      se += diff;
-      sse += diff * diff;
-    }
-  }
-  *sse_ptr = sse;
-  return sse - (((int64_t) se * se) >> (l2w + l2h));
-}
-
 template<typename VarianceFunctionType>
 class VarianceTest
     : public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
@@ -190,6 +162,36 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
   EXPECT_EQ(expected, var);
 }
 
+#if CONFIG_VP9_ENCODER
+
+unsigned int subpel_avg_variance_ref(const uint8_t *ref,
+                                     const uint8_t *src,
+                                     const uint8_t *second_pred,
+                                     int l2w, int l2h,
+                                     int xoff, int yoff,
+                                     unsigned int *sse_ptr) {
+  int se = 0;
+  unsigned int sse = 0;
+  const int w = 1 << l2w, h = 1 << l2h;
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // bilinear interpolation at a 16th pel step
+      const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+      const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+      const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+      const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+      const int r = a + (((b - a) * yoff + 8) >> 4);
+      int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+      se += diff;
+      sse += diff * diff;
+    }
+  }
+  *sse_ptr = sse;
+  return sse - (((int64_t) se * se) >> (l2w + l2h));
+}
+
 template<typename SubpelVarianceFunctionType>
 class SubpelVarianceTest
     : public ::testing::TestWithParam<tuple<int, int,
@@ -280,6 +282,8 @@ void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
   }
 }
 
+#endif  // CONFIG_VP9_ENCODER
+
 // -----------------------------------------------------------------------------
 // VP8 test cases.
 
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 0c47da6bd..5f2f0a569 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -25,7 +25,7 @@
 #define WHT_UPSCALE_FACTOR 2
 
 #define pair_set_epi16(a, b) \
-  _mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
+  _mm_set_epi16(b, a, b, a, b, a, b, a)
 
 #define pair_set_epi32(a, b) \
   _mm_set_epi32(b, a, b, a)
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index cfb5cd4a3..6e425e8fb 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -316,13 +316,13 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
       continue;
     }
 
-    intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift);
+    intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * (1 << n_shift);
     lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
 
     for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref)
       for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
-        const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift)
-                                      + (lf->mode_deltas[mode] << n_shift);
+        const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * (1 << n_shift)
+                                      + lf->mode_deltas[mode] * (1 << n_shift);
         lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
       }
   }
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 0431e146f..44948ff4d 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -280,10 +280,10 @@ static int check_bsize_coverage(int bs, int mi_rows, int mi_cols,
 static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd,
                        int mi_row, int bh,
                        int mi_col, int bw) {
-  xd->mb_to_top_edge    = -((mi_row * MI_SIZE) << 3);
-  xd->mb_to_bottom_edge = ((cm->mi_rows - bh - mi_row) * MI_SIZE) << 3;
-  xd->mb_to_left_edge   = -((mi_col * MI_SIZE) << 3);
-  xd->mb_to_right_edge  = ((cm->mi_cols - bw - mi_col) * MI_SIZE) << 3;
+  xd->mb_to_top_edge    = -((mi_row * MI_SIZE) * 8);
+  xd->mb_to_bottom_edge = ((cm->mi_rows - bh - mi_row) * MI_SIZE) * 8;
+  xd->mb_to_left_edge   = -((mi_col * MI_SIZE) * 8);
+  xd->mb_to_right_edge  = ((cm->mi_cols - bw - mi_col) * MI_SIZE) * 8;
 
   // Are edges available for intra prediction?
   xd->up_available    = (mi_row != 0);
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index dc1d46caa..0f2e4e999 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -59,8 +59,8 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                                const struct subpix_fn_table *subpix,
                                enum mv_precision precision) {
   const int is_q4 = precision == MV_PRECISION_Q4;
-  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row << 1,
-                     is_q4 ? src_mv->col : src_mv->col << 1 };
+  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
+                     is_q4 ? src_mv->col : src_mv->col * 2 };
   const MV32 mv = scale->scale_mv(&mv_q4, scale);
   const int subpel_x = mv.col & SUBPEL_MASK;
   const int subpel_y = mv.row & SUBPEL_MASK;
@@ -100,16 +100,17 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
   const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS;
   const int spel_bottom = spel_top - SUBPEL_SHIFTS;
   MV clamped_mv = {
-    src_mv->row << (1 - ss_y),
-    src_mv->col << (1 - ss_x)
+    src_mv->row * (1 << (1 - ss_y)),
+    src_mv->col * (1 << (1 - ss_x))
   };
   assert(ss_x <= 1);
   assert(ss_y <= 1);
 
-  clamp_mv(&clamped_mv, (xd->mb_to_left_edge << (1 - ss_x)) - spel_left,
-                        (xd->mb_to_right_edge << (1 - ss_x)) + spel_right,
-                        (xd->mb_to_top_edge << (1 - ss_y)) - spel_top,
-                        (xd->mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
+  clamp_mv(&clamped_mv,
+           xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
+           xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
+           xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
+           xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);
 
   return clamped_mv;
 }
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 957cfd2c0..622f75fe6 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -283,7 +283,7 @@ static void pack_mb_tokens(vp9_writer* const bc,
                            const TOKENEXTRA *const stop) {
   TOKENEXTRA *p = *tp;
 
-  while (p < stop) {
+  while (p < stop && p->token != EOSB_TOKEN) {
     const int t = p->token;
     const struct vp9_token *const a = vp9_coef_encodings + t;
     const vp9_extra_bit *const b = vp9_extra_bits + t;
@@ -293,10 +293,6 @@ static void pack_mb_tokens(vp9_writer* const bc,
     int n = a->len;
     vp9_prob probs[ENTROPY_NODES];
 
-    if (t == EOSB_TOKEN) {
-      ++p;
-      break;
-    }
     if (t >= TWO_TOKEN) {
       vp9_model_to_full_probs(p->context_tree, probs);
       pp = probs;
@@ -338,7 +334,7 @@ static void pack_mb_tokens(vp9_writer* const bc,
     ++p;
   }
 
-  *tp = p;
+  *tp = p + (p->token == EOSB_TOKEN);
 }
 
 static void write_sb_mv_ref(vp9_writer *w, MB_PREDICTION_MODE mode,
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 4f4ad04b3..ca863931e 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -58,10 +58,10 @@ void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
     for (i = 0; i < 4; ++i) {
       // Load inputs.
       if (0 == pass) {
-        input[0] = in[0 * stride] << 4;
-        input[1] = in[1 * stride] << 4;
-        input[2] = in[2 * stride] << 4;
-        input[3] = in[3 * stride] << 4;
+        input[0] = in[0 * stride] * 16;
+        input[1] = in[1 * stride] * 16;
+        input[2] = in[2 * stride] * 16;
+        input[3] = in[3 * stride] * 16;
         if (i == 0 && input[0]) {
           input[0] += 1;
         }
@@ -160,7 +160,7 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output,
   // Columns
   for (i = 0; i < 4; ++i) {
     for (j = 0; j < 4; ++j)
-      temp_in[j] = input[j * pitch + i] << 4;
+      temp_in[j] = input[j * pitch + i] * 16;
     if (i == 0 && temp_in[0])
       temp_in[0] += 1;
     ht.cols(temp_in, temp_out);
@@ -250,14 +250,14 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {
     int i;
     for (i = 0; i < 8; i++) {
       // stage 1
-      s0 = (input[0 * stride] + input[7 * stride]) << 2;
-      s1 = (input[1 * stride] + input[6 * stride]) << 2;
-      s2 = (input[2 * stride] + input[5 * stride]) << 2;
-      s3 = (input[3 * stride] + input[4 * stride]) << 2;
-      s4 = (input[3 * stride] - input[4 * stride]) << 2;
-      s5 = (input[2 * stride] - input[5 * stride]) << 2;
-      s6 = (input[1 * stride] - input[6 * stride]) << 2;
-      s7 = (input[0 * stride] - input[7 * stride]) << 2;
+      s0 = (input[0 * stride] + input[7 * stride]) * 4;
+      s1 = (input[1 * stride] + input[6 * stride]) * 4;
+      s2 = (input[2 * stride] + input[5 * stride]) * 4;
+      s3 = (input[3 * stride] + input[4 * stride]) * 4;
+      s4 = (input[3 * stride] - input[4 * stride]) * 4;
+      s5 = (input[2 * stride] - input[5 * stride]) * 4;
+      s6 = (input[1 * stride] - input[6 * stride]) * 4;
+      s7 = (input[0 * stride] - input[7 * stride]) * 4;
 
       // fdct4_1d(step, step);
       x0 = s0 + s3;
@@ -331,23 +331,23 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {
     for (i = 0; i < 16; i++) {
       if (0 == pass) {
         // Calculate input for the first 8 results.
-        input[0] = (in[0 * stride] + in[15 * stride]) << 2;
-        input[1] = (in[1 * stride] + in[14 * stride]) << 2;
-        input[2] = (in[2 * stride] + in[13 * stride]) << 2;
-        input[3] = (in[3 * stride] + in[12 * stride]) << 2;
-        input[4] = (in[4 * stride] + in[11 * stride]) << 2;
-        input[5] = (in[5 * stride] + in[10 * stride]) << 2;
-        input[6] = (in[6 * stride] + in[ 9 * stride]) << 2;
-        input[7] = (in[7 * stride] + in[ 8 * stride]) << 2;
+        input[0] = (in[0 * stride] + in[15 * stride]) * 4;
+        input[1] = (in[1 * stride] + in[14 * stride]) * 4;
+        input[2] = (in[2 * stride] + in[13 * stride]) * 4;
+        input[3] = (in[3 * stride] + in[12 * stride]) * 4;
+        input[4] = (in[4 * stride] + in[11 * stride]) * 4;
+        input[5] = (in[5 * stride] + in[10 * stride]) * 4;
+        input[6] = (in[6 * stride] + in[ 9 * stride]) * 4;
+        input[7] = (in[7 * stride] + in[ 8 * stride]) * 4;
         // Calculate input for the next 8 results.
-        step1[0] = (in[7 * stride] - in[ 8 * stride]) << 2;
-        step1[1] = (in[6 * stride] - in[ 9 * stride]) << 2;
-        step1[2] = (in[5 * stride] - in[10 * stride]) << 2;
-        step1[3] = (in[4 * stride] - in[11 * stride]) << 2;
-        step1[4] = (in[3 * stride] - in[12 * stride]) << 2;
-        step1[5] = (in[2 * stride] - in[13 * stride]) << 2;
-        step1[6] = (in[1 * stride] - in[14 * stride]) << 2;
-        step1[7] = (in[0 * stride] - in[15 * stride]) << 2;
+        step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4;
+        step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4;
+        step1[2] = (in[5 * stride] - in[10 * stride]) * 4;
+        step1[3] = (in[4 * stride] - in[11 * stride]) * 4;
+        step1[4] = (in[3 * stride] - in[12 * stride]) * 4;
+        step1[5] = (in[2 * stride] - in[13 * stride]) * 4;
+        step1[6] = (in[1 * stride] - in[14 * stride]) * 4;
+        step1[7] = (in[0 * stride] - in[15 * stride]) * 4;
       } else {
         // Calculate input for the first 8 results.
         input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
@@ -575,7 +575,7 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
   // Columns
   for (i = 0; i < 8; ++i) {
     for (j = 0; j < 8; ++j)
-      temp_in[j] = input[j * pitch + i] << 2;
+      temp_in[j] = input[j * pitch + i] * 4;
     ht.cols(temp_in, temp_out);
     for (j = 0; j < 8; ++j)
       outptr[j * 8 + i] = temp_out[j];
@@ -975,7 +975,7 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
   // Columns
   for (i = 0; i < 16; ++i) {
     for (j = 0; j < 16; ++j)
-      temp_in[j] = input[j * pitch + i] << 2;
+      temp_in[j] = input[j * pitch + i] * 4;
     ht.cols(temp_in, temp_out);
     for (j = 0; j < 16; ++j)
       outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
@@ -1335,7 +1335,7 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
   for (i = 0; i < 32; ++i) {
     int temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
-      temp_in[j] = input[j * shortpitch + i] << 2;
+      temp_in[j] = input[j * shortpitch + i] * 4;
     dct32_1d(temp_in, temp_out, 0);
     for (j = 0; j < 32; ++j)
       output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
@@ -1364,7 +1364,7 @@ void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {
   for (i = 0; i < 32; ++i) {
     int temp_in[32], temp_out[32];
     for (j = 0; j < 32; ++j)
-      temp_in[j] = input[j * shortpitch + i] << 2;
+      temp_in[j] = input[j * shortpitch + i] * 4;
     dct32_1d(temp_in, temp_out, 0);
     for (j = 0; j < 32; ++j)
       // TODO(cd): see quality impact of only doing
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 3b92a3905..ee938bda9 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -435,7 +435,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
       best_second_mv.as_int = ctx->second_best_ref_mv.as_int;
       if (mbmi->mode == NEWMV) {
         best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int;
-        best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int;
+        if (rf2 > 0)
+          best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int;
       }
       mbmi->best_mv.as_int = best_mv.as_int;
       mbmi->best_second_mv.as_int = best_second_mv.as_int;
@@ -2627,7 +2628,6 @@ void vp9_encode_frame(VP9_COMP *cpi) {
   } else {
     encode_frame_internal(cpi);
   }
-
 }
 
 static void sum_intra_stats(VP9_COMP *cpi, const MODE_INFO *mi) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 9cf7b8348..ad7e6d821 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -660,8 +660,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
             neutral_count++;
           }
 
-          mv.as_mv.row <<= 3;
-          mv.as_mv.col <<= 3;
+          mv.as_mv.row *= 8;
+          mv.as_mv.col *= 8;
           this_error = motion_error;
           vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
           xd->this_mi->mbmi.tx_size = TX_4X4;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 8f340d2af..ad8c8999a 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -339,13 +339,13 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
 
   int rr = ref_mv->as_mv.row;
   int rc = ref_mv->as_mv.col;
-  int br = bestmv->as_mv.row << 3;
-  int bc = bestmv->as_mv.col << 3;
+  int br = bestmv->as_mv.row * 8;
+  int bc = bestmv->as_mv.col * 8;
   int hstep = 4;
-  const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
-  const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
-  const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
-  const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
+  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
+  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
+  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
+  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
 
   int tr = br;
   int tc = bc;
@@ -436,20 +436,20 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
 
   int rr = ref_mv->as_mv.row;
   int rc = ref_mv->as_mv.col;
-  int br = bestmv->as_mv.row << 3;
-  int bc = bestmv->as_mv.col << 3;
+  int br = bestmv->as_mv.row * 8;
+  int bc = bestmv->as_mv.col * 8;
   int hstep = 4;
-  const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
-  const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
-  const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
-  const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
+  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
+  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
+  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
+  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
 
   int tr = br;
   int tc = bc;
 
   // central mv
-  bestmv->as_mv.row <<= 3;
-  bestmv->as_mv.col <<= 3;
+  bestmv->as_mv.row *= 8;
+  bestmv->as_mv.col *= 8;
 
   // calculate central point error
   besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
@@ -532,20 +532,20 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
 
   int rr = ref_mv->as_mv.row;
   int rc = ref_mv->as_mv.col;
-  int br = bestmv->as_mv.row << 3;
-  int bc = bestmv->as_mv.col << 3;
+  int br = bestmv->as_mv.row * 8;
+  int bc = bestmv->as_mv.col * 8;
   int hstep = 4;
-  const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
-  const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
-  const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
-  const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
+  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
+  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
+  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
+  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
 
   int tr = br;
   int tc = bc;
 
   // central mv
-  bestmv->as_mv.row <<= 3;
-  bestmv->as_mv.col <<= 3;
+  bestmv->as_mv.row *= 8;
+  bestmv->as_mv.col *= 8;
 
   // calculate central point error
   // TODO(yunqingwang): central pointer error was already calculated in full-
@@ -634,20 +634,20 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
 
   int rr = ref_mv->as_mv.row;
   int rc = ref_mv->as_mv.col;
-  int br = bestmv->as_mv.row << 3;
-  int bc = bestmv->as_mv.col << 3;
+  int br = bestmv->as_mv.row * 8;
+  int bc = bestmv->as_mv.col * 8;
   int hstep = 4;
-  const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
-  const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
-  const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
-  const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
+  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
+  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
+  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
+  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
 
   int tr = br;
   int tc = bc;
 
   // central mv
-  bestmv->as_mv.row <<= 3;
-  bestmv->as_mv.col <<= 3;
+  bestmv->as_mv.row *= 8;
+  bestmv->as_mv.col *= 8;
 
   // calculate central point error
   // TODO(yunqingwang): central pointer error was already calculated in full-
@@ -980,8 +980,8 @@ static int vp9_pattern_search(MACROBLOCK *x,
 
   this_offset = base_offset + (best_mv->as_mv.row * (in_what_stride)) +
       best_mv->as_mv.col;
-  this_mv.as_mv.row = best_mv->as_mv.row << 3;
-  this_mv.as_mv.col = best_mv->as_mv.col << 3;
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
   if (bestsad == INT_MAX)
     return INT_MAX;
   return
@@ -1243,8 +1243,8 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
       (*num00)++;
   }
 
-  this_mv.as_mv.row = best_mv->as_mv.row << 3;
-  this_mv.as_mv.col = best_mv->as_mv.col << 3;
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
 
   if (bestsad == INT_MAX)
     return INT_MAX;
@@ -1416,8 +1416,8 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
       (*num00)++;
   }
 
-  this_mv.as_mv.row = best_mv->as_mv.row << 3;
-  this_mv.as_mv.col = best_mv->as_mv.col << 3;
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
 
   if (bestsad == INT_MAX)
     return INT_MAX;
@@ -1567,8 +1567,8 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
     }
   }
 
-  this_mv.as_mv.row = best_mv->as_mv.row << 3;
-  this_mv.as_mv.col = best_mv->as_mv.col << 3;
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
 
   if (bestsad < INT_MAX)
     return
@@ -1688,8 +1688,8 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
 
   }
 
-  this_mv.as_mv.row = best_mv->as_mv.row << 3;
-  this_mv.as_mv.col = best_mv->as_mv.col << 3;
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
 
   if (bestsad < INT_MAX)
     return
@@ -1836,8 +1836,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
     }
   }
 
-  this_mv.as_mv.row = best_mv->as_mv.row << 3;
-  this_mv.as_mv.col = best_mv->as_mv.col << 3;
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
 
   if (bestsad < INT_MAX)
     return
@@ -1912,8 +1912,8 @@ int vp9_refining_search_sad_c(MACROBLOCK *x,
     }
   }
 
-  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
-  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+  this_mv.as_mv.col = ref_mv->as_mv.col * 8;
 
   if (bestsad < INT_MAX)
     return
@@ -2018,8 +2018,8 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
     }
   }
 
-  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
-  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+  this_mv.as_mv.col = ref_mv->as_mv.col * 8;
 
   if (bestsad < INT_MAX)
     return
@@ -2109,8 +2109,8 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
     }
   }
 
-  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
-  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+  this_mv.as_mv.col = ref_mv->as_mv.col * 8;
 
   if (bestsad < INT_MAX) {
     // FIXME(rbultje, yunqing): add full-pixel averaging variance functions
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 92edf49ab..9b20dafde 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -392,8 +392,7 @@ typedef struct VP9_COMP {
   int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
 
   int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES];
-  // FIXME(rbultje) int64_t?
-  int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
+  int64_t rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
   unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2];
   unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2];
   unsigned int single_ref_count[REF_CONTEXTS][2][2];
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 64185fcfa..470379c47 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -269,7 +269,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
         cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
             cost_token(vp9_inter_mode_tree,
                        cpi->common.fc.inter_mode_probs[i],
-                       vp9_inter_mode_encodings - NEARESTMV + m);
+                       vp9_inter_mode_encodings + (m - NEARESTMV));
     }
   }
 }
@@ -1683,17 +1683,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
       i = idy * 2 + idx;
 
       frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
-      frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
       vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
                                     &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
                                     &frame_mv[NEARMV][mbmi->ref_frame[0]],
                                     i, 0, mi_row, mi_col);
-      if (has_second_rf)
+      if (has_second_rf) {
+        frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
         vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
-                                   &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
-                                   &frame_mv[NEARMV][mbmi->ref_frame[1]],
-                                   i, 1, mi_row, mi_col);
-
+                                      &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
+                                      &frame_mv[NEARMV][mbmi->ref_frame[1]],
+                                      i, 1, mi_row, mi_col);
+      }
       // search for the best motion vector on this segment
       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
         const struct buf_2d orig_src = x->plane[0].src;
@@ -3262,8 +3262,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       continue;
 
     // Test best rd so far against threshold for trying this mode.
-    if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
-                     cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) ||
+    if ((best_rd < ((int64_t)cpi->rd_threshes[bsize][mode_index] *
+                     cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
         cpi->rd_threshes[bsize][mode_index] == INT_MAX)
       continue;
 
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 28d8f3637..4f63c52fd 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -769,7 +769,7 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t  *ctx,
         }
 
         /* Add the frame packet to the list of returned packets. */
-        round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1;
+        round = (vpx_codec_pts_t)1000000 * ctx->cfg.g_timebase.num / 2 - 1;
         delta = (dst_end_time_stamp - dst_time_stamp);
         pkt.kind = VPX_CODEC_CX_FRAME_PKT;
         pkt.data.frame.pts =
diff --git a/vpxenc.c b/vpxenc.c
index 0c742ca22..c618ef891 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -883,10 +883,10 @@ static unsigned int murmur(const void *key, int len, unsigned int seed) {
   while (len >= 4) {
     unsigned int k;
 
-    k  = data[0];
-    k |= data[1] << 8;
-    k |= data[2] << 16;
-    k |= data[3] << 24;
+    k  = (unsigned int)data[0];
+    k |= (unsigned int)data[1] << 8;
+    k |= (unsigned int)data[2] << 16;
+    k |= (unsigned int)data[3] << 24;
 
     k *= m;
     k ^= k >> r;
@@ -1765,9 +1765,13 @@ static void parse_global_config(struct global_config *global, char **argv) {
 
   /* Validate global config */
   if (global->passes == 0) {
+#if CONFIG_VP9_ENCODER
     // Make default VP9 passes = 2 until there is a better quality 1-pass
     // encoder
     global->passes = (global->codec->iface == vpx_codec_vp9_cx ? 2 : 1);
+#else
+    global->passes = 1;
+#endif
   }
 
   if (global->pass) {
@@ -2671,8 +2675,8 @@ int main(int argc, const char **argv_) {
           fprintf(stderr, "%7"PRId64" %s %.2f %s ",
                   cx_time > 9999999 ? cx_time / 1000 : cx_time,
                   cx_time > 9999999 ? "ms" : "us",
-                  fps >= 1.0 ? fps : 1000.0 / fps,
-                  fps >= 1.0 ? "fps" : "ms/f");
+                  fps >= 1.0 ? fps : fps * 60,
+                  fps >= 1.0 ? "fps" : "fpm");
           print_time("ETA", estimated_time_left);
           fprintf(stderr, "\033[K");
         }