32 files changed, 420 insertions, 372 deletions
diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
new file mode 100644
index 000000000..869ee5f3f
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
@@ -0,0 +1,68 @@
+;
+;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT  |vp9_short_idct4x4_1_add_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
+;                                  int dest_stride)
+; Only input[0] is used: a1 is derived from it and added to a 4x4 dest block.
+; r0  int16_t *input
+; r1  uint8_t *dest
+; r2  int dest_stride
+
+|vp9_short_idct4x4_1_add_neon| PROC
+    ldrsh            r0, [r0]                  ; r0 = input[0], sign-extended
+
+    ; generate cospi_16_64 = 11585
+    mov              r12, #0x2d00              ; r12 = 0x2d00
+    add              r12, #0x41                ; r12 = 0x2d41 = 11585
+
+    ; out = dct_const_round_shift(input[0] * cospi_16_64)
+    mul              r0, r0, r12               ; input[0] * cospi_16_64
+    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
+    asr              r0, r0, #14               ; >> DCT_CONST_BITS
+
+    ; out = dct_const_round_shift(out * cospi_16_64)
+    mul              r0, r0, r12               ; out * cospi_16_64
+    mov              r12, r1                   ; save dest: loads below advance r1
+    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
+    asr              r0, r0, #14               ; >> DCT_CONST_BITS
+
+    ; a1 = ROUND_POWER_OF_TWO(out, 4)
+    add              r0, r0, #8                ; + (1 <<((4) - 1))
+    asr              r0, r0, #4                ; >> 4
+
+    vdup.s16         q0, r0                    ; duplicate a1 into each 16-bit lane
+
+    vld1.32          {d2[0]}, [r1], r2         ; dest row 0 (4 bytes), r1 += r2
+    vld1.32          {d2[1]}, [r1], r2         ; dest row 1, r1 += r2
+    vld1.32          {d4[0]}, [r1], r2         ; dest row 2, r1 += r2
+    vld1.32          {d4[1]}, [r1]             ; dest row 3
+
+    vaddw.u8         q8, q0, d2                ; dest[x] + a1 (rows 0-1, widened)
+    vaddw.u8         q9, q0, d4                ; dest[x] + a1 (rows 2-3, widened)
+
+    vqmovun.s16      d6, q8                    ; clip_pixel (saturate s16 -> u8)
+    vqmovun.s16      d7, q9                    ; clip_pixel for rows 2-3
+
+    vst1.32          {d6[0]}, [r12], r2        ; store row 0 via saved dest
+    vst1.32          {d6[1]}, [r12], r2        ; store row 1
+    vst1.32          {d7[0]}, [r12], r2        ; store row 2
+    vst1.32          {d7[1]}, [r12]            ; store row 3
+
+    bx               lr                        ; return to caller
+    ENDP             ; |vp9_short_idct4x4_1_add_neon|
+
+    END
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index dfd152646..9a42ad9f1 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -89,9 +89,9 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
   return mode >= NEARESTMV && mode <= NEWMV;
 }
 
-#define VP9_INTRA_MODES (TM_PRED + 1)
+#define INTRA_MODES (TM_PRED + 1)
 
-#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
+#define INTER_MODES (1 + NEWMV - NEARESTMV)
 
 static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) {
   return (mode - NEARESTMV);
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 1d9684992..1e6cd4404 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -49,7 +49,7 @@ static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
       for (k = 0; k < taps; ++k)
         sum += src[src_x + k] * filter_x[k];
 
-      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS));
+      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
 
       /* Move to the next source pixel */
       x_q4 += x_step_q4;
@@ -91,7 +91,7 @@ static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
         sum += src[src_x + k] * filter_x[k];
 
       dst[x] = ROUND_POWER_OF_TWO(dst[x] +
-                   clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1);
+                   clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
 
       /* Move to the next source pixel */
       x_q4 += x_step_q4;
@@ -133,7 +133,7 @@ static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
         sum += src[(src_y + k) * src_stride] * filter_y[k];
 
       dst[y * dst_stride] =
-          clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS));
+          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
 
       /* Move to the next source pixel */
       y_q4 += y_step_q4;
@@ -175,7 +175,7 @@ static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
         sum += src[(src_y + k) * src_stride] * filter_y[k];
 
       dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
-           clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1);
+           clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
 
       /* Move to the next source pixel */
       y_q4 += y_step_q4;
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
index 9522b78bc..13220e97e 100644
--- a/vp9/common/vp9_convolve.h
+++ b/vp9/common/vp9_convolve.h
@@ -13,7 +13,7 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 
-#define VP9_FILTER_BITS 7
+#define FILTER_BITS 7
 
 typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 84d090c31..a75d1a9a4 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -14,8 +14,8 @@
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_seg_common.h"
 
-const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
-                                  [VP9_INTRA_MODES - 1] = {
+const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES]
+                                  [INTRA_MODES - 1] = {
   { 144,  11,  54, 157, 195, 130,  46,  58, 108 } /* y = dc */,
   { 118,  15, 123, 148, 131, 101,  44,  93, 131 } /* y = v */,
   { 113,  12,  23, 188, 226, 142,  26,  32, 125 } /* y = h */,
@@ -29,15 +29,15 @@ const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
 };
 
 static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS]
-                                        [VP9_INTRA_MODES - 1] = {
+                                        [INTRA_MODES - 1] = {
   {  65,  32,  18, 144, 162, 194,  41,  51,  98 } /* block_size < 8x8 */,
   { 132,  68,  18, 165, 217, 196,  45,  40,  78 } /* block_size < 16x16 */,
   { 173,  80,  19, 176, 240, 193,  64,  35,  46 } /* block_size < 32x32 */,
   { 221, 135,  38, 194, 248, 121,  96,  85,  29 } /* block_size >= 32x32 */
 };
 
-static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
-                                         [VP9_INTRA_MODES - 1] = {
+static const vp9_prob default_if_uv_probs[INTRA_MODES]
+                                         [INTRA_MODES - 1] = {
   { 120,   7,  76, 176, 208, 126,  28,  54, 103 } /* y = dc */,
   {  48,  12, 154, 155, 139,  90,  34, 117, 119 } /* y = v */,
   {  67,   6,  25, 204, 243, 158,  13,  21,  96 } /* y = h */,
@@ -98,9 +98,9 @@ static const vp9_prob default_partition_probs[NUM_FRAME_TYPES]
   }
 };
 
-const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
-                                 [VP9_INTRA_MODES]
-                                 [VP9_INTRA_MODES - 1] = {
+const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES]
+                                 [INTRA_MODES]
+                                 [INTRA_MODES - 1] = {
   { /* above = dc */
     { 137,  30,  42, 148, 151, 207,  70,  52,  91 } /* left = dc */,
     {  92,  45, 102, 136, 116, 180,  74,  90, 100 } /* left = v */,
@@ -215,7 +215,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
 };
 
 static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
-                                              [VP9_INTER_MODES - 1] = {
+                                              [INTER_MODES - 1] = {
   {2,       173,   34},  // 0 = both zero mv
   {7,       145,   85},  // 1 = one zero mv + one a predicted mv
   {7,       166,   63},  // 2 = two predicted mvs
@@ -226,7 +226,7 @@ static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
 };
 
 /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
-const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
+const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = {
   -DC_PRED, 2,                      /* 0 = DC_NODE */
   -TM_PRED, 4,                      /* 1 = TM_NODE */
   -V_PRED, 6,                       /* 2 = V_NODE */
@@ -250,8 +250,8 @@ const vp9_tree_index vp9_partition_tree[6] = {
   -PARTITION_VERT, -PARTITION_SPLIT
 };
 
-struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
-struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
+struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
+struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
 
 struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
 
@@ -317,8 +317,8 @@ static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
   192, 128, 64
 };
 
-static const vp9_prob default_switchable_interp_prob[VP9_SWITCHABLE_FILTERS+1]
-                                                  [VP9_SWITCHABLE_FILTERS-1] = {
+static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTERS+1]
+                                                  [SWITCHABLE_FILTERS-1] = {
   { 235, 162, },
   { 36, 255, },
   { 34, 3, },
@@ -338,11 +338,11 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
   vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
 }
 
-const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
+const vp9_tree_index vp9_switchable_interp_tree[SWITCHABLE_FILTERS*2-2] = {
   -EIGHTTAP, 2,
   -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
 };
-struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
 
 void vp9_entropy_mode_init() {
   vp9_tokens_from_tree(vp9_intra_mode_encodings, vp9_intra_mode_tree);
@@ -400,17 +400,17 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
                                              counts->single_ref[i][j]);
 
   for (i = 0; i < INTER_MODE_CONTEXTS; i++)
-    update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree,
+    update_mode_probs(INTER_MODES, vp9_inter_mode_tree,
                       counts->inter_mode[i], pre_fc->inter_mode_probs[i],
                       fc->inter_mode_probs[i], NEARESTMV);
 
   for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
-    update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
+    update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
                       counts->y_mode[i], pre_fc->y_mode_prob[i],
                       fc->y_mode_prob[i], 0);
 
-  for (i = 0; i < VP9_INTRA_MODES; ++i)
-    update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
+  for (i = 0; i < INTRA_MODES; ++i)
+    update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
                       counts->uv_mode[i], pre_fc->uv_mode_prob[i],
                       fc->uv_mode_prob[i], 0);
 
@@ -421,8 +421,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
                       fc->partition_prob[INTER_FRAME][i], 0);
 
   if (cm->mcomp_filter_type == SWITCHABLE) {
-    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
-      update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
+    for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+      update_mode_probs(SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
                         counts->switchable_interp[i],
                         pre_fc->switchable_interp_prob[i],
                         fc->switchable_interp_prob[i], 0);
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index fced2cdfe..2f8085df6 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -16,8 +16,8 @@
 
 #define SUBMVREF_COUNT 5
 #define TX_SIZE_CONTEXTS 2
-#define VP9_MODE_UPDATE_PROB  252
-#define VP9_SWITCHABLE_FILTERS 3   // number of switchable filters
+#define MODE_UPDATE_PROB  252
+#define SWITCHABLE_FILTERS 3   // number of switchable filters
 
 // #define MODE_STATS
 
@@ -35,24 +35,24 @@ struct tx_counts {
   unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
 };
 
-extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
-extern const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES]
-                                        [VP9_INTRA_MODES - 1];
+extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
+extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
+                                        [INTRA_MODES - 1];
 
 extern const vp9_tree_index vp9_intra_mode_tree[];
 extern const vp9_tree_index vp9_inter_mode_tree[];
 
-extern struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
-extern struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
+extern struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
+extern struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
 
 // probability models for partition information
 extern const vp9_tree_index vp9_partition_tree[];
 extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
 
 extern const vp9_tree_index vp9_switchable_interp_tree
-                 [2 * (VP9_SWITCHABLE_FILTERS - 1)];
+                 [2 * (SWITCHABLE_FILTERS - 1)];
 
-extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
+extern struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
 
 void vp9_entropy_mode_init();
 
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 85a1f3aa0..92581da2c 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -24,7 +24,7 @@ void vp9_init_mv_probs(struct VP9Common *cm);
 void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
 int vp9_use_mv_hp(const MV *ref);
 
-#define VP9_NMV_UPDATE_PROB  252
+#define NMV_UPDATE_PROB  252
 
 /* Symbols for coding which components are zero jointly */
 #define MV_JOINTS     4
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 33a97ccb3..7b1ffaeda 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -27,7 +27,7 @@ extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][SUBPEL_TAPS];
 
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
+// The BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
 // filter kernel as a 2 tap filter.
-#define VP9_BILINEAR_FILTERS_2TAP(x) \
+#define BILINEAR_FILTERS_2TAP(x) \
   (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
 
 #endif  // VP9_COMMON_VP9_FILTER_H_
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index d40f04f95..a669cc5e7 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -38,14 +38,14 @@
 #define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LOG2)
 
 typedef struct frame_contexts {
-  vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES - 1];
-  vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
+  vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
+  vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
   vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS]
                          [PARTITION_TYPES - 1];
   vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
-  vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
-                                 [VP9_SWITCHABLE_FILTERS - 1];
-  vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1];
+  vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
+                                 [SWITCHABLE_FILTERS - 1];
+  vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
   vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
   vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS];
   vp9_prob single_ref_prob[REF_CONTEXTS][2];
@@ -56,15 +56,15 @@ typedef struct frame_contexts {
 } FRAME_CONTEXT;
 
 typedef struct {
-  unsigned int y_mode[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES];
-  unsigned int uv_mode[VP9_INTRA_MODES][VP9_INTRA_MODES];
+  unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
+  unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
   unsigned int partition[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
   vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
   unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
                          [COEF_BANDS][PREV_COEF_CONTEXTS];
-  unsigned int switchable_interp[VP9_SWITCHABLE_FILTERS + 1]
-                                [VP9_SWITCHABLE_FILTERS];
-  unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES];
+  unsigned int switchable_interp[SWITCHABLE_FILTERS + 1]
+                                [SWITCHABLE_FILTERS];
+  unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
   unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
   unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
   unsigned int single_ref[REF_CONTEXTS][2][2];
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index 58e0e1d67..859c99ed5 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -53,7 +53,7 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = {
   { RGB_TO_YUV(0xCC33FF) },   /* Magenta */
 };
 
-static const unsigned char B_PREDICTION_MODE_colors[VP9_INTRA_MODES][3] = {
+static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODES][3] = {
   { RGB_TO_YUV(0x6633ff) },   /* Purple */
   { RGB_TO_YUV(0xcc33ff) },   /* Magenta */
   { RGB_TO_YUV(0xff33cc) },   /* Pink */
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index b00f58392..97ccb1376 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -31,25 +31,25 @@ unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
   const int left_mv_pred = is_inter_mode(left_mbmi->mode);
   const int left_interp = left_in_image && left_mv_pred
                               ? left_mbmi->interp_filter
-                              : VP9_SWITCHABLE_FILTERS;
+                              : SWITCHABLE_FILTERS;
 
   // above
   const int above_mv_pred = is_inter_mode(above_mbmi->mode);
   const int above_interp = above_in_image && above_mv_pred
                                ? above_mbmi->interp_filter
-                               : VP9_SWITCHABLE_FILTERS;
+                               : SWITCHABLE_FILTERS;
 
 
   if (left_interp == above_interp)
     return left_interp;
-  else if (left_interp == VP9_SWITCHABLE_FILTERS &&
-           above_interp != VP9_SWITCHABLE_FILTERS)
+  else if (left_interp == SWITCHABLE_FILTERS &&
+           above_interp != SWITCHABLE_FILTERS)
     return above_interp;
-  else if (left_interp != VP9_SWITCHABLE_FILTERS &&
-           above_interp == VP9_SWITCHABLE_FILTERS)
+  else if (left_interp != SWITCHABLE_FILTERS &&
+           above_interp == SWITCHABLE_FILTERS)
     return left_interp;
   else
-    return VP9_SWITCHABLE_FILTERS;
+    return SWITCHABLE_FILTERS;
 }
 // Returns a context number for the given MB prediction signal
 unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) {
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index f1d855695..4a451b909 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -285,7 +285,7 @@ intra_pred_allsizes(dc)
 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                               const uint8_t *above, const uint8_t *left);
 
-static intra_pred_fn pred[VP9_INTRA_MODES][4];
+static intra_pred_fn pred[INTRA_MODES][4];
 static intra_pred_fn dc_pred[2][2][4];
 
 static void init_intra_pred_fn_ptrs(void) {
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index d075443ed..30c1b26d0 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -295,7 +295,7 @@ specialize vp9_convolve8_avg_vert ssse3 neon
 # dct
 #
 prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_1_add sse2
+specialize vp9_short_idct4x4_1_add sse2 neon
 
 prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
 specialize vp9_short_idct4x4_add sse2 neon
@@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b
 specialize vp9_quantize_b $ssse3_x86_64
 
 prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b_32x32 $ssse3_x86_64
+specialize vp9_quantize_b_32x32
 
 #
 # Structured Similarity (SSIM)
diff --git a/vp9/common/vp9_scale.c b/vp9/common/vp9_scale.c
index 0b8dc23ea..989206c60 100644
--- a/vp9/common/vp9_scale.c
+++ b/vp9/common/vp9_scale.c
@@ -13,11 +13,11 @@
 #include "vp9/common/vp9_scale.h"
 
 static INLINE int scaled_x(int val, const struct scale_factors *scale) {
-  return val * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT;
+  return val * scale->x_scale_fp >> REF_SCALE_SHIFT;
 }
 
 static INLINE int scaled_y(int val, const struct scale_factors *scale) {
-  return val * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT;
+  return val * scale->y_scale_fp >> REF_SCALE_SHIFT;
 }
 
 static int unscaled_value(int val, const struct scale_factors *scale) {
@@ -58,7 +58,7 @@ static int get_fixed_point_scale_factor(int other_size, int this_size) {
   // and use fixed point scaling factors in decoding and encoding routines.
   // Hardware implementations can calculate scale factor in device driver
   // and use multiplication and shifting on hardware instead of division.
-  return (other_size << VP9_REF_SCALE_SHIFT) / this_size;
+  return (other_size << REF_SCALE_SHIFT) / this_size;
 }
 
 static int check_scale_factors(int other_w, int other_h,
@@ -73,8 +73,8 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
                                        int other_w, int other_h,
                                        int this_w, int this_h) {
   if (!check_scale_factors(other_w, other_h, this_w, this_h)) {
-    scale->x_scale_fp = VP9_REF_INVALID_SCALE;
-    scale->y_scale_fp = VP9_REF_INVALID_SCALE;
+    scale->x_scale_fp = REF_INVALID_SCALE;
+    scale->y_scale_fp = REF_INVALID_SCALE;
     return;
   }
 
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 827ae9bce..7a720d035 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -14,9 +14,9 @@
 #include "vp9/common/vp9_mv.h"
 #include "vp9/common/vp9_convolve.h"
 
-#define VP9_REF_SCALE_SHIFT 14
-#define VP9_REF_NO_SCALE (1 << VP9_REF_SCALE_SHIFT)
-#define VP9_REF_INVALID_SCALE -1
+#define REF_SCALE_SHIFT 14
+#define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
+#define REF_INVALID_SCALE -1
 
 struct scale_factors {
   int x_scale_fp;   // horizontal fixed point scale factor
@@ -39,13 +39,13 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
                                        int this_w, int this_h);
 
 static int vp9_is_valid_scale(const struct scale_factors *sf) {
-  return sf->x_scale_fp != VP9_REF_INVALID_SCALE &&
-         sf->y_scale_fp != VP9_REF_INVALID_SCALE;
+  return sf->x_scale_fp != REF_INVALID_SCALE &&
+         sf->y_scale_fp != REF_INVALID_SCALE;
 }
 
 static int vp9_is_scaled(const struct scale_factors *sf) {
-  return sf->x_scale_fp != VP9_REF_NO_SCALE ||
-         sf->y_scale_fp != VP9_REF_NO_SCALE;
+  return sf->x_scale_fp != REF_NO_SCALE ||
+         sf->y_scale_fp != REF_NO_SCALE;
 }
 
 #endif  //  VP9_COMMON_VP9_SCALE_H_
diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h
index 78d42359b..fe75481f6 100644
--- a/vp9/common/vp9_subpelvar.h
+++ b/vp9/common/vp9_subpelvar.h
@@ -81,7 +81,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
     for (j = 0; j < output_width; j++) {
       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                           (int)src_ptr[pixel_step] * vp9_filter[1],
-                          VP9_FILTER_BITS);
+                          FILTER_BITS);
 
       src_ptr++;
     }
@@ -133,7 +133,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
     for (j = 0; j < output_width; j++) {
       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                           (int)src_ptr[pixel_step] * vp9_filter[1],
-                          VP9_FILTER_BITS);
+                          FILTER_BITS);
       src_ptr++;
     }
 
diff --git a/vp9/decoder/vp9_dboolhuff.c b/vp9/decoder/vp9_dboolhuff.c
index 31b1ae2b0..06acec4db 100644
--- a/vp9/decoder/vp9_dboolhuff.c
+++ b/vp9/decoder/vp9_dboolhuff.c
@@ -16,7 +16,7 @@
 // This is meant to be a large, positive constant that can still be efficiently
 // loaded as an immediate (on platforms like ARM, for example).
 // Even relatively modest values like 100 would work fine.
-#define VP9_LOTS_OF_BITS 0x40000000
+#define LOTS_OF_BITS 0x40000000
 
 
 int vp9_reader_init(vp9_reader *r, const uint8_t *buffer, size_t size) {
@@ -41,13 +41,13 @@ void vp9_reader_fill(vp9_reader *r) {
   const uint8_t *buffer = r->buffer;
   VP9_BD_VALUE value = r->value;
   int count = r->count;
-  int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8);
+  int shift = BD_VALUE_SIZE - 8 - (count + 8);
   int loop_end = 0;
   const int bits_left = (int)((buffer_end - buffer)*CHAR_BIT);
   const int x = shift + CHAR_BIT - bits_left;
 
   if (x >= 0) {
-    count += VP9_LOTS_OF_BITS;
+    count += LOTS_OF_BITS;
     loop_end = x;
   }
 
@@ -66,7 +66,7 @@ void vp9_reader_fill(vp9_reader *r) {
 
 const uint8_t *vp9_reader_find_end(vp9_reader *r) {
   // Find the end of the coded buffer
-  while (r->count > CHAR_BIT && r->count < VP9_BD_VALUE_SIZE) {
+  while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) {
     r->count -= CHAR_BIT;
     r->buffer--;
   }
@@ -83,10 +83,10 @@ int vp9_reader_has_error(vp9_reader *r) {
   //
   // When reading a byte from the user's buffer, count is filled with 8 and
   // one byte is filled into the value buffer. When we reach the end of the
-  // data, count is additionally filled with VP9_LOTS_OF_BITS. So when
-  // count == VP9_LOTS_OF_BITS - 1, the user's data has been exhausted.
+  // data, count is additionally filled with LOTS_OF_BITS. So when
+  // count == LOTS_OF_BITS - 1, the user's data has been exhausted.
   //
   // 1 if we have tried to decode bits after the end of stream was encountered.
   // 0 No error.
-  return r->count > VP9_BD_VALUE_SIZE && r->count < VP9_LOTS_OF_BITS;
+  return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS;
 }
diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h
index c46dd73a3..c86451649 100644
--- a/vp9/decoder/vp9_dboolhuff.h
+++ b/vp9/decoder/vp9_dboolhuff.h
@@ -20,7 +20,7 @@
 
 typedef size_t VP9_BD_VALUE;
 
-#define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT)
+#define BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT)
 
 typedef struct {
   const uint8_t *buffer_end;
@@ -52,7 +52,7 @@ static int vp9_read(vp9_reader *br, int probability) {
   value = br->value;
   count = br->count;
 
-  bigsplit = (VP9_BD_VALUE)split << (VP9_BD_VALUE_SIZE - 8);
+  bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8);
 
   range = split;
 
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 747877d80..d1c59c364 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -251,7 +251,7 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
 }
 
 static void update_mv(vp9_reader *r, vp9_prob *p) {
-  if (vp9_read(r, VP9_NMV_UPDATE_PROB))
+  if (vp9_read(r, NMV_UPDATE_PROB))
     *p = (vp9_read_literal(r, 7) << 1) | 1;
 }
 
@@ -345,17 +345,17 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r,
 
 static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
   int i, j;
-  for (j = 0; j < VP9_SWITCHABLE_FILTERS + 1; ++j)
-    for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+  for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j)
+    for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
 }
 
 static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
   int i, j;
   for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
-    for (j = 0; j < VP9_INTER_MODES - 1; ++j)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+    for (j = 0; j < INTER_MODES - 1; ++j)
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
 }
 
@@ -551,22 +551,14 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
     switch (mbmi->mode) {
       case NEARMV:
         mv0->as_int = nearby.as_int;
-        clamp_mv2(&mv0->as_mv, xd);
-
-        if (is_compound) {
+        if (is_compound)
           mv1->as_int = nearby_second.as_int;
-          clamp_mv2(&mv1->as_mv, xd);
-        }
         break;
 
       case NEARESTMV:
         mv0->as_int = nearest.as_int;
-        clamp_mv2(&mv0->as_mv, xd);
-
-        if (is_compound) {
+        if (is_compound)
           mv1->as_int = nearest_second.as_int;
-          clamp_mv2(&mv1->as_mv, xd);
-        }
         break;
 
       case ZEROMV:
@@ -615,20 +607,20 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
 
   if (cm->comp_pred_mode == HYBRID_PREDICTION)
     for (i = 0; i < COMP_INTER_CONTEXTS; i++)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
 
   if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
     for (i = 0; i < REF_CONTEXTS; i++) {
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
     }
 
   if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
     for (i = 0; i < REF_CONTEXTS; i++)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
 }
 
@@ -639,7 +631,7 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
   // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove.
   // vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs));
   for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-    if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+    if (vp9_read(r, MODE_UPDATE_PROB))
       vp9_diff_update_prob(r, &cm->fc.mbskip_probs[k]);
 
   if (cm->frame_type != KEY_FRAME && !cm->intra_only) {
@@ -653,19 +645,19 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
       read_switchable_interp_probs(&cm->fc, r);
 
     for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]);
 
     read_comp_pred(cm, r);
 
     for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
-      for (i = 0; i < VP9_INTRA_MODES - 1; ++i)
-        if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      for (i = 0; i < INTRA_MODES - 1; ++i)
+        if (vp9_read(r, MODE_UPDATE_PROB))
           vp9_diff_update_prob(r, &cm->fc.y_mode_prob[j][i]);
 
     for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j)
       for (i = 0; i < PARTITION_TYPES - 1; ++i)
-        if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+        if (vp9_read(r, MODE_UPDATE_PROB))
           vp9_diff_update_prob(r, &cm->fc.partition_prob[INTER_FRAME][j][i]);
 
     read_mv_probs(r, nmvc, xd->allow_high_precision_mv);
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 5e9d24edc..fd88b6e6c 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -63,17 +63,17 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) {
 
   for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
     for (j = 0; j < TX_SIZES - 3; ++j)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
 
   for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
     for (j = 0; j < TX_SIZES - 2; ++j)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
 
   for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
     for (j = 0; j < TX_SIZES - 1; ++j)
-      if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+      if (vp9_read(r, MODE_UPDATE_PROB))
         vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
 }
 
@@ -592,6 +592,7 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
       lf_data->frame_buffer = fb;
       lf_data->cm = pc;
       lf_data->xd = pbi->mb;
+      lf_data->stop = 0;
       lf_data->y_only = 0;
     }
     vp9_loop_filter_frame_init(pc, pc->lf.filter_level);
@@ -615,6 +616,9 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
       if (num_threads > 1) {
         LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
 
+        // decoding has completed: finish up the loop filter in this thread.
+        if (mi_row + MI_BLOCK_SIZE >= pc->cur_tile_mi_row_end) continue;
+
         vp9_worker_sync(&pbi->lf_worker);
         lf_data->start = lf_start;
         lf_data->stop = mi_row;
@@ -627,13 +631,17 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
   }
 
   if (pbi->do_loopfilter_inline) {
+    int lf_start;
     if (num_threads > 1) {
-      // TODO(jzern): since the loop filter is delayed one mb row, this will be
-      // forced to wait for the last row scheduled in the for loop.
+      LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
       vp9_worker_sync(&pbi->lf_worker);
+      lf_start = lf_data->stop;
+    } else {
+      lf_start = mi_row - MI_BLOCK_SIZE;
     }
     vp9_loop_filter_rows(fb, pc, &pbi->mb,
-                         mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
+                         lf_start, pc->mi_rows, 0);
   }
 }
 
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index f6846e7fe..d7c73b665 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -41,9 +41,9 @@ unsigned __int64 Sectionbits[500];
 #endif
 
 #ifdef ENTROPY_STATS
-int intra_mode_stats[VP9_INTRA_MODES]
-                    [VP9_INTRA_MODES]
-                    [VP9_INTRA_MODES];
+int intra_mode_stats[INTRA_MODES]
+                    [INTRA_MODES]
+                    [INTRA_MODES];
 vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES];
 
 extern unsigned int active_section;
@@ -54,8 +54,8 @@ extern unsigned int active_section;
 int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES];
 int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1];
 int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2];
-int64_t switchable_interp_stats[VP9_SWITCHABLE_FILTERS+1]
-                               [VP9_SWITCHABLE_FILTERS];
+int64_t switchable_interp_stats[SWITCHABLE_FILTERS+1]
+                               [SWITCHABLE_FILTERS];
 
 void init_tx_count_stats() {
   vp9_zero(tx_count_32x32p_stats);
@@ -88,8 +88,8 @@ static void update_tx_count_stats(VP9_COMMON *cm) {
 
 static void update_switchable_interp_stats(VP9_COMMON *cm) {
   int i, j;
-  for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; ++i)
-    for (j = 0; j < VP9_SWITCHABLE_FILTERS; ++j) {
+  for (i = 0; i < SWITCHABLE_FILTERS+1; ++i)
+    for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
       switchable_interp_stats[i][j] += cm->fc.switchable_interp_count[i][j];
     }
 }
@@ -141,11 +141,11 @@ void write_switchable_interp_stats() {
   fclose(fp);
 
   printf(
-      "vp9_default_switchable_filter_count[VP9_SWITCHABLE_FILTERS+1]"
-      "[VP9_SWITCHABLE_FILTERS] = {\n");
-  for (i = 0; i < VP9_SWITCHABLE_FILTERS+1; i++) {
+      "vp9_default_switchable_filter_count[SWITCHABLE_FILTERS+1]"
+      "[SWITCHABLE_FILTERS] = {\n");
+  for (i = 0; i < SWITCHABLE_FILTERS+1; i++) {
     printf("  { ");
-    for (j = 0; j < VP9_SWITCHABLE_FILTERS; j++) {
+    for (j = 0; j < SWITCHABLE_FILTERS; j++) {
       printf("%"PRId64", ", switchable_interp_stats[i][j]);
     }
     printf("},\n");
@@ -181,7 +181,7 @@ static void update_mode(
   n--;
 
   for (i = 0; i < n; ++i) {
-    vp9_cond_prob_diff_update(w, &Pcur[i], VP9_MODE_UPDATE_PROB, bct[i]);
+    vp9_cond_prob_diff_update(w, &Pcur[i], MODE_UPDATE_PROB, bct[i]);
   }
 }
 
@@ -189,11 +189,11 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi,
                                       vp9_writer* const bc) {
   VP9_COMMON *const cm = &cpi->common;
   int j;
-  vp9_prob pnew[VP9_INTRA_MODES - 1];
-  unsigned int bct[VP9_INTRA_MODES - 1][2];
+  vp9_prob pnew[INTRA_MODES - 1];
+  unsigned int bct[INTRA_MODES - 1][2];
 
   for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
-    update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_tree, pnew,
+    update_mode(bc, INTRA_MODES, vp9_intra_mode_tree, pnew,
                 cm->fc.y_mode_prob[j], bct,
                 (unsigned int *)cpi->y_mode_count[j]);
 }
@@ -228,7 +228,7 @@ void vp9_update_skip_probs(VP9_COMP *cpi, vp9_writer *w) {
 
   for (k = 0; k < MBSKIP_CONTEXTS; ++k)
     vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k],
-                              VP9_MODE_UPDATE_PROB, cm->counts.mbskip[k]);
+                              MODE_UPDATE_PROB, cm->counts.mbskip[k]);
 }
 
 static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) {
@@ -238,20 +238,20 @@ static void write_intra_mode(vp9_writer *bc, int m, const vp9_prob *p) {
 static void update_switchable_interp_probs(VP9_COMP *const cpi,
                                            vp9_writer* const bc) {
   VP9_COMMON *const pc = &cpi->common;
-  unsigned int branch_ct[VP9_SWITCHABLE_FILTERS + 1]
-                        [VP9_SWITCHABLE_FILTERS - 1][2];
-  vp9_prob new_prob[VP9_SWITCHABLE_FILTERS + 1][VP9_SWITCHABLE_FILTERS - 1];
+  unsigned int branch_ct[SWITCHABLE_FILTERS + 1]
+                        [SWITCHABLE_FILTERS - 1][2];
+  vp9_prob new_prob[SWITCHABLE_FILTERS + 1][SWITCHABLE_FILTERS - 1];
   int i, j;
-  for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
+  for (j = 0; j <= SWITCHABLE_FILTERS; ++j) {
     vp9_tree_probs_from_distribution(
         vp9_switchable_interp_tree,
         new_prob[j], branch_ct[j],
         pc->counts.switchable_interp[j], 0);
   }
-  for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
-    for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
+  for (j = 0; j <= SWITCHABLE_FILTERS; ++j) {
+    for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) {
       vp9_cond_prob_diff_update(bc, &pc->fc.switchable_interp_prob[j][i],
-                                VP9_MODE_UPDATE_PROB, branch_ct[j][i]);
+                                MODE_UPDATE_PROB, branch_ct[j][i]);
     }
   }
 #ifdef MODE_STATS
@@ -264,16 +264,16 @@ static void update_inter_mode_probs(VP9_COMMON *pc, vp9_writer* const bc) {
   int i, j;
 
   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
-    unsigned int branch_ct[VP9_INTER_MODES - 1][2];
-    vp9_prob new_prob[VP9_INTER_MODES - 1];
+    unsigned int branch_ct[INTER_MODES - 1][2];
+    vp9_prob new_prob[INTER_MODES - 1];
 
     vp9_tree_probs_from_distribution(vp9_inter_mode_tree,
                                      new_prob, branch_ct,
                                      pc->counts.inter_mode[i], NEARESTMV);
 
-    for (j = 0; j < VP9_INTER_MODES - 1; ++j)
+    for (j = 0; j < INTER_MODES - 1; ++j)
       vp9_cond_prob_diff_update(bc, &pc->fc.inter_mode_probs[i][j],
-                                VP9_MODE_UPDATE_PROB, branch_ct[j]);
+                                MODE_UPDATE_PROB, branch_ct[j]);
   }
 }
 
@@ -1049,7 +1049,7 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
                                      ct_8x8p);
       for (j = 0; j < TX_SIZES - 3; j++)
         vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j],
-                                  VP9_MODE_UPDATE_PROB, ct_8x8p[j]);
+                                  MODE_UPDATE_PROB, ct_8x8p[j]);
     }
 
     for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
@@ -1057,14 +1057,14 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
                                        ct_16x16p);
       for (j = 0; j < TX_SIZES - 2; j++)
         vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j],
-                                  VP9_MODE_UPDATE_PROB, ct_16x16p[j]);
+                                  MODE_UPDATE_PROB, ct_16x16p[j]);
     }
 
     for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
       tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p);
       for (j = 0; j < TX_SIZES - 1; j++)
         vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
-                                  VP9_MODE_UPDATE_PROB, ct_32x32p[j]);
+                                  MODE_UPDATE_PROB, ct_32x32p[j]);
     }
 #ifdef MODE_STATS
     if (!cpi->dummy_packing)
@@ -1087,17 +1087,17 @@ static void fix_mcomp_filter_type(VP9_COMP *cpi) {
 
   if (cm->mcomp_filter_type == SWITCHABLE) {
     // Check to see if only one of the filters is actually used
-    int count[VP9_SWITCHABLE_FILTERS];
+    int count[SWITCHABLE_FILTERS];
     int i, j, c = 0;
-    for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+    for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
       count[i] = 0;
-      for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j)
+      for (j = 0; j <= SWITCHABLE_FILTERS; ++j)
         count[i] += cm->counts.switchable_interp[j][i];
       c += (count[i] > 0);
     }
     if (c == 1) {
       // Only one filter is used. So set the filter at frame level
-      for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
         if (count[i]) {
           cm->mcomp_filter_type = i;
           break;
@@ -1386,7 +1386,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
 
     for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
       vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i],
-                                VP9_MODE_UPDATE_PROB,
+                                MODE_UPDATE_PROB,
                                 cpi->intra_inter_count[i]);
 
     if (cm->allow_comp_inter_inter) {
@@ -1400,7 +1400,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
         if (use_hybrid_pred)
           for (i = 0; i < COMP_INTER_CONTEXTS; i++)
             vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
-                                      VP9_MODE_UPDATE_PROB,
+                                      MODE_UPDATE_PROB,
                                       cpi->comp_inter_count[i]);
       }
     }
@@ -1408,10 +1408,10 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
     if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
       for (i = 0; i < REF_CONTEXTS; i++) {
         vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0],
-                                  VP9_MODE_UPDATE_PROB,
+                                  MODE_UPDATE_PROB,
                                   cpi->single_ref_count[i][0]);
         vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1],
-                                  VP9_MODE_UPDATE_PROB,
+                                  MODE_UPDATE_PROB,
                                   cpi->single_ref_count[i][1]);
       }
     }
@@ -1419,7 +1419,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
     if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
       for (i = 0; i < REF_CONTEXTS; i++)
         vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i],
-                                  VP9_MODE_UPDATE_PROB,
+                                  MODE_UPDATE_PROB,
                                   cpi->comp_ref_count[i]);
 
     update_mbintra_mode_probs(cpi, &header_bc);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 790b3c22c..9426f44ab 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -48,7 +48,7 @@ typedef struct {
   int comp_pred_diff;
   int single_pred_diff;
   int64_t tx_rd_diff[TX_MODES];
-  int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
+  int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
 
   // Bit flag for each mode whether it has high error in comparison to others.
   unsigned int modes_with_high_error;
@@ -121,9 +121,9 @@ struct macroblock {
   int mbmode_cost[MB_MODE_COUNT];
   unsigned inter_mode_cost[INTER_MODE_CONTEXTS][MB_MODE_COUNT - NEARESTMV];
   int intra_uv_mode_cost[2][MB_MODE_COUNT];
-  int y_mode_costs[VP9_INTRA_MODES][VP9_INTRA_MODES][VP9_INTRA_MODES];
-  int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1]
-                             [VP9_SWITCHABLE_FILTERS];
+  int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+  int switchable_interp_costs[SWITCHABLE_FILTERS + 1]
+                             [SWITCHABLE_FILTERS];
 
   // These define limits to motion vector components to prevent them
   // from extending outside the UMV borders
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f8da500e6..360abad77 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -56,7 +56,7 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
  * This also avoids the need for divide by zero checks in
  *  vp9_activity_masking().
  */
-#define VP9_ACTIVITY_AVG_MIN (64)
+#define ACTIVITY_AVG_MIN (64)
 
 /* Motion vector component magnitude threshold for defining fast motion. */
 #define FAST_MOTION_MV_THRESH (24)
@@ -131,8 +131,8 @@ static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) {
     mb_activity = tt_activity_measure(x);
   }
 
-  if (mb_activity < VP9_ACTIVITY_AVG_MIN)
-    mb_activity = VP9_ACTIVITY_AVG_MIN;
+  if (mb_activity < ACTIVITY_AVG_MIN)
+    mb_activity = ACTIVITY_AVG_MIN;
 
   return mb_activity;
 }
@@ -182,8 +182,8 @@ static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) {
   cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs);
 #endif  // ACT_MEDIAN
 
-  if (cpi->activity_avg < VP9_ACTIVITY_AVG_MIN)
-    cpi->activity_avg = VP9_ACTIVITY_AVG_MIN;
+  if (cpi->activity_avg < ACTIVITY_AVG_MIN)
+    cpi->activity_avg = ACTIVITY_AVG_MIN;
 
   // Experimental code: return fixed value normalized for several clips
   if (ALT_ACT_MEASURE)
@@ -448,7 +448,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
     cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
     cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
 
-    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+    for (i = 0; i <= SWITCHABLE_FILTERS; i++)
       cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
   }
 }
@@ -2414,15 +2414,15 @@ void vp9_encode_frame(VP9_COMP *cpi) {
         cpi->rd_filter_threshes[frame_type][1] >
             cpi->rd_filter_threshes[frame_type][2] &&
         cpi->rd_filter_threshes[frame_type][1] >
-            cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+            cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
       filter_type = EIGHTTAP_SMOOTH;
     } else if (cpi->rd_filter_threshes[frame_type][2] >
             cpi->rd_filter_threshes[frame_type][0] &&
         cpi->rd_filter_threshes[frame_type][2] >
-            cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+            cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
       filter_type = EIGHTTAP_SHARP;
     } else if (cpi->rd_filter_threshes[frame_type][0] >
-                  cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+                  cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
       filter_type = EIGHTTAP;
     } else {
       filter_type = SWITCHABLE;
@@ -2445,7 +2445,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
       cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
     }
 
-    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+    for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
       const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs;
       cpi->rd_filter_threshes[frame_type][i] =
           (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 2b7cb0b5b..1203c00ab 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -236,22 +236,22 @@ void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) {
 
   for (j = 0; j < MV_JOINTS - 1; ++j)
     update_mv(bc, branch_ct_joint[j], &mvc->joints[j], prob.joints[j],
-              VP9_NMV_UPDATE_PROB);
+              NMV_UPDATE_PROB);
 
   for (i = 0; i < 2; ++i) {
     update_mv(bc, branch_ct_sign[i], &mvc->comps[i].sign,
-              prob.comps[i].sign, VP9_NMV_UPDATE_PROB);
+              prob.comps[i].sign, NMV_UPDATE_PROB);
     for (j = 0; j < MV_CLASSES - 1; ++j)
       update_mv(bc, branch_ct_classes[i][j], &mvc->comps[i].classes[j],
-                prob.comps[i].classes[j], VP9_NMV_UPDATE_PROB);
+                prob.comps[i].classes[j], NMV_UPDATE_PROB);
 
     for (j = 0; j < CLASS0_SIZE - 1; ++j)
       update_mv(bc, branch_ct_class0[i][j], &mvc->comps[i].class0[j],
-                prob.comps[i].class0[j], VP9_NMV_UPDATE_PROB);
+                prob.comps[i].class0[j], NMV_UPDATE_PROB);
 
     for (j = 0; j < MV_OFFSET_BITS; ++j)
       update_mv(bc, branch_ct_bits[i][j], &mvc->comps[i].bits[j],
-                prob.comps[i].bits[j], VP9_NMV_UPDATE_PROB);
+                prob.comps[i].bits[j], NMV_UPDATE_PROB);
   }
 
   for (i = 0; i < 2; ++i) {
@@ -260,20 +260,20 @@ void vp9_write_nmv_probs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) {
       for (k = 0; k < 3; ++k)
         update_mv(bc, branch_ct_class0_fp[i][j][k],
                   &mvc->comps[i].class0_fp[j][k],
-                  prob.comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB);
+                  prob.comps[i].class0_fp[j][k], NMV_UPDATE_PROB);
     }
 
     for (j = 0; j < 3; ++j)
       update_mv(bc, branch_ct_fp[i][j], &mvc->comps[i].fp[j],
-                prob.comps[i].fp[j], VP9_NMV_UPDATE_PROB);
+                prob.comps[i].fp[j], NMV_UPDATE_PROB);
   }
 
   if (usehp) {
     for (i = 0; i < 2; ++i) {
       update_mv(bc, branch_ct_class0_hp[i], &mvc->comps[i].class0_hp,
-                prob.comps[i].class0_hp, VP9_NMV_UPDATE_PROB);
+                prob.comps[i].class0_hp, NMV_UPDATE_PROB);
       update_mv(bc, branch_ct_hp[i], &mvc->comps[i].hp,
-                prob.comps[i].hp, VP9_NMV_UPDATE_PROB);
+                prob.comps[i].hp, NMV_UPDATE_PROB);
     }
   }
 }
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index 993aba767..5b2365308 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -20,8 +20,8 @@ void vp9_init_mode_costs(VP9_COMP *c) {
   const vp9_tree_p KT = vp9_intra_mode_tree;
   int i, j;
 
-  for (i = 0; i < VP9_INTRA_MODES; i++) {
-    for (j = 0; j < VP9_INTRA_MODES; j++) {
+  for (i = 0; i < INTRA_MODES; i++) {
+    for (j = 0; j < INTRA_MODES; j++) {
       vp9_cost_tokens((int *)c->mb.y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                       KT);
     }
@@ -31,12 +31,12 @@ void vp9_init_mode_costs(VP9_COMP *c) {
   vp9_cost_tokens(c->mb.mbmode_cost, x->fc.y_mode_prob[1],
                   vp9_intra_mode_tree);
   vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
-                  x->fc.uv_mode_prob[VP9_INTRA_MODES - 1], vp9_intra_mode_tree);
+                  x->fc.uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
   vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
-                  vp9_kf_uv_mode_prob[VP9_INTRA_MODES - 1],
+                  vp9_kf_uv_mode_prob[INTRA_MODES - 1],
                   vp9_intra_mode_tree);
 
-  for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i)
+  for (i = 0; i <= SWITCHABLE_FILTERS; ++i)
     vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
                     x->fc.switchable_interp_prob[i],
                     vp9_switchable_interp_tree);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 7e44ebd04..34bd43ef1 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -98,9 +98,9 @@ FILE *keyfile;
 
 
 #ifdef ENTROPY_STATS
-extern int intra_mode_stats[VP9_INTRA_MODES]
-                           [VP9_INTRA_MODES]
-                           [VP9_INTRA_MODES];
+extern int intra_mode_stats[INTRA_MODES]
+                           [INTRA_MODES]
+                           [INTRA_MODES];
 #endif
 
 #ifdef MODE_STATS
@@ -444,9 +444,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
 void vp9_update_mode_context_stats(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
   int i, j;
-  unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] =
+  unsigned int (*inter_mode_counts)[INTER_MODES - 1][2] =
       cm->fc.inter_mode_counts;
-  int64_t (*mv_ref_stats)[VP9_INTER_MODES - 1][2] = cpi->mv_ref_stats;
+  int64_t (*mv_ref_stats)[INTER_MODES - 1][2] = cpi->mv_ref_stats;
   FILE *f;
 
   // Read the past stats counters
@@ -460,7 +460,7 @@ void vp9_update_mode_context_stats(VP9_COMP *cpi) {
 
   // Add in the values for this frame
   for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
-    for (j = 0; j < VP9_INTER_MODES - 1; j++) {
+    for (j = 0; j < INTER_MODES - 1; j++) {
       mv_ref_stats[i][j][0] += (int64_t)inter_mode_counts[i][j][0];
       mv_ref_stats[i][j][1] += (int64_t)inter_mode_counts[i][j][1];
     }
@@ -479,12 +479,12 @@ void print_mode_context(VP9_COMP *cpi) {
   fprintf(f, "#include \"vp9_entropy.h\"\n");
   fprintf(
       f,
-      "const int inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1] =");
+      "const int inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] =");
   fprintf(f, "{\n");
   for (j = 0; j < INTER_MODE_CONTEXTS; j++) {
     fprintf(f, "  {/* %d */ ", j);
     fprintf(f, "    ");
-    for (i = 0; i < VP9_INTER_MODES - 1; i++) {
+    for (i = 0; i < INTER_MODES - 1; i++) {
       int this_prob;
       int64_t count = cpi->mv_ref_stats[j][i][0] + cpi->mv_ref_stats[j][i][1];
       if (count)
@@ -735,7 +735,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->mode_search_skip_flags = 0;
   sf->disable_split_var_thresh = 0;
   sf->disable_filter_search_var_thresh = 0;
-  sf->last_chroma_intra_mode = TM_PRED;
+  sf->intra_y_mode_mask = ALL_INTRA_MODES;
+  sf->intra_uv_mode_mask = ALL_INTRA_MODES;
   sf->use_rd_breakout = 0;
   sf->skip_encode_sb = 0;
   sf->use_uv_intra_rd_estimate = 0;
@@ -765,7 +766,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
       sf->static_segmentation = 0;
 #endif
       sf->use_avoid_tested_higherror = 1;
-      sf->adaptive_rd_thresh = 1;
+      sf->adaptive_rd_thresh = MIN((speed + 1), 4);
 
       if (speed == 1) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -798,6 +799,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         // the main framework of partition search type.
         sf->disable_split_var_thresh = 0;
         sf->disable_filter_search_var_thresh = 16;
+
+        sf->intra_y_mode_mask = INTRA_DC_TM_H_V;
+        sf->intra_uv_mode_mask = INTRA_DC_TM_H_V;
       }
       if (speed == 2) {
         sf->adjust_thresholds_by_speed = 1;
@@ -819,7 +823,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
                                      FLAG_SKIP_COMP_REFMISMATCH |
                                      FLAG_SKIP_INTRA_LOWVAR |
                                      FLAG_EARLY_TERMINATE;
-        sf->last_chroma_intra_mode = DC_PRED;
+        sf->intra_y_mode_mask = INTRA_DC_TM;
+        sf->intra_uv_mode_mask = INTRA_DC_TM;
         sf->use_uv_intra_rd_estimate = 1;
         sf->use_rd_breakout = 1;
         sf->skip_encode_sb = 1;
@@ -859,6 +864,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->subpel_iters_per_step = 1;
         sf->disable_split_var_thresh = 64;
         sf->disable_filter_search_var_thresh = 64;
+        sf->intra_y_mode_mask = INTRA_DC_ONLY;
+        sf->intra_uv_mode_mask = INTRA_DC_ONLY;
       }
       if (speed == 4) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -1395,7 +1402,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
 }
 
 VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
-  int i;
+  int i, j;
   volatile union {
     VP9_COMP *cpi;
     VP9_PTR   ptr;
@@ -1597,9 +1604,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
 
   vp9_set_speed_features(cpi);
 
-  // Set starting values of RD threshold multipliers (128 = *1)
-  for (i = 0; i < MAX_MODES; i++)
-    cpi->rd_thresh_mult[i] = 128;
+  // Default rd threshold factors for mode selection
+  for (i = 0; i < BLOCK_SIZES; ++i)
+    for (j = 0; j < MAX_MODES; ++j)
+      cpi->rd_thresh_freq_fact[i][j] = 32;
 
 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
             SDX3F, SDX8F, SDX4DF)\
@@ -1800,18 +1808,18 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
 
       fprintf(fmode, "\n#include \"vp9_entropymode.h\"\n\n");
       fprintf(fmode, "const unsigned int vp9_kf_default_bmode_counts ");
-      fprintf(fmode, "[VP9_INTRA_MODES][VP9_INTRA_MODES]"
-                     "[VP9_INTRA_MODES] =\n{\n");
+      fprintf(fmode, "[INTRA_MODES][INTRA_MODES]"
+                     "[INTRA_MODES] =\n{\n");
 
-      for (i = 0; i < VP9_INTRA_MODES; i++) {
+      for (i = 0; i < INTRA_MODES; i++) {
 
         fprintf(fmode, "    { // Above Mode :  %d\n", i);
 
-        for (j = 0; j < VP9_INTRA_MODES; j++) {
+        for (j = 0; j < INTRA_MODES; j++) {
 
           fprintf(fmode, "        {");
 
-          for (k = 0; k < VP9_INTRA_MODES; k++) {
+          for (k = 0; k < INTRA_MODES; k++) {
             if (!intra_mode_stats[i][j][k])
               fprintf(fmode, " %5d, ", 1);
             else
@@ -2629,8 +2637,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 
   // Set various flags etc to special state if it is a key frame
   if (cm->frame_type == KEY_FRAME) {
-    int i;
-
     // Reset the loop filter deltas and segmentation map
     setup_features(cm);
 
@@ -2643,10 +2649,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     // The alternate reference frame cannot be active for a key frame
     cpi->source_alt_ref_active = 0;
 
-    // Reset the RD threshold multipliers to default of * 1 (128)
-    for (i = 0; i < MAX_MODES; i++)
-      cpi->rd_thresh_mult[i] = 128;
-
     cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0);
     cm->frame_parallel_decoding_mode =
       (cpi->oxcf.frame_parallel_decoding_mode != 0);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index c7b35a8c6..653615949 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -79,15 +79,15 @@ typedef struct {
 
   vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
 
-  vp9_prob y_mode_prob[4][VP9_INTRA_MODES - 1];
-  vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
+  vp9_prob y_mode_prob[4][INTRA_MODES - 1];
+  vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
   vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
 
-  vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
-                                 [VP9_SWITCHABLE_FILTERS - 1];
+  vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
+                                 [SWITCHABLE_FILTERS - 1];
 
-  int inter_mode_counts[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2];
-  vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1];
+  int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
+  vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
 
   struct tx_probs tx_probs;
   vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
@@ -238,6 +238,11 @@ typedef enum {
   // Other methods to come
 } SUBPEL_SEARCH_METHODS;
 
+#define ALL_INTRA_MODES 0x3FF  // bits 0..9 set; tested as mask & (1 << mode)
+#define INTRA_DC_ONLY (1 << DC_PRED)  // only the DC_PRED bit
+#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED))  // DC_PRED and TM_PRED
+#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
+
 typedef struct {
   int RD;
   SEARCH_METHODS search_method;
@@ -288,7 +293,8 @@ typedef struct {
   // A source variance threshold below which filter search is disabled
   // Choose a very large value (UINT_MAX) to use 8-tap always
   unsigned int disable_filter_search_var_thresh;
-  MB_PREDICTION_MODE last_chroma_intra_mode;
+  int intra_y_mode_mask;
+  int intra_uv_mode_mask;
   int use_rd_breakout;
   int use_uv_intra_rd_estimate;
   int use_fast_lpf_pick;
@@ -375,8 +381,6 @@ typedef struct VP9_COMP {
   int ref_frame_mask;
   int set_ref_frame_mask;
 
-  int rd_thresh_mult[MAX_MODES];
-  int rd_baseline_thresh[BLOCK_SIZES][MAX_MODES];
   int rd_threshes[BLOCK_SIZES][MAX_MODES];
   int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
 
@@ -392,9 +396,9 @@ typedef struct VP9_COMP {
   // FIXME(rbultje) can this overflow?
   int rd_tx_select_threshes[4][TX_MODES];
 
-  int64_t rd_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
-  int64_t rd_filter_threshes[4][VP9_SWITCHABLE_FILTERS + 1];
-  int64_t rd_filter_cache[VP9_SWITCHABLE_FILTERS + 1];
+  int64_t rd_filter_diff[SWITCHABLE_FILTERS + 1];
+  int64_t rd_filter_threshes[4][SWITCHABLE_FILTERS + 1];
+  int64_t rd_filter_cache[SWITCHABLE_FILTERS + 1];
 
   int RDMULT;
   int RDDIV;
@@ -469,8 +473,8 @@ typedef struct VP9_COMP {
 
   int cq_target_quality;
 
-  int y_mode_count[4][VP9_INTRA_MODES];
-  int y_uv_mode_count[VP9_INTRA_MODES][VP9_INTRA_MODES];
+  int y_mode_count[4][INTRA_MODES];
+  int y_uv_mode_count[INTRA_MODES][INTRA_MODES];
   unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 
   nmv_context_counts NMVcount;
@@ -635,8 +639,8 @@ typedef struct VP9_COMP {
 
   int dummy_packing;    /* flag to indicate if packing is dummy */
 
-  unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
-                                      [VP9_SWITCHABLE_FILTERS];
+  unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1]
+                                      [SWITCHABLE_FILTERS];
 
   unsigned int txfm_stepdown_count[TX_SIZES];
 
@@ -657,7 +661,7 @@ typedef struct VP9_COMP {
 #endif
 
 #ifdef ENTROPY_STATS
-  int64_t mv_ref_stats[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2];
+  int64_t mv_ref_stats[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
 #endif
 } VP9_COMP;
 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 08b0c454a..74282aafe 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -104,9 +104,8 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
 static int rd_thresh_block_size_factor[BLOCK_SIZES] =
   {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
 
-#define BASE_RD_THRESH_FREQ_FACT 16
-#define MAX_RD_THRESH_FREQ_FACT 32
-#define MAX_RD_THRESH_FREQ_INC 1
+#define MAX_RD_THRESH_FACT 64
+#define RD_THRESH_INC 1
 
 static void fill_token_costs(vp9_coeff_cost *c,
                              vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
@@ -212,12 +211,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
         } else {
           cpi->rd_threshes[bsize][i] = INT_MAX;
         }
-        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
-
-        if (cpi->sf.adaptive_rd_thresh)
-          cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
-        else
-          cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
       }
     }
   } else {
@@ -236,12 +229,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
         } else {
           cpi->rd_threshes[bsize][i] = INT_MAX;
         }
-        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
-
-        if (cpi->sf.adaptive_rd_thresh)
-          cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
-        else
-          cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
       }
     }
   }
@@ -1043,6 +1030,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     int64_t this_rd;
     int ratey = 0;
+
+    if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
+      continue;
+
     // Only do the oblique modes if the best so far is
     // one of the neighboring directional modes
     if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
@@ -1228,6 +1219,9 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
     int64_t local_tx_cache[TX_MODES];
     const int mis = xd->mode_info_stride;
 
+    if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
+      continue;
+
     if (cpi->common.frame_type == KEY_FRAME) {
       const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
       const MB_PREDICTION_MODE L = xd->left_available ?
@@ -1325,10 +1319,14 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse;
 
-  MB_PREDICTION_MODE last_mode = bsize <= BLOCK_8X8 ?
-              TM_PRED : cpi->sf.last_chroma_intra_mode;
+  // int mode_mask = (bsize <= BLOCK_8X8)
+  //                ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
+
+  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+    // if (!(mode_mask & (1 << mode)))
+    if (!(cpi->sf.intra_uv_mode_mask & (1 << mode)))
+      continue;
 
-  for (mode = DC_PRED; mode <= last_mode; mode++) {
     x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
     super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
                      &this_distortion, &s, &this_sse, bsize, best_rd);
@@ -1599,7 +1597,7 @@ typedef struct {
   int64_t sse;
   int segment_yrate;
   MB_PREDICTION_MODE modes[4];
-  SEG_RDSTAT rdstat[4][VP9_INTER_MODES];
+  SEG_RDSTAT rdstat[4][INTER_MODES];
   int mvthresh;
 } BEST_SEG_INFO;
 
@@ -1962,7 +1960,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
       if (best_rd == INT64_MAX) {
         int iy, midx;
         for (iy = i + 1; iy < 4; ++iy)
-          for (midx = 0; midx < VP9_INTER_MODES; ++midx)
+          for (midx = 0; midx < INTER_MODES; ++midx)
             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
         bsi->segment_rd = INT64_MAX;
         return;
@@ -1986,7 +1984,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
       if (this_segment_rd > bsi->segment_rd) {
         int iy, midx;
         for (iy = i + 1; iy < 4; ++iy)
-          for (midx = 0; midx < VP9_INTER_MODES; ++midx)
+          for (midx = 0; midx < INTER_MODES; ++midx)
             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
         bsi->segment_rd = INT64_MAX;
         return;
@@ -2189,7 +2187,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                          int_mv *second_ref_mv,
                          int64_t comp_pred_diff[NB_PREDICTION_TYPES],
                          int64_t tx_size_diff[TX_MODES],
-                         int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) {
+                         int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]) {
   MACROBLOCKD *const xd = &x->e_mbd;
 
   // Take a snapshot of the coding context so it can be
@@ -2212,7 +2210,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
   // doesn't actually work this way
   memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
   memcpy(ctx->best_filter_diff, best_filter_diff,
-         sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1));
+         sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
 }
 
 static void setup_pred_block(const MACROBLOCKD *xd,
@@ -2259,10 +2257,10 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
 
   scale[frame_type].x_offset_q4 =
       ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
-       VP9_REF_SCALE_SHIFT) & 0xf;
+       REF_SCALE_SHIFT) & 0xf;
   scale[frame_type].y_offset_q4 =
       ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
-       VP9_REF_SCALE_SHIFT) & 0xf;
+       REF_SCALE_SHIFT) & 0xf;
 
   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
   // use the UV scaling factors.
@@ -2747,8 +2745,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       int tmp_rate_sum = 0;
       int64_t tmp_dist_sum = 0;
 
-      cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
-      for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+      cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
+      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
         int j;
         int64_t rs_rd;
         mbmi->interp_filter = i;
@@ -2759,8 +2757,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         if (i > 0 && intpel_mv) {
           cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
                                            tmp_rate_sum, tmp_dist_sum);
-          cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
-              MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
+              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                   cpi->rd_filter_cache[i] + rs_rd);
           rd = cpi->rd_filter_cache[i];
           if (cm->mcomp_filter_type == SWITCHABLE)
@@ -2787,8 +2785,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
           cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
                                            rate_sum, dist_sum);
-          cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
-              MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
+              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                   cpi->rd_filter_cache[i] + rs_rd);
           rd = cpi->rd_filter_cache[i];
           if (cm->mcomp_filter_type == SWITCHABLE)
@@ -3080,8 +3078,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_tx_diff[TX_MODES];
   int64_t best_pred_diff[NB_PREDICTION_TYPES];
   int64_t best_pred_rd[NB_PREDICTION_TYPES];
-  int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1];
-  int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
+  int64_t best_filter_rd[SWITCHABLE_FILTERS + 1];
+  int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
   MB_MODE_INFO best_mbmode = { 0 };
   int j;
   int mode_index, best_mode_index = 0;
@@ -3132,7 +3130,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     best_pred_rd[i] = INT64_MAX;
   for (i = 0; i < TX_MODES; i++)
     best_tx_rd[i] = INT64_MAX;
-  for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+  for (i = 0; i <= SWITCHABLE_FILTERS; i++)
     best_filter_rd[i] = INT64_MAX;
   for (i = 0; i < TX_SIZES; i++)
     rate_uv_intra[i] = INT_MAX;
@@ -3216,7 +3214,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
     // Test best rd so far against threshold for trying this mode.
     if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
-                     cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) ||
+                     cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) ||
         cpi->rd_threshes[bsize][mode_index] == INT_MAX)
       continue;
 
@@ -3452,7 +3450,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       union b_mode_info tmp_best_bmodes[16];
       MB_MODE_INFO tmp_best_mbmode;
       PARTITION_INFO tmp_best_partition;
-      BEST_SEG_INFO bsi[VP9_SWITCHABLE_FILTERS];
+      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
       int pred_exists = 0;
       int uv_skippable;
       if (is_comp_pred) {
@@ -3472,7 +3470,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
           cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
       xd->mode_info_context->mbmi.txfm_size = TX_4X4;
 
-      cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
+      cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
       if (cm->mcomp_filter_type != BILINEAR) {
         tmp_best_filter = EIGHTTAP;
         if (x->source_variance <
@@ -3481,7 +3479,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
           vp9_zero(cpi->rd_filter_cache);
         } else {
           for (switchable_filter_index = 0;
-               switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+               switchable_filter_index < SWITCHABLE_FILTERS;
                ++switchable_filter_index) {
             int newbest, rs;
             int64_t rs_rd;
@@ -3503,8 +3501,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
             cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
             rs = get_switchable_rate(x);
             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
-            cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
-                MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+            cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
+                MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                     tmp_rd + rs_rd);
             if (cm->mcomp_filter_type == SWITCHABLE)
               tmp_rd += rs_rd;
@@ -3721,7 +3719,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     if (!disable_skip && ref_frame == INTRA_FRAME) {
       for (i = 0; i < NB_PREDICTION_TYPES; ++i)
         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
-      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+      for (i = 0; i <= SWITCHABLE_FILTERS; i++)
         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
     }
 
@@ -3777,29 +3775,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
           }
         }
       }
-#if 0
-      // Testing this mode gave rise to an improvement in best error score.
-      // Lower threshold a bit for next time
-      cpi->rd_thresh_mult[mode_index] =
-          (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
-              cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
-      cpi->rd_threshes[mode_index] =
-          (cpi->rd_baseline_thresh[mode_index] >> 7)
-              * cpi->rd_thresh_mult[mode_index];
-#endif
-    } else {
-      // If the mode did not help improve the best error case then
-      // raise the threshold for testing that mode next time around.
-#if 0
-      cpi->rd_thresh_mult[mode_index] += 4;
-
-      if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
-        cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
-
-      cpi->rd_threshes[mode_index] =
-          (cpi->rd_baseline_thresh[mode_index] >> 7)
-              * cpi->rd_thresh_mult[mode_index];
-#endif
     }
 
     /* keep record of best compound/single-only prediction */
@@ -3832,8 +3807,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
         cm->mcomp_filter_type != BILINEAR) {
       int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
-                              VP9_SWITCHABLE_FILTERS : cm->mcomp_filter_type];
-      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+                              SWITCHABLE_FILTERS : cm->mcomp_filter_type];
+      for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
         int64_t adj_rd;
         // In cases of poor prediction, filter_cache[] can contain really big
         // values, which actually are bigger than this_rd itself. This can
@@ -3942,33 +3917,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   if (cpi->sf.adaptive_rd_thresh) {
     for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
       if (mode_index == best_mode_index) {
-        cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT;
+        cpi->rd_thresh_freq_fact[bsize][mode_index] -=
+          (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
       } else {
-        cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC;
+        cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
         if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
-            (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) {
+            (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT)) {
           cpi->rd_thresh_freq_fact[bsize][mode_index] =
-            cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT;
+            cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT;
         }
       }
     }
   }
 
-  // TODO(rbultje) integrate with RD trd_thresh_freq_facthresholding
-#if 0
-  // Reduce the activation RD thresholds for the best choice mode
-  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
-      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
-    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
-
-    cpi->rd_thresh_mult[best_mode_index] =
-      (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
-      cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
-    cpi->rd_threshes[best_mode_index] =
-      (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
-  }
-#endif
-
   // macroblock modes
   *mbmi = best_mbmode;
   x->skip |= best_skip2;
@@ -4003,14 +3964,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (!x->skip) {
-    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+    for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
       if (best_filter_rd[i] == INT64_MAX)
         best_filter_diff[i] = 0;
       else
         best_filter_diff[i] = best_rd - best_filter_rd[i];
     }
     if (cm->mcomp_filter_type == SWITCHABLE)
-      assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0);
+      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
   } else {
     vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
   }
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index 325925cbd..155ba8a3e 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -50,8 +50,8 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
   uint8_t temp2[68 * 64];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 64, hfilter);
@@ -73,8 +73,8 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 64, hfilter);
@@ -107,8 +107,8 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
   uint8_t temp2[68 * 64];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 32, hfilter);
@@ -130,8 +130,8 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 32, hfilter);
@@ -164,8 +164,8 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
   uint8_t temp2[36 * 32];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 32, hfilter);
@@ -187,8 +187,8 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 32, hfilter);
@@ -221,8 +221,8 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
   uint8_t temp2[36 * 32];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 16, hfilter);
@@ -244,8 +244,8 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 16, hfilter);
@@ -442,8 +442,8 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
   const int16_t *hfilter, *vfilter;
   uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   // First filter 1d Horizontal
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
@@ -468,8 +468,8 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
   uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   // First filter 1d Horizontal
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
@@ -492,8 +492,8 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
   uint8_t temp2[20 * 16];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 8, hfilter);
@@ -515,8 +515,8 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 8, hfilter);
@@ -536,8 +536,8 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
   uint8_t temp2[20 * 16];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 16, hfilter);
@@ -559,8 +559,8 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 16, hfilter);
@@ -581,8 +581,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
   uint8_t temp2[68 * 64];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 64, hfilter);
@@ -604,8 +604,8 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 64, hfilter);
@@ -625,8 +625,8 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
   uint8_t temp2[36 * 32];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 32, hfilter);
@@ -648,8 +648,8 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 32, hfilter);
@@ -789,8 +789,8 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
   uint8_t temp2[20 * 16];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 16, hfilter);
@@ -812,8 +812,8 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 16, hfilter);
@@ -833,8 +833,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
   uint8_t temp2[20 * 16];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 8, hfilter);
@@ -856,8 +856,8 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 8, hfilter);
@@ -877,8 +877,8 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
   uint8_t temp2[20 * 16];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 5, 8, hfilter);
@@ -900,8 +900,8 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 5, 8, hfilter);
@@ -923,8 +923,8 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
   uint8_t temp2[20 * 16];
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 4, hfilter);
@@ -946,8 +946,8 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
   const int16_t *hfilter, *vfilter;
 
-  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 4, hfilter);
diff --git a/vp9/encoder/x86/vp9_subpel_variance.asm b/vp9/encoder/x86/vp9_subpel_variance.asm
index 19e2feb57..533456b77 100644
--- a/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -270,8 +270,13 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
 %if mmsize == 16
   movhps               m2, [srcq+src_strideq*2]
 %else ; mmsize == 8
+%if %1 == 4
+  movh                 m1, [srcq+src_strideq*2]
+  punpckldq            m2, m1
+%else
   punpckldq            m2, [srcq+src_strideq*2]
 %endif
+%endif
   movh                 m1, [dstq]
 %if mmsize == 16
   movlhps              m0, m2
@@ -542,9 +547,16 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
   movhps               m2, [srcq+src_strideq]
   movhps               m3, [srcq+src_strideq+1]
 %else
+%if %1 == 4
+  movh                 m1, [srcq+src_strideq]
+  punpckldq            m2, m1
+  movh                 m1, [srcq+src_strideq+1]
+  punpckldq            m3, m1
+%else
   punpckldq            m2, [srcq+src_strideq]
   punpckldq            m3, [srcq+src_strideq+1]
 %endif
+%endif
   pavgb                m2, m3
 %if mmsize == 16
   movlhps              m0, m2
diff --git a/vp9/encoder/x86/vp9_variance_impl_mmx.asm b/vp9/encoder/x86/vp9_variance_impl_mmx.asm
index d3dbefed8..3501cf1fd 100644
--- a/vp9/encoder/x86/vp9_variance_impl_mmx.asm
+++ b/vp9/encoder/x86/vp9_variance_impl_mmx.asm
@@ -342,8 +342,8 @@ sym(vp9_get4x4var_mmx):
         movsxd      rdx, dword ptr arg(3) ;[recon_stride]
 
         ; Row 1
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm0, [rax]                  ; Copy 4 bytes to mm0
+        movd        mm1, [rbx]                  ; Copy 4 bytes to mm1
         punpcklbw   mm0, mm6                    ; unpack to higher prrcision
         punpcklbw   mm1, mm6
         psubsw      mm0, mm1                    ; A-B (low order) to MM0
@@ -351,12 +351,12 @@ sym(vp9_get4x4var_mmx):
         pmaddwd     mm0, mm0                    ; square and accumulate
         add         rbx,rdx                     ; Inc pointer into ref data
         add         rax,rcx                     ; Inc pointer into the new data
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm1, [rbx]                  ; Copy 4 bytes to mm1
         paddd       mm7, mm0                    ; accumulate in mm7
 
 
         ; Row 2
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
+        movd        mm0, [rax]                  ; Copy 4 bytes to mm0
         punpcklbw   mm0, mm6                    ; unpack to higher prrcision
         punpcklbw   mm1, mm6
         psubsw      mm0, mm1                    ; A-B (low order) to MM0
@@ -365,11 +365,11 @@ sym(vp9_get4x4var_mmx):
         pmaddwd     mm0, mm0                    ; square and accumulate
         add         rbx,rdx                     ; Inc pointer into ref data
         add         rax,rcx                     ; Inc pointer into the new data
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm1, [rbx]                  ; Copy 4 bytes to mm1
         paddd       mm7, mm0                    ; accumulate in mm7
 
         ; Row 3
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
+        movd        mm0, [rax]                  ; Copy 4 bytes to mm0
         punpcklbw   mm0, mm6                    ; unpack to higher prrcision
         punpcklbw   mm1, mm6
         psubsw      mm0, mm1                    ; A-B (low order) to MM0
@@ -378,11 +378,11 @@ sym(vp9_get4x4var_mmx):
         pmaddwd     mm0, mm0                    ; square and accumulate
         add         rbx,rdx                     ; Inc pointer into ref data
         add         rax,rcx                     ; Inc pointer into the new data
-        movq        mm1, [rbx]                  ; Copy eight bytes to mm1
+        movd        mm1, [rbx]                  ; Copy 4 bytes to mm1
         paddd       mm7, mm0                    ; accumulate in mm7
 
         ; Row 4
-        movq        mm0, [rax]                  ; Copy eight bytes to mm0
+        movd        mm0, [rax]                  ; Copy 4 bytes to mm0
 
         punpcklbw   mm0, mm6                    ; unpack to higher prrcision
         punpcklbw   mm1, mm6
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index c6f398101..d5692efb1 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -96,6 +96,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_1_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM)