Diffstat (limited to 'vp8/encoder')
-rw-r--r--  vp8/encoder/bitstream.c                    172
-rw-r--r--  vp8/encoder/block.h                         22
-rw-r--r--  vp8/encoder/dct.c                            7
-rw-r--r--  vp8/encoder/dct.h                            2
-rw-r--r--  vp8/encoder/encodeframe.c                   63
-rw-r--r--  vp8/encoder/encodeintra.c                   22
-rw-r--r--  vp8/encoder/encodemb.c                      44
-rw-r--r--  vp8/encoder/encodemv.c                     592
-rw-r--r--  vp8/encoder/encodemv.h                      15
-rw-r--r--  vp8/encoder/firstpass.c                     10
-rw-r--r--  vp8/encoder/generic/csystemdependent.c      79
-rw-r--r--  vp8/encoder/mbgraph.c                       26
-rw-r--r--  vp8/encoder/mcomp.c                        137
-rw-r--r--  vp8/encoder/mcomp.h                          9
-rw-r--r--  vp8/encoder/modecosts.c                      3
-rw-r--r--  vp8/encoder/onyx_if.c                      278
-rw-r--r--  vp8/encoder/onyx_int.h                      41
-rw-r--r--  vp8/encoder/picklpf.c                       28
-rw-r--r--  vp8/encoder/quantize.c                       2
-rw-r--r--  vp8/encoder/quantize.h                       2
-rw-r--r--  vp8/encoder/ratectrl.c                      40
-rw-r--r--  vp8/encoder/rdopt.c                        614
-rw-r--r--  vp8/encoder/sad_c.c                         44
-rw-r--r--  vp8/encoder/ssim.c                         109
-rw-r--r--  vp8/encoder/tokenize.c                      97
-rw-r--r--  vp8/encoder/variance.h                     568
-rw-r--r--  vp8/encoder/x86/variance_x86.h             328
-rw-r--r--  vp8/encoder/x86/x86_csystemdependent.c      85
28 files changed, 634 insertions, 2805 deletions
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 70cdb6aaf..36776ab21 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -45,32 +45,26 @@ unsigned int tree_update_hist [BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM
unsigned int hybrid_tree_update_hist [BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES][2];
-#endif
unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM8X8
unsigned int hybrid_tree_update_hist_8x8 [BLOCK_TYPES_8X8]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#endif
unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM16X16
unsigned int hybrid_tree_update_hist_16x16 [BLOCK_TYPES_16X16]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] [2];
-#endif
extern unsigned int active_section;
#endif
@@ -265,57 +259,23 @@ void update_skip_probs(VP8_COMP *cpi) {
}
}
-#if CONFIG_SWITCHABLE_INTERP
void update_switchable_interp_probs(VP8_COMP *cpi, vp8_writer* const bc) {
VP8_COMMON *const pc = &cpi->common;
unsigned int branch_ct[32][2];
int i, j;
for (j = 0; j <= VP8_SWITCHABLE_FILTERS; ++j) {
- //for (j = 0; j <= 0; ++j) {
-/*
- if (!cpi->dummy_packing)
-#if VP8_SWITCHABLE_FILTERS == 3
- printf("HELLO %d %d %d\n", cpi->switchable_interp_count[j][0],
- cpi->switchable_interp_count[j][1], cpi->switchable_interp_count[j][2]);
-#else
- printf("HELLO %d %d\n", cpi->switchable_interp_count[j][0],
- cpi->switchable_interp_count[j][1]);
-#endif
-*/
vp8_tree_probs_from_distribution(
VP8_SWITCHABLE_FILTERS,
vp8_switchable_interp_encodings, vp8_switchable_interp_tree,
- pc->fc.switchable_interp_prob[j], branch_ct, cpi->switchable_interp_count[j],
- 256, 1
- );
+ pc->fc.switchable_interp_prob[j], branch_ct,
+ cpi->switchable_interp_count[j], 256, 1);
for (i = 0; i < VP8_SWITCHABLE_FILTERS - 1; ++i) {
if (pc->fc.switchable_interp_prob[j][i] < 1)
pc->fc.switchable_interp_prob[j][i] = 1;
vp8_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8);
-/*
- if (!cpi->dummy_packing)
-#if VP8_SWITCHABLE_FILTERS == 3
- printf("Probs %d %d [%d]\n",
- pc->fc.switchable_interp_prob[j][0],
- pc->fc.switchable_interp_prob[j][1], pc->frame_type);
-#else
- printf("Probs %d [%d]\n", pc->fc.switchable_interp_prob[j][0],
- pc->frame_type);
-#endif
-*/
}
}
- /*
- if (!cpi->dummy_packing)
-#if VP8_SWITCHABLE_FILTERS == 3
- printf("Probs %d %d [%d]\n",
- pc->fc.switchable_interp_prob[0], pc->fc.switchable_interp_prob[1], pc->frame_type);
-#else
- printf("Probs %d [%d]\n", pc->fc.switchable_interp_prob[0], pc->frame_type);
-#endif
- */
}
-#endif
// This function updates the reference frame prediction stats
static void update_refpred_stats(VP8_COMP *cpi) {
@@ -649,7 +609,6 @@ static void write_sub_mv_ref
vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
-#if CONFIG_NEWMVENTROPY
static void write_nmv(vp8_writer *bc, const MV *mv, const int_mv *ref,
const nmv_context *nmvc, int usehp) {
MV e;
@@ -660,31 +619,6 @@ static void write_nmv(vp8_writer *bc, const MV *mv, const int_mv *ref,
vp8_encode_nmv_fp(bc, &e, &ref->as_mv, nmvc, usehp);
}
-#else
-
-static void write_mv
-(
- vp8_writer *bc, const MV *mv, const int_mv *ref, const MV_CONTEXT *mvc
-) {
- MV e;
- e.row = mv->row - ref->as_mv.row;
- e.col = mv->col - ref->as_mv.col;
-
- vp8_encode_motion_vector(bc, &e, mvc);
-}
-
-static void write_mv_hp
-(
- vp8_writer *bc, const MV *mv, const int_mv *ref, const MV_CONTEXT_HP *mvc
-) {
- MV e;
- e.row = mv->row - ref->as_mv.row;
- e.col = mv->col - ref->as_mv.col;
-
- vp8_encode_motion_vector_hp(bc, &e, mvc);
-}
-#endif /* CONFIG_NEWMVENTROPY */
-
// This function writes the current macro block's segnment id to the bitstream
// It should only be called if a segment map update is indicated.
static void write_mb_segid(vp8_writer *bc,
@@ -821,12 +755,7 @@ static void update_ref_probs(VP8_COMP *const cpi) {
static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
int i;
VP8_COMMON *const pc = &cpi->common;
-#if CONFIG_NEWMVENTROPY
const nmv_context *nmvc = &pc->fc.nmvc;
-#else
- const MV_CONTEXT *mvc = pc->fc.mvc;
- const MV_CONTEXT_HP *mvc_hp = pc->fc.mvc_hp;
-#endif
MACROBLOCK *x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
MODE_INFO *m;
@@ -1042,21 +971,19 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
cpi->common.pred_filter_mode);
}
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (mode >= NEARESTMV && mode <= SPLITMV)
{
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
vp8_write_token(bc, vp8_switchable_interp_tree,
- get_pred_probs(&cpi->common, xd, PRED_SWITCHABLE_INTERP),
+ get_pred_probs(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP),
vp8_switchable_interp_encodings +
vp8_switchable_interp_map[mi->interp_filter]);
- //if (!cpi->dummy_packing) printf("Reading: %d\n", mi->interp_filter);
} else {
assert (mi->interp_filter ==
cpi->common.mcomp_filter_type);
}
}
-#endif
if (mi->second_ref_frame &&
(mode == NEWMV || mode == SPLITMV)) {
int_mv n1, n2;
@@ -1099,17 +1026,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
}
#endif
-#if CONFIG_NEWMVENTROPY
write_nmv(bc, &mi->mv[0].as_mv, &best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(bc, &mi->mv[0].as_mv, &best_mv, mvc_hp);
- } else {
- write_mv(bc, &mi->mv[0].as_mv, &best_mv, mvc);
- }
-#endif
if (mi->second_ref_frame) {
#if 0 //CONFIG_NEW_MVREF
@@ -1126,17 +1045,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
&best_second_mv);
cpi->best_ref_index_counts[best_index]++;
#endif
-#if CONFIG_NEWMVENTROPY
write_nmv(bc, &mi->mv[1].as_mv, &best_second_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(bc, &mi->mv[1].as_mv, &best_second_mv, mvc_hp);
- } else {
- write_mv(bc, &mi->mv[1].as_mv, &best_second_mv, mvc);
- }
-#endif
}
break;
case SPLITMV: {
@@ -1178,40 +1089,16 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
#ifdef ENTROPY_STATS
active_section = 11;
#endif
-#if CONFIG_NEWMVENTROPY
write_nmv(bc, &blockmv.as_mv, &best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(bc, &blockmv.as_mv, &best_mv,
- (const MV_CONTEXT_HP *) mvc_hp);
- } else {
- write_mv(bc, &blockmv.as_mv, &best_mv,
- (const MV_CONTEXT *) mvc);
- }
-#endif
if (mi->second_ref_frame) {
-#if CONFIG_NEWMVENTROPY
write_nmv(bc,
&cpi->mb.partition_info->bmi[j].second_mv.as_mv,
&best_second_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
-#else
- if (xd->allow_high_precision_mv) {
- write_mv_hp(
- bc,
- &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
- &best_second_mv, (const MV_CONTEXT_HP *)mvc_hp);
- } else {
- write_mv(
- bc,
- &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
- &best_second_mv, (const MV_CONTEXT *) mvc);
- }
-#endif
}
}
} while (++j < cpi->mb.partition_info->count);
@@ -1223,9 +1110,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
}
}
-#if CONFIG_TX_SELECT
if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
- (rf != INTRA_FRAME && mode != SPLITMV)) &&
+ (rf != INTRA_FRAME && !(mode == SPLITMV &&
+ mi->partitioning == PARTITIONING_4X4))) &&
pc->txfm_mode == TX_MODE_SELECT &&
!((pc->mb_no_coeff_skip && mi->mb_skip_coeff) ||
(segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
@@ -1233,10 +1120,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi, vp8_writer *const bc) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp8_write(bc, sz != TX_4X4, pc->prob_tx[0]);
- if (sz != TX_4X4 && mode != I8X8_PRED)
+ if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV)
vp8_write(bc, sz != TX_8X8, pc->prob_tx[1]);
}
-#endif
#ifdef ENTROPY_STATS
active_section = 1;
@@ -1365,7 +1251,6 @@ static void write_mb_modes_kf(const VP8_COMMON *c,
} else
write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
-#if CONFIG_TX_SELECT
if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
!((c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) ||
(segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
@@ -1376,7 +1261,6 @@ static void write_mb_modes_kf(const VP8_COMMON *c,
if (sz != TX_4X4 && ym <= TM_PRED)
vp8_write(bc, sz != TX_8X8, c->prob_tx[1]);
}
-#endif
}
static void write_kfmodes(VP8_COMP* const cpi, vp8_writer* const bc) {
@@ -1498,7 +1382,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM
for (i = 0; i < BLOCK_TYPES; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -1519,8 +1402,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#endif
-
if (cpi->common.txfm_mode != ONLY_4X4) {
for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
@@ -1547,7 +1428,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM8X8
for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -1572,7 +1452,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#endif
}
if (cpi->common.txfm_mode > ALLOW_8X8) {
@@ -1595,7 +1474,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM16X16
for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
for (j = 0; j < COEF_BANDS; ++j) {
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
@@ -1614,7 +1492,6 @@ void build_coeff_contexts(VP8_COMP *cpi) {
}
}
}
-#endif
}
#if 0
@@ -1887,7 +1764,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
-#if CONFIG_HYBRIDTRANSFORM
savings = 0;
update[0] = update[1] = 0;
for (i = 0; i < BLOCK_TYPES; ++i) {
@@ -1976,7 +1852,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#endif
/* do not do this if not even allowed */
if (cpi->common.txfm_mode != ONLY_4X4) {
@@ -2054,7 +1929,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM8X8
update[0] = update[1] = 0;
savings = 0;
for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
@@ -2128,7 +2002,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#endif
}
if (cpi->common.txfm_mode > ALLOW_8X8) {
@@ -2206,7 +2079,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#if CONFIG_HYBRIDTRANSFORM16X16
update[0] = update[1] = 0;
savings = 0;
for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
@@ -2280,7 +2152,6 @@ static void update_coef_probs(VP8_COMP* const cpi, vp8_writer* const bc) {
}
}
}
-#endif
}
}
@@ -2561,12 +2432,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
/* sb mode probability */
const int sb_max = (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1));
- pc->sb_coded = get_prob(cpi->sb_count, sb_max);
+ pc->sb_coded = get_prob(sb_max - cpi->sb_count, sb_max);
vp8_write_literal(&header_bc, pc->sb_coded, 8);
}
#endif
-#if CONFIG_TX_SELECT
{
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = get_prob(cpi->txfm_count[0] + cpi->txfm_count_8x8p[0],
@@ -2583,9 +2453,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
vp8_write_literal(&header_bc, pc->prob_tx[1], 8);
}
}
-#else
- vp8_write_bit(&header_bc, !!pc->txfm_mode);
-#endif
// Encode the loop filter level and type
vp8_write_bit(&header_bc, pc->filter_type);
@@ -2687,7 +2554,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
// Signal whether to allow high MV precision
vp8_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0);
-#if CONFIG_SWITCHABLE_INTERP
if (pc->mcomp_filter_type == SWITCHABLE) {
/* Check to see if only one of the filters is actually used */
int count[VP8_SWITCHABLE_FILTERS];
@@ -2712,7 +2578,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
// Signal the type of subpel filter to use
vp8_write_bit(&header_bc, (pc->mcomp_filter_type == SWITCHABLE));
if (pc->mcomp_filter_type != SWITCHABLE)
-#endif /* CONFIG_SWITCHABLE_INTERP */
vp8_write_literal(&header_bc, (pc->mcomp_filter_type), 2);
}
@@ -2731,29 +2596,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
vp8_clear_system_state(); // __asm emms;
vp8_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cpi->common.fc.pre_hybrid_coef_probs, cpi->common.fc.hybrid_coef_probs);
-#endif
vp8_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8);
-#endif
vp8_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16);
-#endif
vp8_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
vp8_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
vp8_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob);
vp8_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
vp8_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
vp8_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
-#if CONFIG_NEWMVENTROPY
cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
-#else
- vp8_copy(cpi->common.fc.pre_mvc, cpi->common.fc.mvc);
- vp8_copy(cpi->common.fc.pre_mvc_hp, cpi->common.fc.mvc_hp);
-#endif
vp8_zero(cpi->sub_mv_ref_count);
vp8_zero(cpi->mbsplit_count);
vp8_zero(cpi->common.fc.mv_ref_ct)
@@ -2796,10 +2650,8 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
vp8_write_literal(&header_bc, pc->prob_pred_filter_off, 8);
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (pc->mcomp_filter_type == SWITCHABLE)
update_switchable_interp_probs(cpi, &header_bc);
-#endif
vp8_write_literal(&header_bc, pc->prob_intra_coded, 8);
vp8_write_literal(&header_bc, pc->prob_last_coded, 8);
@@ -2825,15 +2677,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
update_mbintra_mode_probs(cpi, &header_bc);
-#if CONFIG_NEWMVENTROPY
vp8_write_nmvprobs(cpi, xd->allow_high_precision_mv, &header_bc);
-#else
- if (xd->allow_high_precision_mv) {
- vp8_write_mvprobs_hp(cpi, &header_bc);
- } else {
- vp8_write_mvprobs(cpi, &header_bc);
- }
-#endif
}
vp8_stop_encode(&header_bc);
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 80f9b75b8..48623be8c 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -79,9 +79,7 @@ typedef struct {
int hybrid_pred_diff;
int comp_pred_diff;
int single_pred_diff;
-#if CONFIG_TX_SELECT
int64_t txfm_rd_diff[NB_TXFM_MODES];
-#endif
} PICK_MODE_CONTEXT;
typedef struct {
@@ -114,7 +112,6 @@ typedef struct {
int *mb_norm_activity_ptr;
signed int act_zbin_adj;
-#if CONFIG_NEWMVENTROPY
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
int *nmvcost[2];
@@ -126,28 +123,17 @@ typedef struct {
int *nmvsadcost[2];
int nmvsadcosts_hp[2][MV_VALS];
int *nmvsadcost_hp[2];
-#else
- int mvcosts[2][MVvals + 1];
- int *mvcost[2];
- int mvsadcosts[2][MVfpvals + 1];
- int *mvsadcost[2];
- int mvcosts_hp[2][MVvals_hp + 1];
- int *mvcost_hp[2];
- int mvsadcosts_hp[2][MVfpvals_hp + 1];
- int *mvsadcost_hp[2];
-#endif /* CONFIG_NEWMVENTROPY */
int mbmode_cost[2][MB_MODE_COUNT];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
int bmode_costs[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES];
int i8x8_mode_costs[MB_MODE_COUNT];
int inter_bmode_costs[B_MODE_COUNT];
-#if CONFIG_SWITCHABLE_INTERP
- int switchable_interp_costs[VP8_SWITCHABLE_FILTERS+1]
+ int switchable_interp_costs[VP8_SWITCHABLE_FILTERS + 1]
[VP8_SWITCHABLE_FILTERS];
-#endif
- // These define limits to motion vector components to prevent them from extending outside the UMV borders
+ // These define limits to motion vector components to prevent them
+ // from extending outside the UMV borders
int mv_col_min;
int mv_col_max;
int mv_row_min;
@@ -164,10 +150,8 @@ typedef struct {
unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
unsigned int hybrid_token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-#endif
int optimize;
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index cd13fec7c..0983b1c0a 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -14,8 +14,6 @@
#include "vp8/common/idct.h"
#include "vp8/common/systemdependent.h"
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
-
#include "vp8/common/blockd.h"
// TODO: these transforms can be converted into integer forms to reduce
@@ -71,9 +69,7 @@ float adst_8[64] = {
0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
};
-#endif
-#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
float dct_16[256] = {
0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
@@ -143,7 +139,6 @@ float adst_16[256] = {
0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761,
0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094
};
-#endif
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
@@ -400,7 +395,6 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8
}
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
void vp8_fht_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim) {
@@ -518,7 +512,6 @@ void vp8_fht_c(short *input, short *output, int pitch,
}
vp8_clear_system_state(); // Make it simd safe : __asm emms;
}
-#endif
void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
int i;
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index 180192bbb..4ad1fe85d 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -26,10 +26,8 @@
#endif
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
void vp8_fht_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim);
-#endif
#ifndef vp8_fdct_short16x16
#define vp8_fdct_short16x16 vp8_short_fdct16x16_c
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 619695c33..0910cfd35 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -120,8 +120,8 @@ static unsigned int tt_activity_measure(VP8_COMP *cpi, MACROBLOCK *x) {
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
- act = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer,
- x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
+ act = vp8_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0,
+ &sse);
act = act << 4;
/* If the region is flat, lower the activity some more. */
@@ -411,7 +411,6 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
}
-#if CONFIG_TX_SELECT
{
int segment_id = mbmi->segment_id;
if (!segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
@@ -421,7 +420,6 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
}
}
}
-#endif
if (cpi->common.frame_type == KEY_FRAME) {
// Restore the coding modes to that held in the coding context
@@ -1342,37 +1340,18 @@ static void encode_frame_internal(VP8_COMP *cpi) {
cpi->pred_filter_on_count = 0;
cpi->pred_filter_off_count = 0;
#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_zero(cpi->switchable_interp_count);
-#endif
-
-#if 0
- // Experimental code
- cpi->frame_distortion = 0;
- cpi->last_mb_distortion = 0;
-#endif
xd->mode_info_context = cm->mi;
xd->prev_mode_info_context = cm->prev_mi;
-#if CONFIG_NEWMVENTROPY
vp8_zero(cpi->NMVcount);
-#else
- vp8_zero(cpi->MVcount);
- vp8_zero(cpi->MVcount_hp);
-#endif
vp8_zero(cpi->coef_counts);
-#if CONFIG_HYBRIDTRANSFORM
vp8_zero(cpi->hybrid_coef_counts);
-#endif
vp8_zero(cpi->coef_counts_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_zero(cpi->hybrid_coef_counts_8x8);
-#endif
vp8_zero(cpi->coef_counts_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_zero(cpi->hybrid_coef_counts_16x16);
-#endif
vp8cx_frame_init_quantizer(cpi);
@@ -1393,11 +1372,9 @@ static void encode_frame_internal(VP8_COMP *cpi) {
vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));
vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count));
vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count));
-#if CONFIG_TX_SELECT
vpx_memset(cpi->txfm_count, 0, sizeof(cpi->txfm_count));
vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p));
vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
-#endif
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
@@ -1457,7 +1434,6 @@ static int check_dual_ref_flags(VP8_COMP *cpi) {
}
}
-#if CONFIG_TX_SELECT
static void reset_skip_txfm_size(VP8_COMP *cpi, TX_SIZE txfm_max) {
VP8_COMMON *cm = &cpi->common;
int mb_row, mb_col, mis = cm->mode_info_stride;
@@ -1481,7 +1457,6 @@ static void reset_skip_txfm_size(VP8_COMP *cpi, TX_SIZE txfm_max) {
}
}
}
-#endif
void vp8_encode_frame(VP8_COMP *cpi) {
if (cpi->sf.RD) {
@@ -1527,7 +1502,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
txfm_type = ONLY_4X4;
} else
#endif
-#if CONFIG_TX_SELECT
/* FIXME (rbultje)
* this is a hack (no really), basically to work around the complete
* nonsense coefficient cost prediction for keyframes. The probabilities
@@ -1575,16 +1549,11 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
ALLOW_16X16 : TX_MODE_SELECT;
#endif
-#else
- txfm_type = ALLOW_16X16;
-#endif // CONFIG_TX_SELECT
cpi->common.txfm_mode = txfm_type;
-#if CONFIG_TX_SELECT
if (txfm_type != TX_MODE_SELECT) {
cpi->common.prob_tx[0] = 128;
cpi->common.prob_tx[1] = 128;
}
-#endif
cpi->common.comp_pred_mode = pred_type;
encode_frame_internal(cpi);
@@ -1594,7 +1563,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
}
-#if CONFIG_TX_SELECT
for (i = 0; i < NB_TXFM_MODES; ++i) {
int64_t pd = cpi->rd_tx_select_diff[i];
int diff;
@@ -1604,7 +1572,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->rd_tx_select_threshes[frame_type][i] += diff;
cpi->rd_tx_select_threshes[frame_type][i] /= 2;
}
-#endif
if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
int single_count_zero = 0;
@@ -1622,7 +1589,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
}
}
-#if CONFIG_TX_SELECT
if (cpi->common.txfm_mode == TX_MODE_SELECT) {
const int count4x4 = cpi->txfm_count[TX_4X4] + cpi->txfm_count_8x8p[TX_4X4];
const int count8x8 = cpi->txfm_count[TX_8X8];
@@ -1639,7 +1605,6 @@ void vp8_encode_frame(VP8_COMP *cpi) {
cpi->common.txfm_mode = ALLOW_16X16;
}
}
-#endif
} else {
encode_frame_internal(cpi);
}
@@ -1957,15 +1922,12 @@ void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
}
if (output_enabled) {
-#if CONFIG_TX_SELECT
int segment_id = mbmi->segment_id;
-#endif
// Tokenize
sum_intra_stats(cpi, x);
vp8_tokenize_mb(cpi, &x->e_mbd, t, 0);
-#if CONFIG_TX_SELECT
if (cpi->common.txfm_mode == TX_MODE_SELECT &&
!((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
(segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) &&
@@ -1975,9 +1937,7 @@ void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
} else if (mbmi->mode == I8X8_PRED) {
cpi->txfm_count_8x8p[mbmi->txfm_size]++;
}
- } else
-#endif
- if (cpi->common.txfm_mode >= ALLOW_16X16 && mbmi->mode <= TM_PRED) {
+ } else if (cpi->common.txfm_mode >= ALLOW_16X16 && mbmi->mode <= TM_PRED) {
mbmi->txfm_size = TX_16X16;
} else
if (cpi->common.txfm_mode >= ALLOW_8X8 && mbmi->mode != B_PRED) {
@@ -2012,9 +1972,7 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
assert(!xd->mode_info_context->mbmi.encoded_as_sb);
#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_setup_interp_filters(xd, mbmi->interp_filter, cm);
-#endif
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
// Adjust the zbin based on this MB rate.
adjust_act_zbin(cpi, x);
@@ -2158,7 +2116,6 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
}
if (output_enabled) {
-#if CONFIG_TX_SELECT
int segment_id = mbmi->segment_id;
if (cpi->common.txfm_mode == TX_MODE_SELECT &&
!((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
@@ -2167,16 +2124,18 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV) {
cpi->txfm_count[mbmi->txfm_size]++;
- } else if (mbmi->mode == I8X8_PRED) {
+ } else if (mbmi->mode == I8X8_PRED ||
+ (mbmi->mode == SPLITMV &&
+ mbmi->partitioning != PARTITIONING_4X4)) {
cpi->txfm_count_8x8p[mbmi->txfm_size]++;
}
- } else
-#endif
- if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
+ } else if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) {
mbmi->txfm_size = TX_16X16;
- } else if (mbmi->mode != B_PRED && mbmi->mode != SPLITMV &&
- cpi->common.txfm_mode >= ALLOW_8X8) {
+ } else if (mbmi->mode != B_PRED &&
+ !(mbmi->mode == SPLITMV &&
+ mbmi->partitioning == PARTITIONING_4X4) &&
+ cpi->common.txfm_mode >= ALLOW_8X8) {
mbmi->txfm_size = TX_8X8;
} else {
mbmi->txfm_size = TX_4X4;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 9076780d9..f44df22ea 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -48,7 +48,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
}
}
- intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
+ intra_pred_var = vp8_get_mb_ss(x->src_diff);
return intra_pred_var;
}
@@ -57,9 +57,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *x, int ib) {
BLOCKD *b = &x->e_mbd.block[ib];
BLOCK *be = &x->block[ib];
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
@@ -74,15 +72,12 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
-#if CONFIG_HYBRIDTRANSFORM
tx_type = get_tx_type(&x->e_mbd, b);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4);
vp8_ht_quantize_b_4x4(be, b, tx_type);
vp8_ihtllm_c(b->dqcoeff, b->diff, 32, tx_type, 4);
- } else
-#endif
- {
+ } else {
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32) ;
x->quantize_b_4x4(be, b) ;
vp8_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 32) ;
@@ -103,9 +98,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
if (xd->mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1))
@@ -120,7 +113,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
xd->predictor, b->src_stride);
if (tx_size == TX_16X16) {
-#if CONFIG_HYBRIDTRANSFORM16X16
BLOCKD *bd = &xd->block[0];
tx_type = get_tx_type(xd, bd);
if (tx_type != DCT_DCT) {
@@ -129,9 +121,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
if (x->optimize)
vp8_optimize_mby_16x16(x, rtcd);
vp8_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16);
- } else
-#endif
- {
+ } else {
vp8_transform_mby_16x16(x);
vp8_quantize_mby_16x16(x);
if (x->optimize)
@@ -196,9 +186,7 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
BLOCK *be = &x->block[ib];
const int iblock[4] = {0, 1, 4, 5};
int i;
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
if (b->bmi.as_mode.second == (MB_PREDICTION_MODE)(DC_PRED - 1)) {
@@ -217,7 +205,6 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
// generate residual blocks
vp8_subtract_4b_c(be, b, 16);
-#if CONFIG_HYBRIDTRANSFORM8X8
tx_type = get_tx_type(xd, xd->block + idx);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
@@ -226,13 +213,10 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
tx_type, 8);
} else {
-#endif
x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
vp8_idct_idct8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
-#if CONFIG_HYBRIDTRANSFORM8X8
}
-#endif
} else {
for (i = 0; i < 4; i++) {
b = &xd->block[ib + iblock[i]];
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index dc54d05a2..d3bd0f1dd 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -304,7 +304,6 @@ void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
scan = vp8_default_zig_zag1d;
bands = vp8_coef_bands;
default_eob = 16;
-#if CONFIG_HYBRIDTRANSFORM
// TODO: this isn't called (for intra4x4 modes), but will be left in
// since it could be used later
{
@@ -327,7 +326,6 @@ void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
scan = vp8_default_zig_zag1d;
}
}
-#endif
break;
case TX_8X8:
scan = vp8_default_zig_zag1d_8x8;
@@ -638,6 +636,7 @@ void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
+ int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV;
if (!x->e_mbd.above_context || !x->e_mbd.left_context)
return;
@@ -647,18 +646,21 @@ void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
- type = PLANE_TYPE_Y_NO_DC;
+ type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
for (b = 0; b < 16; b += 4) {
optimize_b(x, b, type,
ta + vp8_block2above_8x8[b], tl + vp8_block2left_8x8[b],
rtcd, TX_8X8);
- *(ta + vp8_block2above_8x8[b] + 1) = *(ta + vp8_block2above_8x8[b]);
- *(tl + vp8_block2left_8x8[b] + 1) = *(tl + vp8_block2left_8x8[b]);
+ ta[vp8_block2above_8x8[b] + 1] = ta[vp8_block2above_8x8[b]];
+ tl[vp8_block2left_8x8[b] + 1] = tl[vp8_block2left_8x8[b]];
}
// 8x8 always have 2nd roder haar block
- check_reset_8x8_2nd_coeffs(&x->e_mbd,
- ta + vp8_block2above_8x8[24], tl + vp8_block2left_8x8[24]);
+ if (has_2nd_order) {
+ check_reset_8x8_2nd_coeffs(&x->e_mbd,
+ ta + vp8_block2above_8x8[24],
+ tl + vp8_block2left_8x8[24]);
+ }
}
void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
@@ -680,8 +682,8 @@ void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
optimize_b(x, b, PLANE_TYPE_UV,
ta + vp8_block2above_8x8[b], tl + vp8_block2left_8x8[b],
rtcd, TX_8X8);
- *(ta + vp8_block2above_8x8[b] + 1) = *(ta + vp8_block2above_8x8[b]);
- *(tl + vp8_block2left_8x8[b] + 1) = *(tl + vp8_block2left_8x8[b]);
+ ta[vp8_block2above_8x8[b] + 1] = ta[vp8_block2above_8x8[b]];
+ tl[vp8_block2left_8x8[b] + 1] = tl[vp8_block2left_8x8[b]];
}
}
@@ -898,11 +900,25 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
optimize_mb_16x16(x, rtcd);
vp8_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), xd);
} else if (tx_size == TX_8X8) {
- vp8_transform_mb_8x8(x);
- vp8_quantize_mb_8x8(x);
- if (x->optimize)
- optimize_mb_8x8(x, rtcd);
- vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd);
+ if (xd->mode_info_context->mbmi.mode == SPLITMV) {
+ assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
+ vp8_transform_mby_8x8(x);
+ vp8_transform_mbuv_4x4(x);
+ vp8_quantize_mby_8x8(x);
+ vp8_quantize_mbuv_4x4(x);
+ if (x->optimize) {
+ vp8_optimize_mby_8x8(x, rtcd);
+ vp8_optimize_mbuv_4x4(x, rtcd);
+ }
+ vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), xd);
+ vp8_inverse_transform_mbuv_4x4(IF_RTCD(&rtcd->common->idct), xd);
+ } else {
+ vp8_transform_mb_8x8(x);
+ vp8_quantize_mb_8x8(x);
+ if (x->optimize)
+ optimize_mb_8x8(x, rtcd);
+ vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd);
+ }
} else {
transform_mb_4x4(x);
vp8_quantize_mb_4x4(x);
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index d520d995a..75dad2f9b 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -22,8 +22,6 @@ extern unsigned int active_section;
//extern int final_packing;
-#if CONFIG_NEWMVENTROPY
-
#ifdef NMV_STATS
nmv_context_counts tnmvcounts;
#endif
@@ -549,593 +547,3 @@ void vp8_build_nmv_cost_table(int *mvjoint,
if (mvc_flag_h)
build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
}
-
-#else /* CONFIG_NEWMVENTROPY */
-
-static void encode_mvcomponent(
- vp8_writer *const bc,
- const int v,
- const struct mv_context *mvc
-) {
- const vp8_prob *p = mvc->prob;
- const int x = v < 0 ? -v : v;
-
- if (x < mvnum_short) { // Small
- vp8_write(bc, 0, p[mvpis_short]);
- vp8_treed_write(bc, vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
- if (!x)
- return; // no sign bit
- } else { // Large
- int i = 0;
-
- vp8_write(bc, 1, p[mvpis_short]);
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits + i]);
-
- while (++i < mvnum_short_bits);
-
- i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits + i]);
-
- while (--i > mvnum_short_bits);
-
- if (x & ~((2 << mvnum_short_bits) - 1))
- vp8_write(bc, (x >> mvnum_short_bits) & 1, p[MVPbits + mvnum_short_bits]);
- }
-
- vp8_write(bc, v < 0, p[MVPsign]);
-}
-
-void vp8_encode_motion_vector(vp8_writer* const bc,
- const MV* const mv,
- const MV_CONTEXT* const mvc) {
- encode_mvcomponent(bc, mv->row >> 1, &mvc[0]);
- encode_mvcomponent(bc, mv->col >> 1, &mvc[1]);
-}
-
-
-static unsigned int cost_mvcomponent(const int v,
- const struct mv_context* const mvc) {
- const vp8_prob *p = mvc->prob;
- const int x = v; // v<0? -v:v;
- unsigned int cost;
-
- if (x < mvnum_short) {
- cost = vp8_cost_zero(p [mvpis_short])
- + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
-
- if (!x)
- return cost;
- } else {
- int i = 0;
- cost = vp8_cost_one(p [mvpis_short]);
-
- do
- cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
-
- while (++i < mvnum_short_bits);
-
- i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
-
- while (--i > mvnum_short_bits);
-
- if (x & ~((2 << mvnum_short_bits) - 1))
- cost += vp8_cost_bit(p [MVPbits + mvnum_short_bits], (x >> mvnum_short_bits) & 1);
- }
-
- return cost; // + vp8_cost_bit( p [MVPsign], v < 0);
-}
-
-void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc,
- const int mvc_flag[2]) {
- int i = 1; // -mv_max;
- unsigned int cost0 = 0;
- unsigned int cost1 = 0;
-
- vp8_clear_system_state();
-
- i = 1;
-
- if (mvc_flag[0]) {
- mvcost [0] [0] = cost_mvcomponent(0, &mvc[0]);
-
- do {
- // mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]);
- cost0 = cost_mvcomponent(i, &mvc[0]);
-
- mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]);
- mvcost [0] [-i] = cost0 + vp8_cost_one(mvc[0].prob[MVPsign]);
- } while (++i <= mv_max);
- }
-
- i = 1;
-
- if (mvc_flag[1]) {
- mvcost [1] [0] = cost_mvcomponent(0, &mvc[1]);
-
- do {
- // mvcost [1] [i] = cost_mvcomponent( i, mvc[1]);
- cost1 = cost_mvcomponent(i, &mvc[1]);
-
- mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]);
- mvcost [1] [-i] = cost1 + vp8_cost_one(mvc[1].prob[MVPsign]);
- } while (++i <= mv_max);
- }
-}
-
-
-// Motion vector probability table update depends on benefit.
-// Small correction allows for the fact that an update to an MV probability
-// may have benefit in subsequent frames as well as the current one.
-
-#define MV_PROB_UPDATE_CORRECTION -1
-
-
-__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2]) {
- const unsigned int tot = ct[0] + ct[1];
-
- if (tot) {
- const vp8_prob x = ((ct[0] * 255) / tot) & -2;
- *p = x ? x : 1;
- }
-}
-
-static void update(
- vp8_writer *const bc,
- const unsigned int ct[2],
- vp8_prob *const cur_p,
- const vp8_prob new_p,
- const vp8_prob update_p,
- int *updated
-) {
- const int cur_b = vp8_cost_branch(ct, *cur_p);
- const int new_b = vp8_cost_branch(ct, new_p);
- const int cost = 7 + MV_PROB_UPDATE_CORRECTION + ((vp8_cost_one(update_p) - vp8_cost_zero(update_p) + 128) >> 8);
-
- if (cur_b - new_b > cost) {
- *cur_p = new_p;
- vp8_write(bc, 1, update_p);
- vp8_write_literal(bc, new_p >> 1, 7);
- *updated = 1;
-
- } else
- vp8_write(bc, 0, update_p);
-}
-
-static void write_component_probs(
- vp8_writer *const bc,
- struct mv_context *cur_mvc,
- const struct mv_context *default_mvc_,
- const struct mv_context *update_mvc,
- const unsigned int events [MVvals],
- unsigned int rc,
- int *updated
-) {
- vp8_prob *Pcur = cur_mvc->prob;
- const vp8_prob *default_mvc = default_mvc_->prob;
- const vp8_prob *Pupdate = update_mvc->prob;
- unsigned int is_short_ct[2], sign_ct[2];
-
- unsigned int bit_ct [mvlong_width] [2];
-
- unsigned int short_ct [mvnum_short];
- unsigned int short_bct [mvnum_short - 1] [2];
-
- vp8_prob Pnew [MVPcount];
-
- (void) rc;
- vp8_copy_array(Pnew, default_mvc, MVPcount);
-
- vp8_zero(is_short_ct)
- vp8_zero(sign_ct)
- vp8_zero(bit_ct)
- vp8_zero(short_ct)
- vp8_zero(short_bct)
-
-
- // j=0
- {
- const int c = events [mv_max];
-
- is_short_ct [0] += c; // Short vector
- short_ct [0] += c; // Magnitude distribution
- }
-
- // j: 1 ~ mv_max (1023)
- {
- int j = 1;
-
- do {
- const int c1 = events [mv_max + j]; // positive
- const int c2 = events [mv_max - j]; // negative
- const int c = c1 + c2;
- int a = j;
-
- sign_ct [0] += c1;
- sign_ct [1] += c2;
-
- if (a < mvnum_short) {
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
- } else {
- int k = mvlong_width - 1;
- is_short_ct [1] += c; // Long vector
-
- /* bit 3 not always encoded. */
- do
- bit_ct [k] [(a >> k) & 1] += c;
-
- while (--k >= 0);
- }
- } while (++j <= mv_max);
- }
-
- calc_prob(Pnew + mvpis_short, is_short_ct);
-
- calc_prob(Pnew + MVPsign, sign_ct);
-
- {
- vp8_prob p [mvnum_short - 1]; /* actually only need branch ct */
- int j = 0;
-
- vp8_tree_probs_from_distribution(
- mvnum_short, vp8_small_mvencodings, vp8_small_mvtree,
- p, short_bct, short_ct,
- 256, 1
- );
-
- do
- calc_prob(Pnew + MVPshort + j, short_bct[j]);
-
- while (++j < mvnum_short - 1);
- }
-
- {
- int j = 0;
-
- do
- calc_prob(Pnew + MVPbits + j, bit_ct[j]);
-
- while (++j < mvlong_width);
- }
-
- update(bc, is_short_ct, Pcur + mvpis_short, Pnew[mvpis_short],
- *Pupdate++, updated);
-
- update(bc, sign_ct, Pcur + MVPsign, Pnew[MVPsign],
- *Pupdate++, updated);
-
- {
- const vp8_prob *const new_p = Pnew + MVPshort;
- vp8_prob *const cur_p = Pcur + MVPshort;
-
- int j = 0;
-
- do
-
- update(bc, short_bct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvnum_short - 1);
- }
-
- {
- const vp8_prob *const new_p = Pnew + MVPbits;
- vp8_prob *const cur_p = Pcur + MVPbits;
-
- int j = 0;
-
- do
-
- update(bc, bit_ct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvlong_width);
- }
-}
-
-void vp8_write_mvprobs(VP8_COMP* const cpi, vp8_writer* const bc) {
- MV_CONTEXT *mvc = cpi->common.fc.mvc;
- int flags[2] = {0, 0};
-#ifdef ENTROPY_STATS
- active_section = 4;
-#endif
- write_component_probs(
- bc, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0],
- cpi->MVcount[0], 0, &flags[0]);
-
- write_component_probs(
- bc, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1],
- cpi->MVcount[1], 1, &flags[1]);
-
- if (flags[0] || flags[1])
- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags);
-
-#ifdef ENTROPY_STATS
- active_section = 5;
-#endif
-}
-
-
-static void encode_mvcomponent_hp(
- vp8_writer *const bc,
- const int v,
- const struct mv_context_hp *mvc
-) {
- const vp8_prob *p = mvc->prob;
- const int x = v < 0 ? -v : v;
-
- if (x < mvnum_short_hp) { // Small
- vp8_write(bc, 0, p[mvpis_short_hp]);
- vp8_treed_write(bc, vp8_small_mvtree_hp, p + MVPshort_hp, x,
- mvnum_short_bits_hp);
- if (!x)
- return; // no sign bit
- } else { // Large
- int i = 0;
-
- vp8_write(bc, 1, p[mvpis_short_hp]);
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits_hp + i]);
-
- while (++i < mvnum_short_bits_hp);
-
- i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- vp8_write(bc, (x >> i) & 1, p[MVPbits_hp + i]);
-
- while (--i > mvnum_short_bits_hp);
-
- if (x & ~((2 << mvnum_short_bits_hp) - 1))
- vp8_write(bc, (x >> mvnum_short_bits_hp) & 1,
- p[MVPbits_hp + mvnum_short_bits_hp]);
- }
-
- vp8_write(bc, v < 0, p[MVPsign_hp]);
-}
-
-void vp8_encode_motion_vector_hp(vp8_writer *bc, const MV *mv,
- const MV_CONTEXT_HP *mvc) {
-
- encode_mvcomponent_hp(bc, mv->row, &mvc[0]);
- encode_mvcomponent_hp(bc, mv->col, &mvc[1]);
-}
-
-
-static unsigned int cost_mvcomponent_hp(const int v,
- const struct mv_context_hp *mvc) {
- const vp8_prob *p = mvc->prob;
- const int x = v; // v<0? -v:v;
- unsigned int cost;
-
- if (x < mvnum_short_hp) {
- cost = vp8_cost_zero(p [mvpis_short_hp])
- + vp8_treed_cost(vp8_small_mvtree_hp, p + MVPshort_hp, x,
- mvnum_short_bits_hp);
-
- if (!x)
- return cost;
- } else {
- int i = 0;
- cost = vp8_cost_one(p [mvpis_short_hp]);
-
- do
- cost += vp8_cost_bit(p [MVPbits_hp + i], (x >> i) & 1);
-
- while (++i < mvnum_short_bits_hp);
-
- i = mvlong_width_hp - 1; /* Skip bit 3, which is sometimes implicit */
-
- do
- cost += vp8_cost_bit(p [MVPbits_hp + i], (x >> i) & 1);
-
- while (--i > mvnum_short_bits_hp);
-
- if (x & ~((2 << mvnum_short_bits_hp) - 1))
- cost += vp8_cost_bit(p [MVPbits_hp + mvnum_short_bits_hp],
- (x >> mvnum_short_bits_hp) & 1);
- }
-
- return cost; // + vp8_cost_bit( p [MVPsign], v < 0);
-}
-
-void vp8_build_component_cost_table_hp(int *mvcost[2],
- const MV_CONTEXT_HP *mvc,
- const int mvc_flag[2]) {
- int i = 1; // -mv_max;
- unsigned int cost0 = 0;
- unsigned int cost1 = 0;
-
- vp8_clear_system_state();
-
- i = 1;
-
- if (mvc_flag[0]) {
- mvcost [0] [0] = cost_mvcomponent_hp(0, &mvc[0]);
-
- do {
- // mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]);
- cost0 = cost_mvcomponent_hp(i, &mvc[0]);
-
- mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign_hp]);
- mvcost [0] [-i] = cost0 + vp8_cost_one(mvc[0].prob[MVPsign_hp]);
- } while (++i <= mv_max_hp);
- }
-
- i = 1;
-
- if (mvc_flag[1]) {
- mvcost [1] [0] = cost_mvcomponent_hp(0, &mvc[1]);
-
- do {
- // mvcost [1] [i] = cost_mvcomponent( i, mvc[1]);
- cost1 = cost_mvcomponent_hp(i, &mvc[1]);
-
- mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign_hp]);
- mvcost [1] [-i] = cost1 + vp8_cost_one(mvc[1].prob[MVPsign_hp]);
- } while (++i <= mv_max_hp);
- }
-}
-
-
-static void write_component_probs_hp(
- vp8_writer *const bc,
- struct mv_context_hp *cur_mvc,
- const struct mv_context_hp *default_mvc_,
- const struct mv_context_hp *update_mvc,
- const unsigned int events [MVvals_hp],
- unsigned int rc,
- int *updated
-) {
- vp8_prob *Pcur = cur_mvc->prob;
- const vp8_prob *default_mvc = default_mvc_->prob;
- const vp8_prob *Pupdate = update_mvc->prob;
- unsigned int is_short_ct[2], sign_ct[2];
-
- unsigned int bit_ct [mvlong_width_hp] [2];
-
- unsigned int short_ct [mvnum_short_hp];
- unsigned int short_bct [mvnum_short_hp - 1] [2];
-
- vp8_prob Pnew [MVPcount_hp];
-
- (void) rc;
- vp8_copy_array(Pnew, default_mvc, MVPcount_hp);
-
- vp8_zero(is_short_ct)
- vp8_zero(sign_ct)
- vp8_zero(bit_ct)
- vp8_zero(short_ct)
- vp8_zero(short_bct)
-
-
- // j=0
- {
- const int c = events [mv_max_hp];
-
- is_short_ct [0] += c; // Short vector
- short_ct [0] += c; // Magnitude distribution
- }
-
- // j: 1 ~ mv_max (1023)
- {
- int j = 1;
-
- do {
- const int c1 = events [mv_max_hp + j]; // positive
- const int c2 = events [mv_max_hp - j]; // negative
- const int c = c1 + c2;
- int a = j;
-
- sign_ct [0] += c1;
- sign_ct [1] += c2;
-
- if (a < mvnum_short_hp) {
- is_short_ct [0] += c; // Short vector
- short_ct [a] += c; // Magnitude distribution
- } else {
- int k = mvlong_width_hp - 1;
- is_short_ct [1] += c; // Long vector
-
- /* bit 3 not always encoded. */
- do
- bit_ct [k] [(a >> k) & 1] += c;
-
- while (--k >= 0);
- }
- } while (++j <= mv_max_hp);
- }
-
- calc_prob(Pnew + mvpis_short_hp, is_short_ct);
-
- calc_prob(Pnew + MVPsign_hp, sign_ct);
-
- {
- vp8_prob p [mvnum_short_hp - 1]; /* actually only need branch ct */
- int j = 0;
-
- vp8_tree_probs_from_distribution(
- mvnum_short_hp, vp8_small_mvencodings_hp, vp8_small_mvtree_hp,
- p, short_bct, short_ct,
- 256, 1
- );
-
- do
- calc_prob(Pnew + MVPshort_hp + j, short_bct[j]);
-
- while (++j < mvnum_short_hp - 1);
- }
-
- {
- int j = 0;
-
- do
- calc_prob(Pnew + MVPbits_hp + j, bit_ct[j]);
-
- while (++j < mvlong_width_hp);
- }
-
- update(bc, is_short_ct, Pcur + mvpis_short_hp, Pnew[mvpis_short_hp],
- *Pupdate++, updated);
-
- update(bc, sign_ct, Pcur + MVPsign_hp, Pnew[MVPsign_hp], *Pupdate++,
- updated);
-
- {
- const vp8_prob *const new_p = Pnew + MVPshort_hp;
- vp8_prob *const cur_p = Pcur + MVPshort_hp;
-
- int j = 0;
-
- do
-
- update(bc, short_bct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvnum_short_hp - 1);
- }
-
- {
- const vp8_prob *const new_p = Pnew + MVPbits_hp;
- vp8_prob *const cur_p = Pcur + MVPbits_hp;
-
- int j = 0;
-
- do
-
- update(bc, bit_ct[j], cur_p + j, new_p[j], *Pupdate++, updated);
-
- while (++j < mvlong_width_hp);
- }
-}
-
-void vp8_write_mvprobs_hp(VP8_COMP* const cpi, vp8_writer* const bc) {
- MV_CONTEXT_HP *mvc = cpi->common.fc.mvc_hp;
- int flags[2] = {0, 0};
-#ifdef ENTROPY_STATS
- active_section = 4;
-#endif
- write_component_probs_hp(
- bc, &mvc[0], &vp8_default_mv_context_hp[0], &vp8_mv_update_probs_hp[0],
- cpi->MVcount_hp[0], 0, &flags[0]
- );
- write_component_probs_hp(
- bc, &mvc[1], &vp8_default_mv_context_hp[1], &vp8_mv_update_probs_hp[1],
- cpi->MVcount_hp[1], 1, &flags[1]
- );
-
- if (flags[0] || flags[1])
- vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp,
- (const MV_CONTEXT_HP *)
- cpi->common.fc.mvc_hp, flags);
-#ifdef ENTROPY_STATS
- active_section = 5;
-#endif
-}
-
-#endif /* CONFIG_NEWMVENTROPY */
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index c06831cb2..254536580 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -14,7 +14,6 @@
#include "onyx_int.h"
-#if CONFIG_NEWMVENTROPY
void vp8_write_nmvprobs(VP8_COMP* const, int usehp, vp8_writer* const);
void vp8_encode_nmv(vp8_writer* const w, const MV* const mv,
const MV* const ref, const nmv_context* const mvctx);
@@ -27,19 +26,5 @@ void vp8_build_nmv_cost_table(int *mvjoint,
int usehp,
int mvc_flag_v,
int mvc_flag_h);
-#else /* CONFIG_NEWMVENTROPY */
-void vp8_write_mvprobs(VP8_COMP* const, vp8_writer* const);
-void vp8_encode_motion_vector(vp8_writer* const, const MV* const,
- const MV_CONTEXT* const);
-void vp8_build_component_cost_table(int *mvcost[2],
- const MV_CONTEXT*,
- const int mvc_flag[2]);
-void vp8_write_mvprobs_hp(VP8_COMP* const, vp8_writer* const);
-void vp8_encode_motion_vector_hp(vp8_writer* const, const MV* const,
- const MV_CONTEXT_HP* const);
-void vp8_build_component_cost_table_hp(int *mvcost[2],
- const MV_CONTEXT_HP*,
- const int mvc_flag[2]);
-#endif /* CONFIG_NEWMVENTROPY */
#endif
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 95f6d97d3..09d5a762e 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -364,7 +364,8 @@ static void zz_motion_search(VP8_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r
ref_ptr = (unsigned char *)(*(d->base_pre) + d->pre);
- VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16)(src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
+ vp8_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
+ (unsigned int *)(best_motion_err));
}
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -387,7 +388,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
int new_mv_mode_penalty = 256;
// override the default variance function to use MSE
- v_fn_ptr.vf = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);
+ v_fn_ptr.vf = vp8_mse16x16;
// Set up pointers for this macro block recon buffer
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
@@ -492,12 +493,7 @@ void vp8_first_pass(VP8_COMP *cpi) {
{
int flag[2] = {1, 1};
vp8_init_mv_probs(cm);
-#if CONFIG_NEWMVENTROPY
vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
-#else
- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
- vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cm->fc.mvc_hp, flag);
-#endif
}
// for each macroblock row in image
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 356e32c3f..44e83fdc7 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -23,80 +23,6 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER
void vp8_cmachine_specific_config(VP8_COMP *cpi) {
#if CONFIG_RUNTIME_CPU_DETECT
cpi->rtcd.common = &cpi->common.rtcd;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32 = vp8_sad32x32_c;
-#endif
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;
-
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32x3 = vp8_sad32x32x3_c;
-#endif
- cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_c;
- cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_c;
- cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_c;
- cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c;
- cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c;
-
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32x8 = vp8_sad32x32x8_c;
-#endif
- cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c;
- cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c;
- cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c;
- cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c;
- cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c;
-
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.sad32x32x4d = vp8_sad32x32x4d_c;
-#endif
- cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c;
- cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c;
- cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c;
- cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_c;
- cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_c;
-#if ARCH_X86 || ARCH_X86_64
- cpi->rtcd.variance.copy32xn = vp8_copy32xn_c;
-#endif
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.var32x32 = vp8_variance32x32_c;
-#endif
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.subpixvar32x32 = vp8_sub_pixel_variance32x32_c;
-#endif
- cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.halfpixvar32x32_h = vp8_variance_halfpixvar32x32_h_c;
-#endif
- cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.halfpixvar32x32_v = vp8_variance_halfpixvar32x32_v_c;
-#endif
- cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.halfpixvar32x32_hv = vp8_variance_halfpixvar32x32_hv_c;
-#endif
- cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c;
-#if CONFIG_SUPERBLOCKS
- cpi->rtcd.variance.subpixmse32x32 = vp8_sub_pixel_mse32x32_c;
-#endif
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c;
cpi->rtcd.fdct.short16x16 = vp8_short_fdct16x16_c;
@@ -118,16 +44,11 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) {
cpi->rtcd.search.refining_search = vp8_refining_search_sad;
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
- cpi->rtcd.variance.satd16x16 = vp8_satd16x16_c;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
-#if CONFIG_INTERNAL_STATS
- cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c;
- cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c;
-#endif
#endif
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c
index 180ee5870..2eecfcdad 100644
--- a/vp8/encoder/mbgraph.c
+++ b/vp8/encoder/mbgraph.c
@@ -83,10 +83,8 @@ static unsigned int do_16x16_motion_iteration
vp8_set_mbmode_and_mvs(x, NEWMV, dst_mv);
vp8_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
- // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- best_err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (xd->dst.y_buffer, xd->dst.y_stride,
- xd->predictor, 16, INT_MAX);
+ best_err = vp8_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
+ xd->predictor, 16, INT_MAX);
/* restore UMV window */
x->mv_col_min = tmp_col_min;
@@ -130,11 +128,8 @@ static int do_16x16_motion_search
// FIXME should really use something like near/nearest MV and/or MV prediction
xd->pre.y_buffer = ref->y_buffer + mb_y_offset;
xd->pre.y_stride = ref->y_stride;
- // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (ref->y_buffer + mb_y_offset,
- ref->y_stride, xd->dst.y_buffer,
- xd->dst.y_stride, INT_MAX);
+ err = vp8_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
dst_mv->as_int = 0;
// Test last reference frame using the previous best mv as the
@@ -193,10 +188,8 @@ static int do_16x16_zerozero_search
xd->pre.y_buffer = ref->y_buffer + mb_y_offset;
xd->pre.y_stride = ref->y_stride;
// VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (ref->y_buffer + mb_y_offset,
- ref->y_stride, xd->dst.y_buffer,
- xd->dst.y_stride, INT_MAX);
+ err = vp8_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
dst_mv->as_int = 0;
@@ -221,11 +214,8 @@ static int find_best_16x16_intra
xd->mode_info_context->mbmi.mode = mode;
vp8_build_intra_predictors_mby(xd);
- // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
- err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
- (xd->predictor, 16,
- buf->y_buffer + mb_y_offset,
- buf->y_stride, best_err);
+ err = vp8_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset,
+ buf->y_stride, best_err);
// find best
if (err < best_err) {
best_err = err;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index a6cf2f18b..210887491 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -47,15 +47,9 @@ int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
MV v;
v.row = (mv->as_mv.row - ref->as_mv.row);
v.col = (mv->as_mv.col - ref->as_mv.col);
-#if CONFIG_NEWMVENTROPY
return ((mvjcost[vp8_get_mv_joint(v)] +
mvcost[0][v.row] + mvcost[1][v.col]) *
Weight) >> 7;
-#else
- return ((mvcost[0][v.row >> (ishp == 0)] +
- mvcost[1][v.col >> (ishp == 0)])
- * Weight) >> 7;
-#endif
}
static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
@@ -64,14 +58,9 @@ static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
MV v;
v.row = (mv->as_mv.row - ref->as_mv.row);
v.col = (mv->as_mv.col - ref->as_mv.col);
-#if CONFIG_NEWMVENTROPY
return ((mvjcost[vp8_get_mv_joint(v)] +
mvcost[0][v.row] + mvcost[1][v.col]) *
error_per_bit + 128) >> 8;
-#else
- return ((mvcost[0][v.row >> (ishp == 0)] +
- mvcost[1][v.col >> (ishp == 0)]) * error_per_bit + 128) >> 8;
-#endif
}
return 0;
}
@@ -83,14 +72,9 @@ static int mvsad_err_cost(int_mv *mv, int_mv *ref, DEC_MVSADCOSTS,
MV v;
v.row = (mv->as_mv.row - ref->as_mv.row);
v.col = (mv->as_mv.col - ref->as_mv.col);
-#if CONFIG_NEWMVENTROPY
return ((mvjsadcost[vp8_get_mv_joint(v)] +
mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
error_per_bit + 128) >> 8;
-#else
- return ((mvsadcost[0][v.row] + mvsadcost[1][v.col])
- * error_per_bit + 128) >> 8;
-#endif
}
return 0;
}
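
All three cost helpers above now share one formula: an MV rate built from a joint-class cost plus per-component costs, scaled by the error/weight factor and rounded back down by eight bits. A self-contained sketch of that arithmetic follows; the table names are invented, the joint index matches the row-nonzero*2 + col-nonzero scheme the MVC macro below uses, and the real component tables are pointers offset to their centre so signed differences index correctly.

    /* Sketch only: joint_cost has four entries; row_cost/col_cost are assumed
     * to be centred so negative component differences are valid indices. */
    static int mv_rate_cost_sketch(int d_row, int d_col, int error_per_bit,
                                   const int *joint_cost,
                                   const int *row_cost, const int *col_cost) {
      const int joint = (d_row != 0) * 2 + (d_col != 0);
      const int rate = joint_cost[joint] + row_cost[d_row] + col_cost[d_col];
      return (rate * error_per_bit + 128) >> 8;  /* +128 rounds before the >>8 */
    }
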
@@ -220,35 +204,42 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
* could reduce the area.
*/
-#if CONFIG_NEWMVENTROPY
/* estimated cost of a motion vector (r,c) */
-#define MVC(r,c) \
- (mvcost ? \
- ((mvjcost[((r)!=rr)*2 + ((c)!=rc)] + \
- mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * error_per_bit + 128 )>>8 : 0)
-#else
-#define MVC(r,c) \
- (mvcost ? \
- ((mvcost[0][((r)-rr)>>(xd->allow_high_precision_mv==0)] + \
- mvcost[1][((c)-rc)>>(xd->allow_high_precision_mv==0)]) * \
- error_per_bit + 128 )>>8 : 0)
-#endif /* CONFIG_NEWMVENTROPY */
+#define MVC(r, c) \
+ (mvcost ? \
+ ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
+ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
+ error_per_bit + 128) >> 8 : 0)
-#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
+#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset
+ // for svf calc
-#define IFMVCV(r,c,s,e) \
- if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
+#define IFMVCV(r, c, s, e) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
+ s \
+ else \
+ e;
/* pointer to predictor base of a motionvector */
-#define PRE(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset)))
+#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
/* returns subpixel variance error function */
-#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
-
-/* checks if (r,c) has better score than previous best */
-#define CHECK_BETTER(v,r,c) \
- IFMVCV(r,c,{thismse = (DIST(r,c)); if((v = MVC(r,c)+thismse) < besterr) \
- { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)
+#define DIST(r, c) \
+ vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
+
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER(v, r, c) \
+ IFMVCV(r, c, { \
+ thismse = (DIST(r, c)); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ }, \
+ v = INT_MAX;)
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
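
For readers following the reformatted macros: PRE() takes the integer-pel part of an eighth-pel position to form the predictor pointer, while SP() doubles the three fractional bits into the offset expected by the svf() sub-pixel variance call (per the macro's own comment). A standalone sketch of that split, with invented names:

    static void split_eighth_pel_sketch(int r, int c, int y_stride,
                                        int *base_offset,
                                        int *yoffset, int *xoffset) {
      *base_offset = (r >> 3) * y_stride + (c >> 3);  /* PRE(): full-pel address */
      *yoffset = (r & 7) << 1;                        /* SP(): fractional phase  */
      *xoffset = (c & 7) << 1;
    }
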
@@ -307,17 +298,10 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
br = bestmv->as_mv.row << 3;
bc = bestmv->as_mv.col << 3;
hstep = 4;
-#if CONFIG_NEWMVENTROPY
minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
-#else
- minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
- maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
- minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
- maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
-#endif
tr = br;
tc = bc;
@@ -403,13 +387,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
tc = bc;
}
-#if CONFIG_NEWMVENTROPY
if (xd->allow_high_precision_mv) {
usehp = vp8_use_nmv_hp(&ref_mv->as_mv);
} else {
usehp = 0;
}
-#endif
if (usehp) {
hstep >>= 1;
@@ -771,13 +753,11 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
-#if CONFIG_NEWMVENTROPY
if (x->e_mbd.allow_high_precision_mv) {
usehp = vp8_use_nmv_hp(&ref_mv->as_mv);
} else {
usehp = 0;
}
-#endif
if (!usehp)
return bestmse;
@@ -1304,16 +1284,8 @@ int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1423,16 +1395,8 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1479,7 +1443,8 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
for (t = 0; t < 4; t++)
block_offset[t] = ss[i + t].offset + best_address;
- fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
+ sad_array);
for (t = 0; t < 4; t++, i++) {
if (sad_array[t] < bestsad) {
@@ -1631,16 +1596,8 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_max = ref_col + distance;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1735,16 +1692,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned int sad_array[3];
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1872,16 +1821,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
unsigned int sad_array[3];
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -2022,16 +1963,8 @@ int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -2106,16 +2039,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
MACROBLOCKD *xd = &x->e_mbd;
int_mv fcenter_mv;
-#if CONFIG_NEWMVENTROPY
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-#else
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
- if (xd->allow_high_precision_mv) {
- mvsadcost[0] = x->mvsadcost_hp[0];
- mvsadcost[1] = x->mvsadcost_hp[1];
- }
-#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
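
One detail repeated across all of the search routines above: center_mv is kept in eighth-pel units, so fcenter_mv shifts both components down by three bits to obtain the full-pel reference that the SAD-stage cost is measured against. The conversion in isolation, with illustrative names:

    static void to_fullpel_sketch(int mv_row_q3, int mv_col_q3,
                                  int *row_fp, int *col_fp) {
      *row_fp = mv_row_q3 >> 3;   /* eighth-pel -> full-pel */
      *col_fp = mv_col_q3 >> 3;
    }
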
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index afca58084..f09106927 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -15,21 +15,12 @@
#include "block.h"
#include "variance.h"
-#if CONFIG_NEWMVENTROPY
#define MVCOSTS mvjcost, mvcost
#define MVSADCOSTS mvjsadcost, mvsadcost
#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
#define DEC_MVSADCOSTS int *mvjsadcost, int *mvsadcost[2]
#define NULLMVCOST NULL, NULL
#define XMVCOST x->nmvjointcost, (x->e_mbd.allow_high_precision_mv?x->nmvcost_hp:x->nmvcost)
-#else
-#define MVCOSTS mvcost
-#define MVSADCOSTS mvsadcost
-#define DEC_MVCOSTS int *mvcost[2]
-#define DEC_MVSADCOSTS int *mvsadcost[2]
-#define NULLMVCOST NULL
-#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
-#endif /* CONFIG_NEWMVENTROPY */
#ifdef ENTROPY_STATS
extern void init_mv_ref_counts();
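
The retained macros keep call sites and prototypes in sync: DEC_MVCOSTS declares the two cost parameters (int *mvjcost, int *mvcost[2]) and XMVCOST supplies them, selecting the high-precision tables when the macroblock allows them. At any call site, XMVCOST therefore expands to the argument pair:

    x->nmvjointcost,
    (x->e_mbd.allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost)
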
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index b1abd1e2a..23b9973c3 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -46,14 +46,11 @@ void vp8_init_mode_costs(VP8_COMP *c) {
vp8_cost_tokens(c->mb.i8x8_mode_costs,
x->fc.i8x8_mode_prob, vp8_i8x8_mode_tree);
-#if CONFIG_SWITCHABLE_INTERP
{
int i;
for (i = 0; i <= VP8_SWITCHABLE_FILTERS; ++i)
- //for (i = 0; i <= 0; ++i)
vp8_cost_tokens((int *)c->mb.switchable_interp_costs[i],
x->fc.switchable_interp_prob[i],
vp8_switchable_interp_tree);
}
-#endif
}
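
vp8_cost_tokens() converts each switchable-filter tree probability into a bit cost before the RD loop consumes it; in libvpx those costs are kept in roughly 1/256-bit units, so a probability of 128 costs about 256. A minimal sketch of that conversion using a hypothetical helper rather than the library's precomputed lookup table (the constant scale is an assumption for illustration):

    #include <math.h>

    /* Hypothetical stand-in for the precomputed cost table: cost of coding a
     * branch with 8-bit probability prob (1..255), in ~1/256-bit units. */
    static int prob_to_cost_sketch(int prob) {
      return (int)(256.0 * -log2(prob / 256.0) + 0.5);
    }
    /* e.g. prob_to_cost_sketch(128) == 256, i.e. about one bit */
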
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 14e9e784a..f11ff5936 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -77,7 +77,7 @@ extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFF
extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
#endif
-int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
+int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance);
@@ -85,6 +85,7 @@ static void set_default_lf_deltas(VP8_COMP *cpi);
extern const int vp8_gf_interval_table[101];
+#define DEFAULT_INTERP_FILTER EIGHTTAP /* SWITCHABLE for better performance */
#define SEARCH_BEST_FILTER 0 /* to search exhaustively for
best filter */
#define RESET_FOREACH_FILTER 0 /* whether to reset the encoder state
@@ -101,25 +102,14 @@ extern const int vp8_gf_interval_table[101];
#if CONFIG_INTERNAL_STATS
#include "math.h"
-extern double vp8_calc_ssim
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- int lumamask,
- double *weight,
- const vp8_variance_rtcd_vtable_t *rtcd
-);
+extern double vp8_calc_ssim(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, int lumamask,
+ double *weight);
-extern double vp8_calc_ssimg
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- double *ssim_y,
- double *ssim_u,
- double *ssim_v,
- const vp8_variance_rtcd_vtable_t *rtcd
-);
+extern double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, double *ssim_y,
+ double *ssim_u, double *ssim_v);
#endif
@@ -149,12 +139,10 @@ extern int skip_false_count;
extern int intra_mode_stats[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES];
#endif
-#if CONFIG_NEWMVENTROPY
#ifdef NMV_STATS
extern void init_nmvstats();
extern void print_nmvstats();
#endif
-#endif
#ifdef SPEEDSTATS
unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -1630,7 +1618,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) {
cpi->cq_target_quality = cpi->oxcf.cq_level;
if (!cm->use_bilinear_mc_filter)
- cm->mcomp_filter_type = EIGHTTAP;
+ cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
else
cm->mcomp_filter_type = BILINEAR;
@@ -1700,8 +1688,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) {
#define M_LOG2_E 0.693147180559945309417
#define log2f(x) (log (x) / (float) M_LOG2_E)
-#if CONFIG_NEWMVENTROPY
-
static void cal_nmvjointsadcost(int *mvjointsadcost) {
mvjointsadcost[0] = 600;
mvjointsadcost[1] = 300;
@@ -1739,40 +1725,6 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
} while (++i <= MV_MAX);
}
-#else
-
-static void cal_mvsadcosts(int *mvsadcost[2]) {
- int i = 1;
-
- mvsadcost [0] [0] = 300;
- mvsadcost [1] [0] = 300;
-
- do {
- double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost [0][i] = (int) z;
- mvsadcost [1][i] = (int) z;
- mvsadcost [0][-i] = (int) z;
- mvsadcost [1][-i] = (int) z;
- } while (++i <= mvfp_max);
-}
-
-static void cal_mvsadcosts_hp(int *mvsadcost[2]) {
- int i = 1;
-
- mvsadcost [0] [0] = 300;
- mvsadcost [1] [0] = 300;
-
- do {
- double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost [0][i] = (int) z;
- mvsadcost [1][i] = (int) z;
- mvsadcost [0][-i] = (int) z;
- mvsadcost [1][-i] = (int) z;
- } while (++i <= mvfp_max_hp);
-}
-
-#endif /* CONFIG_NEWMVENTROPY */
-
VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
int i;
volatile union {
@@ -1824,10 +1776,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
#endif
for (i = 0; i < COMP_PRED_CONTEXTS; i++)
cm->prob_comppred[i] = 128;
-#if CONFIG_TX_SELECT
for (i = 0; i < TX_SIZE_MAX - 1; i++)
cm->prob_tx[i] = 128;
-#endif
// Prime the recent reference frame useage counters.
// Hereafter they will be maintained as a sort of moving average
@@ -1888,11 +1838,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
vp8_zero(inter_uv_modes);
vp8_zero(inter_b_modes);
#endif
-#if CONFIG_NEWMVENTROPY
#ifdef NMV_STATS
init_nmvstats();
#endif
-#endif
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90 << 12;
@@ -1958,7 +1906,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->gf_rate_correction_factor = 1.0;
cpi->twopass.est_max_qcorrection_factor = 1.0;
-#if CONFIG_NEWMVENTROPY
cal_nmvjointsadcost(cpi->mb.nmvjointsadcost);
cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX];
cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX];
@@ -1971,19 +1918,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->mb.nmvsadcost_hp[0] = &cpi->mb.nmvsadcosts_hp[0][MV_MAX];
cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX];
cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp);
-#else
- cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max + 1];
- cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max + 1];
- cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max + 1];
- cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max + 1];
- cal_mvsadcosts(cpi->mb.mvsadcost);
-
- cpi->mb.mvcost_hp[0] = &cpi->mb.mvcosts_hp[0][mv_max_hp + 1];
- cpi->mb.mvcost_hp[1] = &cpi->mb.mvcosts_hp[1][mv_max_hp + 1];
- cpi->mb.mvsadcost_hp[0] = &cpi->mb.mvsadcosts_hp[0][mvfp_max_hp + 1];
- cpi->mb.mvsadcost_hp[1] = &cpi->mb.mvsadcosts_hp[1][mvfp_max_hp + 1];
- cal_mvsadcosts_hp(cpi->mb.mvsadcost_hp);
-#endif /* CONFIG_NEWMVENTROPY */
for (i = 0; i < KEY_FRAME_CONTEXT; i++) {
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
@@ -2027,74 +1961,48 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
init_mv_ref_counts();
#endif
+#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \
+ cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \
+ cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
+ cpi->fn_ptr[BT].sdx3f = SDX3F; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF;
+
+
#if CONFIG_SUPERBLOCKS
- cpi->fn_ptr[BLOCK_32X32].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32);
- cpi->fn_ptr[BLOCK_32X32].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32);
- cpi->fn_ptr[BLOCK_32X32].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar32x32);
- cpi->fn_ptr[BLOCK_32X32].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_h);
- cpi->fn_ptr[BLOCK_32X32].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_v);
- cpi->fn_ptr[BLOCK_32X32].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_hv);
- cpi->fn_ptr[BLOCK_32X32].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x3);
- cpi->fn_ptr[BLOCK_32X32].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x8);
- cpi->fn_ptr[BLOCK_32X32].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x4d);
+ BFP(BLOCK_32X32, vp8_sad32x32, vp8_variance32x32, vp8_sub_pixel_variance32x32,
+ vp8_variance_halfpixvar32x32_h, vp8_variance_halfpixvar32x32_v,
+ vp8_variance_halfpixvar32x32_hv, vp8_sad32x32x3, vp8_sad32x32x8,
+ vp8_sad32x32x4d)
#endif
- cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
- cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
- cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
- cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);
- cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
- cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
- cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
- cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
- cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
-
- cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
- cpi->fn_ptr[BLOCK_16X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
- cpi->fn_ptr[BLOCK_16X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
- cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
- cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
- cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
-
- cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
- cpi->fn_ptr[BLOCK_8X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
- cpi->fn_ptr[BLOCK_8X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
- cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
- cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
- cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
-
- cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
- cpi->fn_ptr[BLOCK_8X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
- cpi->fn_ptr[BLOCK_8X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
- cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
- cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
- cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
-
- cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
- cpi->fn_ptr[BLOCK_4X4].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
- cpi->fn_ptr[BLOCK_4X4].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
- cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
- cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
- cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
- cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
- cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
- cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
+ BFP(BLOCK_16X16, vp8_sad16x16, vp8_variance16x16, vp8_sub_pixel_variance16x16,
+ vp8_variance_halfpixvar16x16_h, vp8_variance_halfpixvar16x16_v,
+ vp8_variance_halfpixvar16x16_hv, vp8_sad16x16x3, vp8_sad16x16x8,
+ vp8_sad16x16x4d)
+
+ BFP(BLOCK_16X8, vp8_sad16x8, vp8_variance16x8, vp8_sub_pixel_variance16x8,
+ NULL, NULL, NULL, vp8_sad16x8x3, vp8_sad16x8x8, vp8_sad16x8x4d)
+
+ BFP(BLOCK_8X16, vp8_sad8x16, vp8_variance8x16, vp8_sub_pixel_variance8x16,
+ NULL, NULL, NULL, vp8_sad8x16x3, vp8_sad8x16x8, vp8_sad8x16x4d)
+
+ BFP(BLOCK_8X8, vp8_sad8x8, vp8_variance8x8, vp8_sub_pixel_variance8x8,
+ NULL, NULL, NULL, vp8_sad8x8x3, vp8_sad8x8x8, vp8_sad8x8x4d)
+
+ BFP(BLOCK_4X4, vp8_sad4x4, vp8_variance4x4, vp8_sub_pixel_variance4x4,
+ NULL, NULL, NULL, vp8_sad4x4x3, vp8_sad4x4x8, vp8_sad4x4x4d)
#if ARCH_X86 || ARCH_X86_64
- cpi->fn_ptr[BLOCK_16X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_16X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_8X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_8X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
- cpi->fn_ptr[BLOCK_4X4].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
+ cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_8X8].copymem = vp8_copy32xn;
+ cpi->fn_ptr[BLOCK_4X4].copymem = vp8_copy32xn;
#endif
cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
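
The BFP() helper introduced above replaces the nine hand-written assignments per block size that the deleted lines spelled out. For reference, a single invocation such as BFP(BLOCK_16X16, ...) expands to:

    cpi->fn_ptr[BLOCK_16X16].sdf            = vp8_sad16x16;
    cpi->fn_ptr[BLOCK_16X16].vf             = vp8_variance16x16;
    cpi->fn_ptr[BLOCK_16X16].svf            = vp8_sub_pixel_variance16x16;
    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = vp8_variance_halfpixvar16x16_h;
    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = vp8_variance_halfpixvar16x16_v;
    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv;
    cpi->fn_ptr[BLOCK_16X16].sdx3f          = vp8_sad16x16x3;
    cpi->fn_ptr[BLOCK_16X16].sdx8f          = vp8_sad16x16x8;
    cpi->fn_ptr[BLOCK_16X16].sdx4df         = vp8_sad16x16x4d;
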
@@ -2136,12 +2044,10 @@ void vp8_remove_compressor(VP8_PTR *ptr) {
print_mode_context();
}
#endif
-#if CONFIG_NEWMVENTROPY
#ifdef NMV_STATS
if (cpi->pass != 1)
print_nmvstats();
#endif
-#endif
#if CONFIG_INTERNAL_STATS
@@ -2370,8 +2276,7 @@ void vp8_remove_compressor(VP8_PTR *ptr) {
static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
unsigned char *recon, int recon_stride,
- unsigned int cols, unsigned int rows,
- vp8_variance_rtcd_vtable_t *rtcd) {
+ unsigned int cols, unsigned int rows) {
unsigned int row, col;
uint64_t total_sse = 0;
int diff;
@@ -2380,9 +2285,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
for (col = 0; col + 16 <= cols; col += 16) {
unsigned int sse;
- VARIANCE_INVOKE(rtcd, mse16x16)(orig + col, orig_stride,
- recon + col, recon_stride,
- &sse);
+ vp8_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse);
total_sse += sse;
}
@@ -2434,8 +2337,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) {
pkt.kind = VPX_CODEC_PSNR_PKT;
sse = calc_plane_error(orig->y_buffer, orig->y_stride,
recon->y_buffer, recon->y_stride,
- width, height,
- IF_RTCD(&cpi->rtcd.variance));
+ width, height);
pkt.data.psnr.sse[0] = sse;
pkt.data.psnr.sse[1] = sse;
pkt.data.psnr.samples[0] = width * height;
@@ -2446,8 +2348,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) {
sse = calc_plane_error(orig->u_buffer, orig->uv_stride,
recon->u_buffer, recon->uv_stride,
- width, height,
- IF_RTCD(&cpi->rtcd.variance));
+ width, height);
pkt.data.psnr.sse[0] += sse;
pkt.data.psnr.sse[2] = sse;
pkt.data.psnr.samples[0] += width * height;
@@ -2455,8 +2356,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) {
sse = calc_plane_error(orig->v_buffer, orig->uv_stride,
recon->v_buffer, recon->uv_stride,
- width, height,
- IF_RTCD(&cpi->rtcd.variance));
+ width, height);
pkt.data.psnr.sse[0] += sse;
pkt.data.psnr.sse[3] = sse;
pkt.data.psnr.samples[0] += width * height;
@@ -3034,13 +2934,10 @@ static void encode_frame_to_data_rate
/* list of filters to search over */
int mcomp_filters_to_search[] = {
-#if CONFIG_SWITCHABLE_INTERP
EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE
-#else
- EIGHTTAP, EIGHTTAP_SHARP, SIXTAP,
-#endif
};
- int mcomp_filters = sizeof(mcomp_filters_to_search) / sizeof(*mcomp_filters_to_search);
+ int mcomp_filters = sizeof(mcomp_filters_to_search) /
+ sizeof(*mcomp_filters_to_search);
int mcomp_filter_index = 0;
INT64 mcomp_filter_cost[4];
@@ -3265,12 +3162,7 @@ static void encode_frame_to_data_rate
cm->mcomp_filter_type = mcomp_filters_to_search[0];
mcomp_filter_index = 0;
} else {
-#if CONFIG_SWITCHABLE_INTERP
- cm->mcomp_filter_type = SWITCHABLE;
-#else
- cm->mcomp_filter_type =
- (Q < SHARP_FILTER_QTHRESH ? EIGHTTAP_SHARP : EIGHTTAP);
-#endif
+ cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
}
/* TODO: Decide this more intelligently */
xd->allow_high_precision_mv = (Q < HIGH_PRECISION_MV_QTHRESH);
@@ -3428,8 +3320,7 @@ static void encode_frame_to_data_rate
if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) {
int last_q = Q;
int kf_err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
int high_err_target = cpi->ambient_err;
int low_err_target = (cpi->ambient_err >> 1);
@@ -3584,7 +3475,6 @@ static void encode_frame_to_data_rate
if (cpi->is_src_frame_alt_ref)
Loop = FALSE;
-#if CONFIG_SWITCHABLE_INTERP
if (cm->frame_type != KEY_FRAME &&
!sf->search_best_filter &&
cm->mcomp_filter_type == SWITCHABLE) {
@@ -3610,19 +3500,16 @@ static void encode_frame_to_data_rate
if (count[i]) {
cm->mcomp_filter_type = vp8_switchable_interp[i];
Loop = TRUE; /* Make sure to loop since the filter changed */
- //loop_count = -1;
break;
}
}
}
}
-#endif
if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) {
if (mcomp_filter_index < mcomp_filters) {
INT64 err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
INT64 rate = cpi->projected_frame_size << 8;
mcomp_filter_cost[mcomp_filter_index] =
(RDCOST(cpi->RDMULT, cpi->RDDIV, rate, err));
@@ -3684,8 +3571,7 @@ static void encode_frame_to_data_rate
// the force key frame
if (cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0)) {
cpi->ambient_err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
}
// This frame's MVs are saved and will be used in next frame's MV
@@ -3758,18 +3644,12 @@ static void encode_frame_to_data_rate
update_reference_frames(cm);
vp8_copy(cpi->common.fc.coef_counts, cpi->coef_counts);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cpi->common.fc.hybrid_coef_counts, cpi->hybrid_coef_counts);
-#endif
vp8_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cpi->common.fc.hybrid_coef_counts_8x8, cpi->hybrid_coef_counts_8x8);
-#endif
vp8_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cpi->common.fc.hybrid_coef_counts_16x16,
cpi->hybrid_coef_counts_16x16);
-#endif
vp8_adapt_coef_probs(&cpi->common);
if (cpi->common.frame_type != KEY_FRAME) {
vp8_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
@@ -3780,14 +3660,8 @@ static void encode_frame_to_data_rate
vp8_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count);
vp8_adapt_mode_probs(&cpi->common);
-#if CONFIG_NEWMVENTROPY
cpi->common.fc.NMVcount = cpi->NMVcount;
vp8_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
-#else
- vp8_copy(cpi->common.fc.MVcount, cpi->MVcount);
- vp8_copy(cpi->common.fc.MVcount_hp, cpi->MVcount_hp);
- vp8_adapt_mv_probs(&cpi->common);
-#endif /* CONFIG_NEWMVENTROPY */
vp8_update_mode_context(&cpi->common);
}
@@ -3903,8 +3777,7 @@ static void encode_frame_to_data_rate
vp8_clear_system_state(); // __asm emms;
recon_err = vp8_calc_ss_err(cpi->Source,
- &cm->yv12_fb[cm->new_fb_idx],
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->yv12_fb[cm->new_fb_idx]);
if (cpi->twopass.total_left_stats->coded_error != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
@@ -4390,16 +4263,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
int64_t sq_error;
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
- recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height,
- IF_RTCD(&cpi->rtcd.variance));
+ recon->y_buffer, recon->y_stride, orig->y_width,
+ orig->y_height);
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
- recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ recon->u_buffer, recon->uv_stride, orig->uv_width,
+ orig->uv_height);
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
- recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ recon->v_buffer, recon->uv_stride, orig->uv_width,
+ orig->uv_height);
sq_error = ye + ue + ve;
@@ -4419,16 +4292,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
vp8_clear_system_state();
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
- pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height,
- IF_RTCD(&cpi->rtcd.variance));
+ pp->y_buffer, pp->y_stride, orig->y_width,
+ orig->y_height);
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
- pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ pp->u_buffer, pp->uv_stride, orig->uv_width,
+ orig->uv_height);
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
- pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
- IF_RTCD(&cpi->rtcd.variance));
+ pp->v_buffer, pp->uv_stride, orig->uv_width,
+ orig->uv_height);
sq_error = ye + ue + ve;
@@ -4441,8 +4314,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
cpi->totalp += frame_psnr2;
frame_ssim2 = vp8_calc_ssim(cpi->Source,
- &cm->post_proc_buffer, 1, &weight,
- IF_RTCD(&cpi->rtcd.variance));
+ &cm->post_proc_buffer, 1, &weight);
cpi->summed_quality += frame_ssim2 * weight;
cpi->summed_weights += weight;
@@ -4461,7 +4333,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
if (cpi->b_calculate_ssimg) {
double y, u, v, frame_all;
frame_all = vp8_calc_ssimg(cpi->Source, cm->frame_to_show,
- &y, &u, &v, IF_RTCD(&cpi->rtcd.variance));
+ &y, &u, &v);
cpi->total_ssimg_y += y;
cpi->total_ssimg_u += u;
cpi->total_ssimg_v += v;
@@ -4604,19 +4476,19 @@ int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert
-int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd) {
+int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) {
int i, j;
int Total = 0;
unsigned char *src = source->y_buffer;
unsigned char *dst = dest->y_buffer;
- (void)rtcd;
// Loop through the Y plane raw and reconstruction data summing (square differences)
for (i = 0; i < source->y_height; i += 16) {
for (j = 0; j < source->y_width; j += 16) {
unsigned int sse;
- Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+ Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
+ &sse);
}
src += 16 * source->y_stride;
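
Both calc_plane_error() and vp8_calc_ss_err() walk the plane in 16x16 tiles and accumulate the value produced by vp8_mse16x16(). The per-tile quantity is the plain sum of squared differences, roughly as sketched below; the real routine is optimized and also reports the result through its sse out-parameter.

    static unsigned int sse16x16_sketch(const unsigned char *a, int a_stride,
                                        const unsigned char *b, int b_stride) {
      unsigned int sse = 0;
      int r, c;

      for (r = 0; r < 16; r++) {
        for (c = 0; c < 16; c++) {
          const int d = a[c] - b[c];
          sse += (unsigned int)(d * d);
        }
        a += a_stride;
        b += b_stride;
      }
      return sse;
    }
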
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 01151280c..ab6802509 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -60,17 +60,10 @@
#define VP8_TEMPORAL_ALT_REF 1
typedef struct {
-#if CONFIG_NEWMVENTROPY
nmv_context nmvc;
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
-#else
- MV_CONTEXT mvc[2];
- int mvcosts[2][MVvals + 1];
- MV_CONTEXT_HP mvc_hp[2];
- int mvcosts_hp[2][MVvals_hp + 1];
-#endif
#ifdef MODE_STATS
// Stats
@@ -97,24 +90,18 @@ typedef struct {
vp8_prob coef_probs[BLOCK_TYPES]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM
vp8_prob hybrid_coef_probs[BLOCK_TYPES]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#endif
vp8_prob coef_probs_8x8[BLOCK_TYPES_8X8]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_prob hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#endif
vp8_prob coef_probs_16x16[BLOCK_TYPES_16X16]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#endif
vp8_prob ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */
vp8_prob uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1];
@@ -123,10 +110,8 @@ typedef struct {
vp8_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP8_SUBMVREFS - 1];
vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1];
-#if CONFIG_SWITCHABLE_INTERP
vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS + 1]
[VP8_SWITCHABLE_FILTERS - 1];
-#endif
int mv_ref_ct[6][4][2];
int mode_context[6][4];
@@ -365,7 +350,6 @@ typedef struct {
typedef struct VP8_ENCODER_RTCD {
VP8_COMMON_RTCD *common;
- vp8_variance_rtcd_vtable_t variance;
vp8_fdct_rtcd_vtable_t fdct;
vp8_encodemb_rtcd_vtable_t encodemb;
vp8_search_rtcd_vtable_t search;
@@ -373,10 +357,10 @@ typedef struct VP8_ENCODER_RTCD {
} VP8_ENCODER_RTCD;
enum {
- BLOCK_16X8,
- BLOCK_8X16,
- BLOCK_8X8,
- BLOCK_4X4,
+ BLOCK_16X8 = PARTITIONING_16X8,
+ BLOCK_8X16 = PARTITIONING_8X16,
+ BLOCK_8X8 = PARTITIONING_8X8,
+ BLOCK_4X4 = PARTITIONING_4X4,
BLOCK_16X16,
BLOCK_MAX_SEGMENTS,
BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
@@ -465,13 +449,11 @@ typedef struct VP8_COMP {
int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
int comp_pred_count[COMP_PRED_CONTEXTS];
int single_pred_count[COMP_PRED_CONTEXTS];
-#if CONFIG_TX_SELECT
// FIXME contextualize
int txfm_count[TX_SIZE_MAX];
int txfm_count_8x8p[TX_SIZE_MAX - 1];
int64_t rd_tx_select_diff[NB_TXFM_MODES];
int rd_tx_select_threshes[4][NB_TXFM_MODES];
-#endif
int RDMULT;
int RDDIV;
@@ -563,39 +545,28 @@ typedef struct VP8_COMP {
// int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */
int y_uv_mode_count[VP8_YMODES][VP8_UV_MODES];
-#if CONFIG_NEWMVENTROPY
nmv_context_counts NMVcount;
-#else
- unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */
- unsigned int MVcount_hp [2] [MVvals_hp]; /* (row,col) MV cts this frame */
-#endif
unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM
unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_hybrid_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#endif
unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM8X8
unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_hybrid_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#endif
unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM16X16
unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
vp8_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#endif
int gfu_boost;
int last_boost;
@@ -780,10 +751,8 @@ typedef struct VP8_COMP {
int pred_filter_on_count;
int pred_filter_off_count;
#endif
-#if CONFIG_SWITCHABLE_INTERP
- unsigned int switchable_interp_count[VP8_SWITCHABLE_FILTERS+1]
+ unsigned int switchable_interp_count[VP8_SWITCHABLE_FILTERS + 1]
[VP8_SWITCHABLE_FILTERS];
-#endif
#if CONFIG_NEW_MVREF
unsigned int best_ref_index_counts[MAX_MV_REFS];
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 954997889..57bd41468 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -21,7 +21,8 @@
#include "vpx_ports/arm.h"
#endif
-extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
+extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest);
#if HAVE_ARMV7
extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
#endif
@@ -71,7 +72,8 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst
vpx_memcpy(dst_y, src_y, ystride * (linestocopy + 16));
}
-static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd) {
+static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest, int Fraction) {
int i, j;
int Total = 0;
int srcoffset, dstoffset;
@@ -79,7 +81,6 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
unsigned char *dst = dest->y_buffer;
int linestocopy = (source->y_height >> (Fraction + 4));
- (void)rtcd;
if (linestocopy < 1)
linestocopy = 1;
@@ -97,7 +98,8 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
for (i = 0; i < linestocopy; i += 16) {
for (j = 0; j < source->y_width; j += 16) {
unsigned int sse;
- Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+ Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
+ &sse);
}
src += 16 * source->y_stride;
@@ -179,7 +181,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
// Get the err using the previous frame's filter value.
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
- best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3);
// Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
@@ -192,7 +194,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame
- filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3);
// Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
@@ -221,7 +223,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame
- filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3);
// Re-instate the unfiltered frame
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
@@ -308,7 +310,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme
vp8cx_set_alt_lf_level(cpi, filt_mid);
vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_mid, segment);
- best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ best_err = vp8_calc_ss_err(sd, cm->frame_to_show);
filt_best = filt_mid;
// Re-instate the unfiltered frame
@@ -348,7 +350,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme
vp8cx_set_alt_lf_level(cpi, filt_low);
vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_low, segment);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
@@ -383,7 +385,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme
vp8cx_set_alt_lf_level(cpi, filt_high);
vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_high, segment);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
@@ -517,7 +519,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8cx_set_alt_lf_level(cpi, filt_mid);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
- best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ best_err = vp8_calc_ss_err(sd, cm->frame_to_show);
filt_best = filt_mid;
// Re-instate the unfiltered frame
@@ -557,7 +559,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8cx_set_alt_lf_level(cpi, filt_low);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
@@ -592,7 +594,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) {
vp8cx_set_alt_lf_level(cpi, filt_high);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
- filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = vp8_calc_ss_err(sd, cm->frame_to_show);
// Re-instate the unfiltered frame
#if HAVE_ARMV7
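
The level pickers above all follow the same pattern: filter at a mid level, measure the error against the source with vp8_calc_ss_err() (or the partial-frame variant), then probe one step below and above and keep whichever lowers the error. A hedged sketch of that search loop under the assumption of a halving step and VP8's nominal 0..63 level range; err_at() stands in for "filter at this level and return the SSE" and all names are invented.

    static int pick_filter_level_sketch(int mid, int step,
                                        int (*err_at)(int level)) {
      int best = mid;
      int best_err = err_at(mid);

      while (step > 0) {
        const int lo = best - step, hi = best + step;
        if (lo >= 0) {
          const int e = err_at(lo);
          if (e < best_err) { best_err = e; best = lo; }
        }
        if (hi <= 63) {
          const int e = err_at(hi);
          if (e < best_err) { best_err = e; best = hi; }
        }
        step >>= 1;   /* narrow the bracket around the current best */
      }
      return best;
    }
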
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index b6a1f27f8..16b4e6e1d 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -21,7 +21,6 @@
extern int enc_debug;
#endif
-#if CONFIG_HYBRIDTRANSFORM
void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
int i, rc, eob;
int zbin;
@@ -85,7 +84,6 @@ void vp8_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
d->eob = eob + 1;
}
-#endif
void vp8_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
int i, rc, eob;
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 1375ed0b0..e39433fc3 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -30,11 +30,9 @@
#include "arm/quantize_arm.h"
#endif
-#if CONFIG_HYBRIDTRANSFORM
#define prototype_quantize_block_type(sym) \
void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type)
extern prototype_quantize_block_type(vp8_ht_quantize_b_4x4);
-#endif
#ifndef vp8_quantize_quantb_4x4
#define vp8_quantize_quantb_4x4 vp8_regular_quantize_b_4x4
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 570bedfe9..cc3c82e74 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -132,17 +132,10 @@ void vp8_save_coding_context(VP8_COMP *cpi) {
// intended for use in a re-code loop in vp8_compress_frame where the
// quantizer value is adjusted between loop iterations.
-#if CONFIG_NEWMVENTROPY
cc->nmvc = cm->fc.nmvc;
vp8_copy(cc->nmvjointcost, cpi->mb.nmvjointcost);
vp8_copy(cc->nmvcosts, cpi->mb.nmvcosts);
vp8_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp);
-#else
- vp8_copy(cc->mvc, cm->fc.mvc);
- vp8_copy(cc->mvcosts, cpi->mb.mvcosts);
- vp8_copy(cc->mvc_hp, cm->fc.mvc_hp);
- vp8_copy(cc->mvcosts_hp, cpi->mb.mvcosts_hp);
-#endif
vp8_copy(cc->mv_ref_ct, cm->fc.mv_ref_ct);
vp8_copy(cc->mode_context, cm->fc.mode_context);
@@ -178,20 +171,12 @@ void vp8_save_coding_context(VP8_COMP *cpi) {
vp8_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas);
vp8_copy(cc->coef_probs, cm->fc.coef_probs);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cc->hybrid_coef_probs, cm->fc.hybrid_coef_probs);
-#endif
vp8_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8);
-#endif
vp8_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16);
-#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
-#endif
}
void vp8_restore_coding_context(VP8_COMP *cpi) {
@@ -202,17 +187,10 @@ void vp8_restore_coding_context(VP8_COMP *cpi) {
// Restore key state variables to the snapshot state stored in the
// previous call to vp8_save_coding_context.
-#if CONFIG_NEWMVENTROPY
cm->fc.nmvc = cc->nmvc;
vp8_copy(cpi->mb.nmvjointcost, cc->nmvjointcost);
vp8_copy(cpi->mb.nmvcosts, cc->nmvcosts);
vp8_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp);
-#else
- vp8_copy(cm->fc.mvc, cc->mvc);
- vp8_copy(cpi->mb.mvcosts, cc->mvcosts);
- vp8_copy(cm->fc.mvc_hp, cc->mvc_hp);
- vp8_copy(cpi->mb.mvcosts_hp, cc->mvcosts_hp);
-#endif
vp8_copy(cm->fc.mv_ref_ct, cc->mv_ref_ct);
vp8_copy(cm->fc.mode_context, cc->mode_context);
@@ -249,20 +227,12 @@ void vp8_restore_coding_context(VP8_COMP *cpi) {
vp8_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas);
vp8_copy(cm->fc.coef_probs, cc->coef_probs);
-#if CONFIG_HYBRIDTRANSFORM
vp8_copy(cm->fc.hybrid_coef_probs, cc->hybrid_coef_probs);
-#endif
vp8_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8);
-#if CONFIG_HYBRIDTRANSFORM8X8
vp8_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8);
-#endif
vp8_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
-#if CONFIG_HYBRIDTRANSFORM16X16
vp8_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16);
-#endif
-#if CONFIG_SWITCHABLE_INTERP
vp8_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
-#endif
}
@@ -275,16 +245,6 @@ void vp8_setup_key_frame(VP8_COMP *cpi) {
vp8_default_bmode_probs(cm->fc.bmode_prob);
vp8_init_mv_probs(& cpi->common);
-#if CONFIG_NEWMVENTROPY == 0
- /* this is not really required */
- {
- int flag[2] = {1, 1};
- vp8_build_component_cost_table(
- cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
- vp8_build_component_cost_table_hp(
- cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flag);
- }
-#endif
// cpi->common.filter_level = 0; // Reset every key frame.
cpi->common.filter_level = cpi->common.base_qindex * 3 / 8;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 4b9e90725..e919de36f 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -60,10 +60,8 @@ extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
#define INVALID_MV 0x80008000
-#if CONFIG_SWITCHABLE_INTERP
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
-#endif
static const int auto_speed_thresh[17] = {
1000,
@@ -355,37 +353,31 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) {
cpi->mb.token_costs[TX_4X4],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs,
BLOCK_TYPES);
-#if CONFIG_HYBRIDTRANSFORM
fill_token_costs(
cpi->mb.hybrid_token_costs[TX_4X4],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11])
cpi->common.fc.hybrid_coef_probs,
BLOCK_TYPES);
-#endif
fill_token_costs(
cpi->mb.token_costs[TX_8X8],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8,
BLOCK_TYPES_8X8);
-#if CONFIG_HYBRIDTRANSFORM8X8
fill_token_costs(
cpi->mb.hybrid_token_costs[TX_8X8],
(const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11])
cpi->common.fc.hybrid_coef_probs_8x8,
BLOCK_TYPES_8X8);
-#endif
fill_token_costs(
cpi->mb.token_costs[TX_16X16],
(const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16,
BLOCK_TYPES_16X16);
-#if CONFIG_HYBRIDTRANSFORM16X16
fill_token_costs(
cpi->mb.hybrid_token_costs[TX_16X16],
(const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11])
cpi->common.fc.hybrid_coef_probs_16x16,
BLOCK_TYPES_16X16);
-#endif
/*rough estimate for costing*/
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
@@ -393,14 +385,12 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) {
if (cpi->common.frame_type != KEY_FRAME)
{
-#if CONFIG_NEWMVENTROPY
vp8_build_nmv_cost_table(
cpi->mb.nmvjointcost,
cpi->mb.e_mbd.allow_high_precision_mv ?
cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
&cpi->common.fc.nmvc,
cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
-#endif
}
}
@@ -409,19 +399,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi) {
milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
-#if 0
-
- if (0) {
- FILE *f;
-
- f = fopen("speed.stt", "a");
- fprintf(f, " %8ld %10ld %10ld %10ld\n",
- cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
- fclose(f);
- }
-
-#endif
-
/*
// this is done during parameter valid check
if( cpi->oxcf.cpu_used > 16)
@@ -520,7 +497,7 @@ int vp8_mbuverror_c(MACROBLOCK *mb) {
return error;
}
-int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) {
+int vp8_uvsse(MACROBLOCK *x) {
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
@@ -551,16 +528,14 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) {
vptr = x->e_mbd.pre.v_buffer + offset;
if ((mv_row | mv_col) & 7) {
- VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride,
- (mv_col & 7) << 1, (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
- VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride,
- (mv_col & 7) << 1, (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
+ vp8_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
+ vp8_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
+ (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
sse2 += sse1;
} else {
- VARIANCE_INVOKE(rtcd, var8x8)(uptr, pre_stride,
- upred_ptr, uv_stride, &sse2);
- VARIANCE_INVOKE(rtcd, var8x8)(vptr, pre_stride,
- vpred_ptr, uv_stride, &sse1);
+ vp8_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
+ vp8_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
sse2 += sse1;
}
return sse2;
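
The branch retained above keys on ((mv_row | mv_col) & 7): with motion vectors held in eighth-pel units, any nonzero low bits mean the chroma prediction needs sub-pel interpolation, so the sub-pixel variance path is taken; otherwise the cheaper whole-pel variance is enough. The test in isolation:

    /* Nonzero fractional (eighth-pel) part in either component? */
    static int mv_has_subpel_sketch(int mv_row, int mv_col) {
      return ((mv_row | mv_col) & 7) != 0;
    }
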
@@ -607,9 +582,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
short *qcoeff_ptr = b->qcoeff;
MACROBLOCKD *xd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi;
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = DCT_DCT;
-#endif
int segment_id = mbmi->segment_id;
switch (tx_size) {
@@ -617,7 +590,6 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
scan = vp8_default_zig_zag1d;
band = vp8_coef_bands;
default_eob = 16;
-#if CONFIG_HYBRIDTRANSFORM
if (type == PLANE_TYPE_Y_WITH_DC) {
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
@@ -636,14 +608,12 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
}
}
}
-#endif
break;
case TX_8X8:
scan = vp8_default_zig_zag1d_8x8;
band = vp8_coef_bands_8x8;
default_eob = 64;
-#if CONFIG_HYBRIDTRANSFORM8X8
if (type == PLANE_TYPE_Y_WITH_DC) {
BLOCKD *bb;
int ib = (b - xd->block);
@@ -653,17 +623,14 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
tx_type = get_tx_type_8x8(xd, bb);
}
}
-#endif
break;
case TX_16X16:
scan = vp8_default_zig_zag1d_16x16;
band = vp8_coef_bands_16x16;
default_eob = 256;
-#if CONFIG_HYBRIDTRANSFORM16X16
if (type == PLANE_TYPE_Y_WITH_DC) {
tx_type = get_tx_type_16x16(xd, b);
}
-#endif
break;
default:
break;
@@ -675,7 +642,6 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT) {
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
@@ -687,9 +653,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
if (c < seg_eob)
cost += mb->hybrid_token_costs[tx_size][type][band[c]]
[pt][DCT_EOB_TOKEN];
- } else
-#endif
- {
+ } else {
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp8_dct_value_tokens_ptr[v].Token;
@@ -870,9 +834,7 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
MACROBLOCKD *xd = &mb->e_mbd;
BLOCKD *b = &mb->e_mbd.block[0];
BLOCK *be = &mb->block[0];
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type;
-#endif
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(
mb->src_diff,
@@ -880,24 +842,18 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
mb->e_mbd.predictor,
mb->block[0].src_stride);
-#if CONFIG_HYBRIDTRANSFORM16X16
tx_type = get_tx_type_16x16(xd, b);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 16);
} else
vp8_transform_mby_16x16(mb);
-#else
- vp8_transform_mby_16x16(mb);
-#endif
vp8_quantize_mby_16x16(mb);
-#if CONFIG_HYBRIDTRANSFORM16X16
// TODO(jingning) is it possible to quickly determine whether to force
// trailing coefficients to be zero, instead of running trellis
// optimization in the rate-distortion optimization loop?
if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
vp8_optimize_mby_16x16(mb, rtcd);
-#endif
d = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(mb, 0);
@@ -913,8 +869,6 @@ static void macro_block_yrd(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
VP8_COMMON *cm = &cpi->common;
MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
-#if CONFIG_TX_SELECT
-
MACROBLOCKD *xd = &x->e_mbd;
int can_skip = cm->mb_no_coeff_skip;
vp8_prob skip_prob = can_skip ? get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
@@ -1022,25 +976,6 @@ static void macro_block_yrd(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
else
txfm_cache[TX_MODE_SELECT] = rd4x4s < rd8x8s ? rd4x4s : rd8x8s;
-#else /* CONFIG_TX_SELECT */
-
- switch (cpi->common.txfm_mode) {
- case ALLOW_16X16:
- macro_block_yrd_16x16(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable);
- mbmi->txfm_size = TX_16X16;
- break;
- case ALLOW_8X8:
- macro_block_yrd_8x8(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable);
- mbmi->txfm_size = TX_8X8;
- break;
- default:
- case ONLY_4X4:
- macro_block_yrd_4x4(x, rate, distortion, IF_RTCD(&cpi->rtcd), skippable);
- mbmi->txfm_size = TX_4X4;
- break;
- }
-
-#endif /* CONFIG_TX_SELECT */
}
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
@@ -1155,10 +1090,8 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
ENTROPY_CONTEXT ta = *a, tempa = *a;
ENTROPY_CONTEXT tl = *l, templ = *l;
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type = DCT_DCT;
TX_TYPE best_tx_type = DCT_DCT;
-#endif
/*
* The predictor buffer is a 2d buffer with a stride of 16. Create
* a temp buffer that meets the stride requirements, but we are only
@@ -1191,7 +1124,6 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
b->bmi.as_mode.first = mode;
-#if CONFIG_HYBRIDTRANSFORM
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
vp8_fht_c(be->src_diff, be->coeff, 32, tx_type, 4);
@@ -1200,10 +1132,6 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
}
-#else
- x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
-#endif
tempa = ta;
templ = tl;
@@ -1221,9 +1149,7 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
-#if CONFIG_HYBRIDTRANSFORM
best_tx_type = tx_type;
-#endif
#if CONFIG_COMP_INTRA_PRED
*best_second_mode = mode2;
@@ -1242,17 +1168,12 @@ static int64_t rd_pick_intra4x4block(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be,
b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode);
#endif
-#if CONFIG_HYBRIDTRANSFORM
// inverse transform
if (best_tx_type != DCT_DCT)
vp8_ihtllm_c(best_dqcoeff, b->diff, 32, best_tx_type, 4);
else
IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
best_dqcoeff, b->diff, 32);
-#else
- IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
- best_dqcoeff, b->diff, 32);
-#endif
vp8_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -1405,11 +1326,9 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
int64_t this_rd;
MACROBLOCKD *xd = &x->e_mbd;
-#if CONFIG_TX_SELECT
int i;
for (i = 0; i < NB_TXFM_MODES; i++)
txfm_cache[i] = INT64_MAX;
-#endif
// Y Search for 16x16 intra prediction mode
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
@@ -1452,7 +1371,6 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
*skippable = skip;
}
-#if CONFIG_TX_SELECT
for (i = 0; i < NB_TXFM_MODES; i++) {
int64_t adj_rd = this_rd + local_txfm_cache[i] -
local_txfm_cache[cpi->common.txfm_mode];
@@ -1460,7 +1378,6 @@ static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
txfm_cache[i] = adj_rd;
}
}
-#endif
#if CONFIG_COMP_INTRA_PRED
}
@@ -1535,22 +1452,18 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
vp8_subtract_4b_c(be, b, 16);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type = get_tx_type_8x8(xd, b);
if (tx_type != DCT_DCT)
vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, tx_type, 8);
else
x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
-#else
- x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
-#endif
x->quantize_b_8x8(x->block + idx, xd->block + idx);
// compute quantization mse of 8x8 block
distortion = vp8_block_error_c((x->block + idx)->coeff,
(xd->block + idx)->dqcoeff, 64);
- ta0 = *(a + vp8_block2above_8x8[idx]);
- tl0 = *(l + vp8_block2left_8x8 [idx]);
+ ta0 = a[vp8_block2above_8x8[idx]];
+ tl0 = l[vp8_block2left_8x8[idx]];
rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
&ta0, &tl0, TX_8X8);
@@ -1576,10 +1489,10 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
distortion += vp8_block_error_c((x->block + ib + 5)->coeff,
(xd->block + ib + 5)->dqcoeff, 16);
- ta0 = *(a + vp8_block2above[ib]);
- ta1 = *(a + vp8_block2above[ib + 1]);
- tl0 = *(l + vp8_block2above[ib]);
- tl1 = *(l + vp8_block2above[ib + 4]);
+ ta0 = a[vp8_block2above[ib]];
+ ta1 = a[vp8_block2above[ib + 1]];
+ tl0 = l[vp8_block2left[ib]];
+ tl1 = l[vp8_block2left[ib + 4]];
rate_t = cost_coeffs(x, xd->block + ib, PLANE_TYPE_Y_WITH_DC,
&ta0, &tl0, TX_4X4);
rate_t += cost_coeffs(x, xd->block + ib + 1, PLANE_TYPE_Y_WITH_DC,
@@ -1621,15 +1534,15 @@ static int64_t rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
vp8_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- *(a + vp8_block2above_8x8[idx]) = besta0;
- *(a + vp8_block2above_8x8[idx] + 1) = besta1;
- *(l + vp8_block2left_8x8 [idx]) = bestl0;
- *(l + vp8_block2left_8x8 [idx] + 1) = bestl1;
+ a[vp8_block2above_8x8[idx]] = besta0;
+ a[vp8_block2above_8x8[idx] + 1] = besta1;
+ l[vp8_block2left_8x8[idx]] = bestl0;
+ l[vp8_block2left_8x8[idx] + 1] = bestl1;
} else {
- *(a + vp8_block2above[ib]) = besta0;
- *(a + vp8_block2above[ib + 1]) = besta1;
- *(l + vp8_block2above[ib]) = bestl0;
- *(l + vp8_block2above[ib + 4]) = bestl1;
+ a[vp8_block2above[ib]] = besta0;
+ a[vp8_block2above[ib + 1]] = besta1;
+ l[vp8_block2left[ib]] = bestl0;
+ l[vp8_block2left[ib + 4]] = bestl1;
}
return best_rd;
@@ -2223,12 +2136,22 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
int which_label,
int *labelyrate,
int *distortion,
+ int64_t *otherrd,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl,
const VP8_ENCODER_RTCD *rtcd) {
int i, j;
MACROBLOCKD *xd = &x->e_mbd;
const int iblock[4] = { 0, 1, 4, 5 };
+ int othercost = 0, otherdist = 0;
+ ENTROPY_CONTEXT_PLANES tac, tlc;
+ ENTROPY_CONTEXT *tacp = (ENTROPY_CONTEXT *) &tac,
+ *tlcp = (ENTROPY_CONTEXT *) &tlc;
+
+ if (otherrd) {
+ memcpy(&tac, ta, sizeof(ENTROPY_CONTEXT_PLANES));
+ memcpy(&tlc, tl, sizeof(ENTROPY_CONTEXT_PLANES));
+ }
*distortion = 0;
*labelyrate = 0;
@@ -2236,8 +2159,9 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
int ib = vp8_i8x8_block[i];
if (labels[ib] == which_label) {
- BLOCKD *bd = &xd->block[ib];
- BLOCK *be = &x->block[ib];
+ int idx = (ib & 8) + ((ib & 2) << 1);
+ BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
+ BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
int thisdistortion;
vp8_build_inter_predictors4b(xd, bd, 16);
@@ -2245,24 +2169,66 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
vp8_build_2nd_inter_predictors4b(xd, bd, 16);
vp8_subtract_4b_c(be, bd, 16);
- for (j = 0; j < 4; j += 2) {
- bd = &xd->block[ib + iblock[j]];
- be = &x->block[ib + iblock[j]];
- x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
- thisdistortion = vp8_block_error_c(be->coeff, bd->dqcoeff, 32);
+ if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
+ if (otherrd) {
+ x->vp8_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(be2, bd2);
+ thisdistortion = vp8_block_error_c(be2->coeff, bd2->dqcoeff, 64);
+ otherdist += thisdistortion;
+ othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp8_block2above_8x8[idx],
+ tlcp + vp8_block2left_8x8[idx], TX_8X8);
+ }
+ for (j = 0; j < 4; j += 2) {
+ bd = &xd->block[ib + iblock[j]];
+ be = &x->block[ib + iblock[j]];
+ x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ thisdistortion = vp8_block_error_c(be->coeff, bd->dqcoeff, 32);
+ *distortion += thisdistortion;
+ *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ ta + vp8_block2above[ib + iblock[j]],
+ tl + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
+ ta + vp8_block2above[ib + iblock[j] + 1],
+ tl + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ }
+ } else /* 8x8 */ {
+ if (otherrd) {
+ for (j = 0; j < 4; j += 2) {
+ BLOCKD *bd3 = &xd->block[ib + iblock[j]];
+ BLOCK *be3 = &x->block[ib + iblock[j]];
+ x->vp8_short_fdct8x4(be3->src_diff, be3->coeff, 32);
+ x->quantize_b_4x4_pair(be3, be3 + 1, bd3, bd3 + 1);
+ thisdistortion = vp8_block_error_c(be3->coeff, bd3->dqcoeff, 32);
+ otherdist += thisdistortion;
+ othercost += cost_coeffs(x, bd3, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp8_block2above[ib + iblock[j]],
+ tlcp + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ othercost += cost_coeffs(x, bd3 + 1, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp8_block2above[ib + iblock[j] + 1],
+ tlcp + vp8_block2left[ib + iblock[j]],
+ TX_4X4);
+ }
+ }
+ x->vp8_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(be2, bd2);
+ thisdistortion = vp8_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
- ta + vp8_block2above[ib + iblock[j]],
- tl + vp8_block2left[ib + iblock[j]], TX_4X4);
- *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
- ta + vp8_block2above[ib + iblock[j] + 1],
- tl + vp8_block2left[ib + iblock[j]],
- TX_4X4);
+ *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ ta + vp8_block2above_8x8[idx],
+ tl + vp8_block2left_8x8[idx], TX_8X8);
}
}
}
*distortion >>= 2;
+ if (otherrd) {
+ othercost >>= 2;
+ *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist);
+ }
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
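
The otherrd out-parameter added above lets encode_inter_mb_segment_8x8 report, in the same pass, the rate-distortion cost of the transform size it was not asked to use, which the caller later folds into the per-partition transform cache. As a reference for the RDCOST combination used throughout this hunk, here is a minimal sketch of folding a rate/distortion pair into one Lagrangian cost; the shift-based form follows the usual libvpx convention but is an assumption here, not code lifted from this tree.

#include <stdint.h>

/* Hedged sketch: combine rate (bits) and distortion (SSE) into a single
 * cost, as RDCOST(x->rdmult, x->rddiv, rate, dist) is assumed to do.
 * The exact rounding constants are an assumption. */
static int64_t rd_cost(int rdmult, int rddiv, int rate, int64_t distortion) {
  return ((128 + (int64_t)rate * rdmult) >> 8) + (distortion << rddiv);
}

/* Example: rdmult = 300, rddiv = 1, 40 bits, SSE 500:
 * ((128 + 40 * 300) >> 8) + (500 << 1) = 47 + 1000 = 1047. */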
@@ -2274,7 +2240,8 @@ typedef struct {
int_mv mvp;
int64_t segment_rd;
- int segment_num;
+ SPLITMV_PARTITIONING_TYPE segment_num;
+ TX_SIZE txfm_size;
int r;
int d;
int segment_yrate;
@@ -2300,9 +2267,14 @@ int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
return r;
}
-static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
- BEST_SEG_INFO *bsi, unsigned int segmentation,
- int_mv seg_mvs[16 /* n_blocks */][MAX_REF_FRAMES - 1]) {
+static void rd_check_segment_txsize(VP8_COMP *cpi, MACROBLOCK *x,
+ BEST_SEG_INFO *bsi,
+ SPLITMV_PARTITIONING_TYPE segmentation,
+ TX_SIZE tx_size, int64_t *otherrds,
+ int64_t *rds, int *completed,
+ /* 16 = n_blocks */
+ int_mv seg_mvs[16 /* n_blocks */]
+ [MAX_REF_FRAMES - 1]) {
int i, j;
int const *labels;
int br = 0, bd = 0;
@@ -2310,12 +2282,12 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
int label_count;
- int64_t this_segment_rd = 0;
+ int64_t this_segment_rd = 0, other_segment_rd;
int label_mv_thresh;
int rate = 0;
int sbr = 0, sbd = 0;
int segmentyrate = 0;
- uint8_t best_eobs[16];
+ uint8_t best_eobs[16] = { 0 };
vp8_variance_fn_ptr_t *v_fn_ptr;
@@ -2343,20 +2315,23 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
- rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
+ rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
+ vp8_mbsplit_encodings + segmentation);
rate += vp8_cost_mv_ref(cpi, SPLITMV, bsi->mdcounts);
this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
br += rate;
+ other_segment_rd = this_segment_rd;
- for (i = 0; i < label_count; i++) {
+ mbmi->txfm_size = tx_size;
+ for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {
int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
- int64_t best_label_rd = INT64_MAX;
+ int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
B_PREDICTION_MODE mode_selected = ZERO4X4;
int bestlabelyrate = 0;
// search for the best motion vector on this segment
for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
- int64_t this_rd;
+ int64_t this_rd, other_rd;
int distortion;
int labelyrate;
ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
@@ -2378,21 +2353,23 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
BLOCK *c;
BLOCKD *e;
- // Is the best so far sufficiently good that we cant justify doing and new motion search.
+          /* Is the best so far sufficiently good that we can't justify doing
+           * a new motion search. */
if (best_label_rd < label_mv_thresh)
break;
if (cpi->compressor_speed) {
- if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
+ if (segmentation == PARTITIONING_8X16 ||
+ segmentation == PARTITIONING_16X8) {
bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
- if (i == 1 && segmentation == BLOCK_16X8)
+ if (i == 1 && segmentation == PARTITIONING_16X8)
bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
step_param = bsi->sv_istep[i];
}
// use previous block's result as next block's MV predictor.
- if (segmentation == BLOCK_4X4 && i > 0) {
+ if (segmentation == PARTITIONING_4X4 && i > 0) {
bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int;
if (i == 4 || i == 8 || i == 12)
bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int;
@@ -2424,7 +2401,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
// Should we do a full search (best quality only)
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
/* Check if mvp_full is within the range. */
- vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
+ x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, c, e, &mvp_full,
sadpb, 16, v_fn_ptr,
@@ -2434,7 +2412,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bestsme = thissme;
mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int;
} else {
- // The full search result is actually worse so re-instate the previous best vector
+ /* The full search result is actually worse so re-instate the
+ * previous best vector */
e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int;
}
}
@@ -2444,15 +2423,16 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
int distortion;
unsigned int sse;
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
- bsi->ref_mv, x->errorperbit, v_fn_ptr, XMVCOST,
- &distortion, &sse);
+ bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ XMVCOST, &distortion, &sse);
          // save motion search result for use in compound prediction
seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
}
} /* NEW4X4 */
else if (mbmi->second_ref_frame && this_mode == NEW4X4) {
- // motion search not completed? Then skip newmv for this block with comppred
+ /* motion search not completed? Then skip newmv for this block with
+ * comppred */
if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
continue;
@@ -2474,14 +2454,15 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
- if (segmentation == BLOCK_4X4) {
+ if (segmentation == PARTITIONING_4X4) {
this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate,
&distortion,
ta_s, tl_s, IF_RTCD(&cpi->rtcd));
+ other_rd = this_rd;
} else {
this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate,
- &distortion, ta_s, tl_s,
- IF_RTCD(&cpi->rtcd));
+ &distortion, &other_rd,
+ ta_s, tl_s, IF_RTCD(&cpi->rtcd));
}
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
@@ -2492,9 +2473,20 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
- for (j = 0; j < 16; j++)
- if (labels[j] == i)
- best_eobs[j] = x->e_mbd.block[j].eob;
+ if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
+ for (j = 0; j < 16; j++)
+ if (labels[j] == i)
+ best_eobs[j] = x->e_mbd.block[j].eob;
+ } else {
+ for (j = 0; j < 4; j++) {
+ int ib = vp8_i8x8_block[j], idx = j * 4;
+
+ if (labels[ib] == i)
+ best_eobs[idx] = x->e_mbd.block[idx].eob;
+ }
+ }
+ if (other_rd < best_other_rd)
+ best_other_rd = other_rd;
vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -2506,18 +2498,18 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
- &second_mode_mv[mode_selected], seg_mvs[i], bsi->ref_mv, bsi->second_ref_mv, XMVCOST);
+ &second_mode_mv[mode_selected], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, XMVCOST);
br += sbr;
bd += sbd;
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
-
- if (this_segment_rd >= bsi->segment_rd) {
- break;
- }
-
-
+ other_segment_rd += best_other_rd;
+ if (rds)
+ rds[i] = this_segment_rd;
+ if (otherrds)
+        otherrds[i] = other_segment_rd;
} /* for each label */
if (this_segment_rd < bsi->segment_rd) {
@@ -2526,6 +2518,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bsi->segment_yrate = segmentyrate;
bsi->segment_rd = this_segment_rd;
bsi->segment_num = segmentation;
+ bsi->txfm_size = mbmi->txfm_size;
// store everything needed to come back to this!!
for (i = 0; i < 16; i++) {
@@ -2538,6 +2531,105 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
bsi->eobs[i] = best_eobs[i];
}
}
+
+ if (completed) {
+ *completed = i;
+ }
+}
+
+static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
+ BEST_SEG_INFO *bsi,
+ unsigned int segmentation,
+ /* 16 = n_blocks */
+ int_mv seg_mvs[16][MAX_REF_FRAMES - 1],
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ int i, n, c = vp8_mbsplit_count[segmentation];
+
+ if (segmentation == PARTITIONING_4X4) {
+ int64_t rd[16];
+
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ if (rd[c - 1] < txfm_cache[i])
+ txfm_cache[i] = rd[c - 1];
+ }
+ }
+ } else {
+ int64_t diff, base_rd;
+ int cost4x4 = vp8_cost_bit(cpi->common.prob_tx[0], 0);
+ int cost8x8 = vp8_cost_bit(cpi->common.prob_tx[0], 1);
+
+ if (cpi->common.txfm_mode == TX_MODE_SELECT) {
+ int64_t rd4x4[4], rd8x8[4];
+ int n4x4, n8x8, nmin;
+ BEST_SEG_INFO bsi4x4, bsi8x8;
+
+ /* factor in cost of cost4x4/8x8 in decision */
+ vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi));
+ vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi));
+ rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation,
+ TX_4X4, NULL, rd4x4, &n4x4, seg_mvs);
+ rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation,
+ TX_8X8, NULL, rd8x8, &n8x8, seg_mvs);
+ if (bsi4x4.segment_num == segmentation) {
+ bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
+ if (bsi4x4.segment_rd < bsi->segment_rd)
+ vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi));
+ }
+ if (bsi8x8.segment_num == segmentation) {
+ bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
+ if (bsi8x8.segment_rd < bsi->segment_rd)
+ vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi));
+ }
+ n = n4x4 > n8x8 ? n4x4 : n8x8;
+ if (n == c) {
+ nmin = n4x4 < n8x8 ? n4x4 : n8x8;
+ diff = rd8x8[nmin - 1] - rd4x4[nmin - 1];
+ if (n == n4x4) {
+ base_rd = rd4x4[c - 1];
+ } else {
+ base_rd = rd8x8[c - 1] - diff;
+ }
+ }
+ } else {
+ int64_t rd[4], otherrd[4];
+
+ if (cpi->common.txfm_mode == ONLY_4X4) {
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ base_rd = rd[c - 1];
+ diff = otherrd[c - 1] - rd[c - 1];
+ }
+ } else /* use 8x8 transform */ {
+ rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd,
+ rd, &n, seg_mvs);
+ if (n == c) {
+ diff = rd[c - 1] - otherrd[c - 1];
+ base_rd = otherrd[c - 1];
+ }
+ }
+ }
+
+ if (n == c) {
+ if (base_rd < txfm_cache[ONLY_4X4]) {
+ txfm_cache[ONLY_4X4] = base_rd;
+ }
+      if (base_rd + diff < txfm_cache[ALLOW_8X8]) {
+ txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff;
+ }
+ if (diff < 0) {
+ base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
+ } else {
+ base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
+ }
+ if (base_rd < txfm_cache[TX_MODE_SELECT]) {
+ txfm_cache[TX_MODE_SELECT] = base_rd;
+ }
+ }
+ }
}
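
rd_check_segment is now a wrapper that runs rd_check_segment_txsize for the relevant transform sizes and folds the results into txfm_cache[], so SPLITMV can take part in per-macroblock transform-size selection. For context, a minimal sketch of how such a cache is consumed on the caller side, following the adj_rd pattern that appears later in this file; the standalone helper and its parameter names are illustrative assumptions.

/* Hedged sketch: re-express a prediction mode's cost under each candidate
 * transform mode using the cached per-transform costs, keeping the best
 * value seen so far for each transform mode. */
static void update_best_txfm_rd(int64_t this_rd,
                                const int64_t txfm_cache[],
                                int current_txfm_mode,
                                int nb_txfm_modes,
                                int64_t best_txfm_rd[]) {
  int i;
  for (i = 0; i < nb_txfm_modes; i++) {
    int64_t adj_rd = this_rd + txfm_cache[i] - txfm_cache[current_txfm_mode];
    if (adj_rd < best_txfm_rd[i])
      best_txfm_rd[i] = adj_rd;
  }
}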
static __inline
@@ -2553,17 +2645,26 @@ void vp8_cal_step_param(int sr, int *sp) {
*sp = MAX_MVSEARCH_STEPS - 1 - step;
}
-static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv, int_mv *second_best_ref_mv, int64_t best_rd,
- int *mdcounts, int *returntotrate,
- int *returnyrate, int *returndistortion,
- int *skippable, int mvthresh,
- int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1]) {
+static int rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ int64_t best_rd,
+ int *mdcounts,
+ int *returntotrate,
+ int *returnyrate,
+ int *returndistortion,
+ int *skippable, int mvthresh,
+ int_mv seg_mvs[NB_PARTITIONINGS]
+ [16 /* n_blocks */]
+ [MAX_REF_FRAMES - 1],
+ int64_t txfm_cache[NB_TXFM_MODES]) {
int i;
BEST_SEG_INFO bsi;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
vpx_memset(&bsi, 0, sizeof(bsi));
+ for (i = 0; i < NB_TXFM_MODES; i++)
+ txfm_cache[i] = INT64_MAX;
bsi.segment_rd = best_rd;
bsi.ref_mv = best_ref_mv;
@@ -2571,6 +2672,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
bsi.mvp.as_int = best_ref_mv->as_int;
bsi.mvthresh = mvthresh;
bsi.mdcounts = mdcounts;
+ bsi.txfm_size = TX_4X4;
for (i = 0; i < 16; i++)
bsi.modes[i] = ZERO4X4;
@@ -2578,15 +2680,19 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
if (cpi->compressor_speed == 0) {
/* for now, we will keep the original segmentation order
when in best quality mode */
- rd_check_segment(cpi, x, &bsi, BLOCK_16X8, seg_mvs[BLOCK_16X8]);
- rd_check_segment(cpi, x, &bsi, BLOCK_8X16, seg_mvs[BLOCK_8X16]);
- rd_check_segment(cpi, x, &bsi, BLOCK_8X8, seg_mvs[BLOCK_8X8]);
- rd_check_segment(cpi, x, &bsi, BLOCK_4X4, seg_mvs[BLOCK_4X4]);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
+ seg_mvs[PARTITIONING_16X8], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
+ seg_mvs[PARTITIONING_8X16], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
+ seg_mvs[PARTITIONING_8X8], txfm_cache);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
+ seg_mvs[PARTITIONING_4X4], txfm_cache);
} else {
int sr;
- rd_check_segment(cpi, x, &bsi, BLOCK_8X8, seg_mvs[BLOCK_8X8]);
-
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
+ seg_mvs[PARTITIONING_8X8], txfm_cache);
if (bsi.segment_rd < best_rd) {
int tmp_col_min = x->mv_col_min;
@@ -2602,34 +2708,40 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
- /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
+ /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
+ * according to the closeness of 2 MV. */
/* block 8X16 */
- {
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[0]);
+ sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[0]);
- sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[1]);
+ sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[1]);
- rd_check_segment(cpi, x, &bsi, BLOCK_8X16, seg_mvs[BLOCK_8X16]);
- }
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
+ seg_mvs[PARTITIONING_8X16], txfm_cache);
/* block 16X8 */
- {
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[0]);
+ sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[0]);
- sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
- vp8_cal_step_param(sr, &bsi.sv_istep[1]);
+ sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
+ (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ vp8_cal_step_param(sr, &bsi.sv_istep[1]);
- rd_check_segment(cpi, x, &bsi, BLOCK_16X8, seg_mvs[BLOCK_16X8]);
- }
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
+ seg_mvs[PARTITIONING_16X8], txfm_cache);
/* If 8x8 is better than 16x8/8x16, then do 4x4 search */
/* Not skip 4x4 if speed=0 (good quality) */
- if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) { /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
+ if (cpi->sf.no_skip_block4x4_search ||
+ bsi.segment_num == PARTITIONING_8X8) {
+ /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
- rd_check_segment(cpi, x, &bsi, BLOCK_4X4, seg_mvs[BLOCK_4X4]);
+ rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
+ seg_mvs[PARTITIONING_4X4], txfm_cache);
}
/* restore UMV window */
@@ -2653,9 +2765,12 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
*returntotrate = bsi.r;
*returndistortion = bsi.d;
*returnyrate = bsi.segment_yrate;
- *skippable = mby_is_skippable_4x4(&x->e_mbd, 0);
+ *skippable = bsi.txfm_size == TX_4X4 ?
+ mby_is_skippable_4x4(&x->e_mbd, 0) :
+ mby_is_skippable_8x8(&x->e_mbd, 0);
/* save partitions */
+ mbmi->txfm_size = bsi.txfm_size;
mbmi->partitioning = bsi.segment_num;
x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
@@ -2901,9 +3016,7 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
-#if CONFIG_NEWMVENTROPY
MV mv;
-#endif
if (mbmi->mode == SPLITMV) {
int i;
@@ -2911,7 +3024,6 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < x->partition_info->count; i++) {
if (x->partition_info->bmi[i].mode == NEW4X4) {
if (x->e_mbd.allow_high_precision_mv) {
-#if CONFIG_NEWMVENTROPY
mv.row = (x->partition_info->bmi[i].mv.as_mv.row
- best_ref_mv->as_mv.row);
mv.col = (x->partition_info->bmi[i].mv.as_mv.col
@@ -2925,20 +3037,7 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv,
&cpi->NMVcount, 1);
}
-#else
- cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.row
- - second_best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.col
- - second_best_ref_mv->as_mv.col)]++;
- }
-#endif
} else {
-#if CONFIG_NEWMVENTROPY
mv.row = (x->partition_info->bmi[i].mv.as_mv.row
- best_ref_mv->as_mv.row);
mv.col = (x->partition_info->bmi[i].mv.as_mv.col
@@ -2952,24 +3051,11 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv,
&cpi->NMVcount, 0);
}
-#else
- cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.row
- - second_best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.col
- - second_best_ref_mv->as_mv.col) >> 1)]++;
- }
-#endif
}
}
}
} else if (mbmi->mode == NEWMV) {
if (x->e_mbd.allow_high_precision_mv) {
-#if CONFIG_NEWMVENTROPY
mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
@@ -2978,20 +3064,7 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1);
}
-#else
- cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[0].as_mv.row
- - best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[0].as_mv.col
- - best_ref_mv->as_mv.col)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[1].as_mv.row
- - second_best_ref_mv->as_mv.row)]++;
- cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[1].as_mv.col
- - second_best_ref_mv->as_mv.col)]++;
- }
-#endif
} else {
-#if CONFIG_NEWMVENTROPY
mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
@@ -3000,18 +3073,6 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0);
}
-#else
- cpi->MVcount[0][mv_max + ((mbmi->mv[0].as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((mbmi->mv[0].as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
- if (mbmi->second_ref_frame) {
- cpi->MVcount[0][mv_max + ((mbmi->mv[1].as_mv.row
- - second_best_ref_mv->as_mv.row) >> 1)]++;
- cpi->MVcount[1][mv_max + ((mbmi->mv[1].as_mv.col
- - second_best_ref_mv->as_mv.col) >> 1)]++;
- }
-#endif
}
}
}
@@ -3185,9 +3246,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int hybrid_pred_diff,
int64_t txfm_size_diff[NB_TXFM_MODES]) {
MACROBLOCKD *xd = &x->e_mbd;
-#if CONFIG_TX_SELECT
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
-#endif
// Take a snapshot of the coding context so it can be
// restored if we decide to encode this way
@@ -3207,9 +3266,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
ctx->comp_pred_diff = comp_pred_diff;
ctx->hybrid_pred_diff = hybrid_pred_diff;
-#if CONFIG_TX_SELECT
memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
-#endif
}
static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode,
@@ -3326,9 +3383,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int_mv ref_mv[MAX_REF_FRAMES] = {{0}};
#endif
-#if CONFIG_SWITCHABLE_INTERP
int switchable_filter_index = 0;
-#endif
MB_PREDICTION_MODE uv_intra_mode;
MB_PREDICTION_MODE uv_intra_mode_8x8 = 0;
@@ -3344,7 +3399,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
unsigned char *y_buffer[4], *u_buffer[4], *v_buffer[4];
unsigned int ref_costs[MAX_REF_FRAMES];
- int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1];
+ int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];
vpx_memset(mode8x8, 0, sizeof(mode8x8));
vpx_memset(&frame_mv, 0, sizeof(frame_mv));
@@ -3359,7 +3414,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
for (i = 0; i < NB_TXFM_MODES; i++)
best_txfm_rd[i] = INT64_MAX;
- for (i = 0; i < BLOCK_MAX_SEGMENTS - 1; i++) {
+ for (i = 0; i < NB_PARTITIONINGS; i++) {
int j, k;
for (j = 0; j < 16; j++)
@@ -3425,12 +3480,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
// that depend on the current prediction etc.
vp8_estimate_ref_frame_costs(cpi, segment_id, ref_costs);
-#if CONFIG_SWITCHABLE_INTERP
for (mode_index = 0; mode_index < MAX_MODES;
mode_index += (!switchable_filter_index)) {
-#else
- for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
-#endif
int64_t this_rd = INT64_MAX;
int is_comp_pred;
int disable_skip = 0, skippable = 0;
@@ -3458,19 +3509,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
#if CONFIG_PRED_FILTER
mbmi->pred_filter_enabled = 0;
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE &&
this_mode >= NEARESTMV && this_mode <= SPLITMV) {
mbmi->interp_filter =
vp8_switchable_interp[switchable_filter_index++];
if (switchable_filter_index == VP8_SWITCHABLE_FILTERS)
switchable_filter_index = 0;
- //printf("Searching %d (%d)\n", this_mode, switchable_filter_index);
} else {
mbmi->interp_filter = cpi->common.mcomp_filter_type;
}
vp8_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-#endif
// Test best rd so far against threshold for trying this mode.
if (best_rd <= cpi->rd_threshes[mode_index])
@@ -3612,11 +3660,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
break;
case I8X8_PRED: {
-#if CONFIG_TX_SELECT
int cost0 = vp8_cost_bit(cm->prob_tx[0], 0);
int cost1 = vp8_cost_bit(cm->prob_tx[0], 1);
int64_t tmp_rd_4x4s, tmp_rd_8x8s;
-#endif
int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
mbmi->txfm_size = TX_4X4;
@@ -3638,7 +3684,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
txfm_cache[ONLY_4X4] = tmp_rd_4x4;
txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
-#if CONFIG_TX_SELECT
tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? tmp_rd_4x4s : tmp_rd_8x8s;
@@ -3667,9 +3712,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
mode8x8[1][3] = x->e_mbd.mode_info_context->bmi[10].as_mode.second;
#endif
}
- } else
-#endif
- if (cm->txfm_mode == ONLY_4X4) {
+ } else if (cm->txfm_mode == ONLY_4X4) {
rate = r4x4;
rate_y = tok4x4;
distortion = d4x4;
@@ -3725,21 +3768,19 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
(mbmi->ref_frame == GOLDEN_FRAME) ?
cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
- mbmi->txfm_size = TX_4X4; // FIXME use 8x8 in case of 8x8/8x16/16x8
- tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
- second_ref, best_yrd, mdcounts,
- &rate, &rate_y, &distortion,
- &skippable,
- this_rd_thresh, seg_mvs);
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
+ second_ref, best_yrd, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ this_rd_thresh, seg_mvs,
+ txfm_cache);
rate2 += rate;
distortion2 += distortion;
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
[get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
[vp8_switchable_interp_map[mbmi->interp_filter]];
-#endif
// If even the 'Y' rd value of split is higher than best so far
// then dont bother looking at UV
if (tmp_rd < best_yrd) {
@@ -3877,13 +3918,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
rate2 += vp8_cost_bit(cpi->common.prob_pred_filter_off,
xd->mode_info_context->mbmi.pred_filter_enabled);
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
[get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
[vp8_switchable_interp_map[
x->e_mbd.mode_info_context->mbmi.interp_filter]];
-#endif
/* We don't include the cost of the second reference here, because there are only
* three options: Last/Golden, ARF/Last or Golden/ARF, or in other words if you
@@ -3908,8 +3947,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if (threshold < x->encode_breakout)
threshold = x->encode_breakout;
- var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
- (*(b->base_src), b->src_stride,
+ var = vp8_variance16x16(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
if (sse < threshold) {
@@ -3919,7 +3957,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if ((sse - var < q2dc *q2dc >> 4) ||
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
- int sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
+ int sse2 = vp8_uvsse(x);
if (sse2 * 2 < threshold) {
x->skip = 1;
distortion2 = sse + sse2;
@@ -4127,7 +4165,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if (!mode_excluded && this_rd != INT64_MAX) {
for (i = 0; i < NB_TXFM_MODES; i++) {
int64_t adj_rd;
- if (this_mode != B_PRED && this_mode != SPLITMV) {
+ if (this_mode != B_PRED) {
adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
} else {
adj_rd = this_rd;
@@ -4151,7 +4189,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
else
++cpi->pred_filter_off_count;
#endif
-#if CONFIG_SWITCHABLE_INTERP
if (cpi->common.mcomp_filter_type == SWITCHABLE &&
best_mbmode.mode >= NEARESTMV &&
best_mbmode.mode <= SPLITMV) {
@@ -4159,7 +4196,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
[get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
[vp8_switchable_interp_map[best_mbmode.interp_filter]];
}
-#endif
// Reduce the activation RD thresholds for the best choice mode
if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
@@ -4185,11 +4221,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
mbmi->mode = ZEROMV;
-#if CONFIG_TX_SELECT
if (cm->txfm_mode != TX_MODE_SELECT)
mbmi->txfm_size = cm->txfm_mode;
else
-#endif
mbmi->txfm_size = TX_16X16;
mbmi->ref_frame = ALTREF_FRAME;
mbmi->mv[0].as_int = 0;
@@ -4239,7 +4273,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
best_pred_diff[i] = best_rd - best_pred_rd[i];
}
-#if CONFIG_TX_SELECT
if (!x->skip) {
for (i = 0; i < NB_TXFM_MODES; i++) {
if (best_txfm_rd[i] == INT64_MAX)
@@ -4250,7 +4283,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
} else {
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
}
-#endif
end:
store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition,
@@ -4381,10 +4413,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 1);
dist = dist16x16 + (distuv8x8 >> 2);
mbmi->txfm_size = txfm_size_16x16;
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
} else if (error8x8 > error16x16) {
if (error4x4 < error16x16) {
rate = rateuv;
@@ -4401,20 +4431,16 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
} else {
mbmi->txfm_size = txfm_size_16x16;
mbmi->mode = mode16x16;
rate = rate16x16 + rateuv8x8;
dist = dist16x16 + (distuv8x8 >> 2);
-#if CONFIG_TX_SELECT
for (i = 0; i < NB_TXFM_MODES; i++) {
x->mb_context[xd->mb_index].txfm_rd_diff[i] = error16x16 - txfm_cache[i];
}
-#endif
}
if (cpi->common.mb_no_coeff_skip)
rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -4434,10 +4460,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
} else {
// FIXME(rbultje) support transform-size selection
mbmi->mode = I8X8_PRED;
@@ -4445,10 +4469,8 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
set_i8x8_block_modes(x, mode8x8);
rate = rate8x8 + rateuv;
dist = dist8x8 + (distuv >> 2);
-#if CONFIG_TX_SELECT
memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
-#endif
}
if (cpi->common.mb_no_coeff_skip)
rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -4805,8 +4827,8 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x,
if (threshold < x->encode_breakout)
threshold = x->encode_breakout;
- var = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32)(*(b->base_src),
- b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ var = vp8_variance32x32(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, &sse);
if (sse < threshold) {
unsigned int q2dc = xd->block[24].dequant[0];
@@ -4816,11 +4838,9 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x,
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
unsigned int sse2, sse3;
- var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
- (x->src.u_buffer, x->src.uv_stride,
+ var += vp8_variance16x16(x->src.u_buffer, x->src.uv_stride,
xd->dst.u_buffer, xd->dst.uv_stride, &sse2);
- var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
- (x->src.v_buffer, x->src.uv_stride,
+ var += vp8_variance16x16(x->src.v_buffer, x->src.uv_stride,
xd->dst.v_buffer, xd->dst.uv_stride, &sse3);
sse2 += sse3;
if (sse2 * 2 < threshold) {
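
The encode-breakout tests above rely on the fact that the variance helpers return the variance and also write the raw SSE through their last argument, so (sse - var) is the mean (DC) energy of the residual that gets compared against (q2dc * q2dc) >> 4. A small sketch of that identity with illustrative types; the helper below is not part of the tree.

#include <stdint.h>

/* Hedged sketch: for an N-pixel block, var = sse - (sum * sum) / N, so
 * sse - var recovers the DC (mean) energy of the residual. */
static unsigned int residual_dc_energy(const unsigned char *src, int src_stride,
                                       const unsigned char *pred, int pred_stride,
                                       int w, int h) {
  int r, c;
  int64_t sum = 0;
  for (r = 0; r < h; r++) {
    for (c = 0; c < w; c++)
      sum += src[c] - pred[c];
    src += src_stride;
    pred += pred_stride;
  }
  return (unsigned int)((sum * sum) / (w * h));
}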
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 2e86a16c0..f15e687c6 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -10,33 +10,10 @@
#include <stdlib.h>
+#include "vp8/common/sadmxn.h"
#include "vpx_ports/config.h"
#include "vpx/vpx_integer.h"
-static __inline
-unsigned int sad_mx_n_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int m,
- int n) {
-
- int r, c;
- unsigned int sad = 0;
-
- for (r = 0; r < n; r++) {
- for (c = 0; c < m; c++) {
- sad += abs(src_ptr[c] - ref_ptr[c]);
- }
-
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- return sad;
-}
-
unsigned int vp8_sad32x32_c(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
@@ -97,25 +74,6 @@ unsigned int vp8_sad4x4_c(
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
}
-#if CONFIG_NEWBESTREFMV
-unsigned int vp8_sad3x16_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad){
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16);
-}
-unsigned int vp8_sad16x3_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad){
- return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3);
-}
-#endif
-
void vp8_sad32x32x3_c(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index d3d9711dc..865496ae2 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -11,18 +11,10 @@
#include "onyx_int.h"
-void vp8_ssim_parms_16x16_c
-(
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
-) {
+void vp8_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r,
+ int rp, unsigned long *sum_s, unsigned long *sum_r,
+ unsigned long *sum_sq_s, unsigned long *sum_sq_r,
+ unsigned long *sum_sxr) {
int i, j;
for (i = 0; i < 16; i++, s += sp, r += rp) {
for (j = 0; j < 16; j++) {
@@ -34,18 +26,10 @@ void vp8_ssim_parms_16x16_c
}
}
}
-void vp8_ssim_parms_8x8_c
-(
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
-) {
+void vp8_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp,
+ unsigned long *sum_s, unsigned long *sum_r,
+ unsigned long *sum_sq_s, unsigned long *sum_sq_r,
+ unsigned long *sum_sxr) {
int i, j;
for (i = 0; i < 8; i++, s += sp, r += rp) {
for (j = 0; j < 8; j++) {
@@ -61,15 +45,9 @@ void vp8_ssim_parms_8x8_c
const static int64_t cc1 = 26634; // (64^2*(.01*255)^2
const static int64_t cc2 = 239708; // (64^2*(.03*255)^2
-static double similarity
-(
- unsigned long sum_s,
- unsigned long sum_r,
- unsigned long sum_sq_s,
- unsigned long sum_sq_r,
- unsigned long sum_sxr,
- int count
-) {
+static double similarity(unsigned long sum_s, unsigned long sum_r,
+ unsigned long sum_sq_s, unsigned long sum_sq_r,
+ unsigned long sum_sxr, int count) {
int64_t ssim_n, ssim_d;
int64_t c1, c2;
@@ -87,23 +65,22 @@ static double similarity
return ssim_n * 1.0 / ssim_d;
}
-static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp,
- const vp8_variance_rtcd_vtable_t *rtcd) {
+static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp) {
unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
}
-static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp,
- const vp8_variance_rtcd_vtable_t *rtcd) {
+static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) {
unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- SSIMPF_INVOKE(rtcd, 8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ vp8_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
}
// TODO: (jbb) tried to scale this function such that we may be able to use it
// for distortion metric in mode selection code ( provided we do a reconstruction)
-long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
- const vp8_variance_rtcd_vtable_t *rtcd) {
+long dssim(unsigned char *s, int sp, unsigned char *r, int rp) {
unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
int64_t ssim3;
int64_t ssim_n1, ssim_n2;
@@ -115,7 +92,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
c1 = cc1 * 16;
c2 = cc2 * 16;
- SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
ssim_n1 = (2 * sum_s * sum_r + c1);
ssim_n2 = ((int64_t) 2 * 256 * sum_sxr - (int64_t) 2 * sum_s * sum_r + c2);
@@ -137,16 +115,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
// We are using a 8x8 moving window with starting location of each 8x8 window
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
-double vp8_ssim2
-(
- unsigned char *img1,
- unsigned char *img2,
- int stride_img1,
- int stride_img2,
- int width,
- int height,
- const vp8_variance_rtcd_vtable_t *rtcd
-) {
+double vp8_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1,
+ int stride_img2, int width, int height) {
int i, j;
int samples = 0;
double ssim_total = 0;
@@ -154,7 +124,7 @@ double vp8_ssim2
// sample point start with each 4x4 location
for (i = 0; i < height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
for (j = 0; j < width - 8; j += 4) {
- double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2, rtcd);
+ double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2);
ssim_total += v;
samples++;
}
@@ -162,28 +132,22 @@ double vp8_ssim2
ssim_total /= samples;
return ssim_total;
}
-double vp8_calc_ssim
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- int lumamask,
- double *weight,
- const vp8_variance_rtcd_vtable_t *rtcd
-) {
+double vp8_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+ int lumamask, double *weight) {
double a, b, c;
double ssimv;
a = vp8_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride, source->y_width,
- source->y_height, rtcd);
+ source->y_height);
b = vp8_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
c = vp8_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
ssimv = a * .8 + .1 * (b + c);
@@ -192,29 +156,22 @@ double vp8_calc_ssim
return ssimv;
}
-double vp8_calc_ssimg
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- double *ssim_y,
- double *ssim_u,
- double *ssim_v,
- const vp8_variance_rtcd_vtable_t *rtcd
-) {
+double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+ double *ssim_y, double *ssim_u, double *ssim_v) {
double ssim_all = 0;
double a, b, c;
a = vp8_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride, source->y_width,
- source->y_height, rtcd);
+ source->y_height);
b = vp8_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
c = vp8_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height, rtcd);
+ source->uv_height);
*ssim_y = a;
*ssim_u = b;
*ssim_v = c;
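
With the rtcd vtable parameter gone, callers of the SSIM entry points just pass the two frame buffers. A hedged usage sketch against the new vp8_calc_ssim signature; only the parameter list comes from the change above, while the wrapper, its names, and the lumamask value of 1 are assumptions.

#include "vpx_scale/yv12config.h"  /* YV12_BUFFER_CONFIG */

/* Prototype as introduced above; repeated so the sketch is self-contained. */
double vp8_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                     int lumamask, double *weight);

/* Hedged usage sketch: weighted SSIM between a source frame and its
 * reconstruction. */
static double report_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *recon) {
  double weight = 0.0;
  return vp8_calc_ssim(source, recon, 1 /* lumamask, assumed */, &weight);
}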
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index d46637a3e..2ddae1cbd 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -26,38 +26,26 @@
#ifdef ENTROPY_STATS
INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM
INT64 hybrid_context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM8X8
INT64 hybrid_context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_HYBRIDTRANSFORM16X16
INT64 hybrid_context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#endif
extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
-#if CONFIG_HYBRIDTRANSFORM
extern unsigned int hybrid_tree_update_hist[BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
-#endif
extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM8X8
extern unsigned int hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#endif
extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#if CONFIG_HYBRIDTRANSFORM16X16
extern unsigned int hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#endif
#endif /* ENTROPY_STATS */
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);
@@ -134,9 +122,7 @@ static void tokenize1st_order_b_16x16(MACROBLOCKD *xd,
const int eob = b->eob; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
const short *qcoeff_ptr = b->qcoeff;
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
int seg_eob = 256;
int segment_id = xd->mode_info_context->mbmi.segment_id;
@@ -162,22 +148,18 @@ static void tokenize1st_order_b_16x16(MACROBLOCKD *xd,
}
t->Token = x;
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_16x16[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_16x16[type][band][pt];
t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
(band > 1 && type == PLANE_TYPE_Y_NO_DC));
assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts_16x16[type][band][pt][x];
else
-#endif
++cpi->coef_counts_16x16[type][band][pt][x];
}
pt = vp8_prev_token_class[x];
@@ -310,9 +292,7 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd,
int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; /* start at DC unless type 0 */
TOKENEXTRA *t = *tp; /* store tokens starting here */
const short *qcoeff_ptr = b->qcoeff;
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int eob = b->eob;
int seg_eob = 64;
int segment_id = xd->mode_info_context->mbmi.segment_id;
@@ -338,11 +318,9 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd,
x = DCT_EOB_TOKEN;
t->Token = x;
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_8x8[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_8x8[type][band][pt];
t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
@@ -350,11 +328,9 @@ static void tokenize1st_order_b_8x8(MACROBLOCKD *xd,
assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts_8x8[type][band][pt][x];
else
-#endif
++cpi->coef_counts_8x8[type][band][pt][x];
}
pt = vp8_prev_token_class[x];
@@ -451,7 +427,6 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
const int16_t *qcoeff_ptr = b->qcoeff;
int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type = get_tx_type(xd, &xd->block[block]);
switch (tx_type) {
case ADST_DCT:
@@ -464,7 +439,6 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
pt_scan = vp8_default_zig_zag1d;
break;
}
-#endif
a = (ENTROPY_CONTEXT *)xd->above_context + vp8_block2above[block];
l = (ENTROPY_CONTEXT *)xd->left_context + vp8_block2left[block];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
@@ -485,22 +459,18 @@ static void tokenize1st_order_b_4x4(MACROBLOCKD *xd,
token = DCT_EOB_TOKEN;
t->Token = token;
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs[type][band][pt];
t->skip_eob_node = pt == 0 && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
(band > 1 && type == PLANE_TYPE_Y_NO_DC));
assert(vp8_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts[type][band][pt][token];
else
-#endif
++cpi->coef_counts[type][band][pt][token];
}
pt = vp8_prev_token_class[token];
@@ -619,7 +589,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd);
break;
case TX_8X8:
- if (xd->mode_info_context->mbmi.mode == I8X8_PRED)
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV)
xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8_4x4uv(xd, 0);
else
xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8(xd, has_y2_block);
@@ -668,17 +639,15 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
tokenize1st_order_b_16x16(xd, xd->block, t, PLANE_TYPE_Y_WITH_DC,
A, L, cpi, dry_run);
+ A[1] = A[2] = A[3] = A[0];
+ L[1] = L[2] = L[3] = L[0];
- for (b = 1; b < 16; b++) {
- *(A + vp8_block2above[b]) = *(A);
- *(L + vp8_block2left[b] ) = *(L);
- }
for (b = 16; b < 24; b += 4) {
tokenize1st_order_b_8x8(xd, xd->block + b, t, PLANE_TYPE_UV,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b], cpi, dry_run);
- *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
vpx_memset(&A[8], 0, sizeof(A[8]));
vpx_memset(&L[8], 0, sizeof(L[8]));
@@ -692,18 +661,19 @@ void vp8_tokenize_mb(VP8_COMP *cpi,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
- if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) {
tokenize1st_order_chroma_4x4(xd, t, cpi, dry_run);
} else {
for (b = 16; b < 24; b += 4) {
tokenize1st_order_b_8x8(xd, xd->block + b, t, PLANE_TYPE_UV,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b], cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
}
} else {
@@ -995,30 +965,24 @@ static __inline void stuff1st_order_b_8x8(MACROBLOCKD *xd,
int dry_run) {
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
-#if CONFIG_HYBRIDTRANSFORM8X8
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int band = vp8_coef_bands_8x8[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
(void) b;
t->Token = DCT_EOB_TOKEN;
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_8x8[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_8x8[type][band][pt];
// t->section = 8;
t->skip_eob_node = 0;
++t;
*tp = t;
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM8X8
if (tx_type == DCT_DCT)
++cpi->hybrid_coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN];
else
-#endif
++cpi->coef_counts_8x8[type][band][pt][DCT_EOB_TOKEN];
}
pt = 0; /* 0 <-> all coeff data is zero */
@@ -1074,8 +1038,8 @@ static void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *xd,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
for (b = 16; b < 24; b += 4) {
@@ -1083,8 +1047,8 @@ static void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *xd,
A + vp8_block2above[b],
L + vp8_block2left[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
if (dry_run)
*t = t_backup;
@@ -1100,29 +1064,23 @@ static __inline void stuff1st_order_b_16x16(MACROBLOCKD *xd,
int dry_run) {
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
-#if CONFIG_HYBRIDTRANSFORM16X16
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int band = vp8_coef_bands_16x16[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
(void) b;
t->Token = DCT_EOB_TOKEN;
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs_16x16[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs_16x16[type][band][pt];
t->skip_eob_node = 0;
++t;
*tp = t;
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM16X16
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN];
else
-#endif
++cpi->coef_counts_16x16[type][band][pt][DCT_EOB_TOKEN];
}
pt = 0; /* 0 <-> all coeff data is zero */
@@ -1138,17 +1096,15 @@ static void vp8_stuff_mb_16x16(VP8_COMP *cpi, MACROBLOCKD *xd,
stuff1st_order_b_16x16(xd, xd->block, t, PLANE_TYPE_Y_WITH_DC,
A, L, cpi, dry_run);
- for (i = 1; i < 16; i++) {
- *(A + vp8_block2above[i]) = *(A);
- *(L + vp8_block2left[i]) = *(L);
- }
+ A[1] = A[2] = A[3] = A[0];
+ L[1] = L[2] = L[3] = L[0];
for (b = 16; b < 24; b += 4) {
stuff1st_order_buv_8x8(xd, xd->block + b, t,
A + vp8_block2above[b],
L + vp8_block2left[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b]+1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b]+1 ) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
vpx_memset(&A[8], 0, sizeof(A[8]));
vpx_memset(&L[8], 0, sizeof(L[8]));
@@ -1189,28 +1145,22 @@ static __inline void stuff1st_order_b_4x4(MACROBLOCKD *xd,
int dry_run) {
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
-#if CONFIG_HYBRIDTRANSFORM
TX_TYPE tx_type = get_tx_type(xd, b);
-#endif
const int band = vp8_coef_bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
t->Token = DCT_EOB_TOKEN;
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
t->context_tree = cpi->common.fc.hybrid_coef_probs[type][band][pt];
else
-#endif
t->context_tree = cpi->common.fc.coef_probs[type][band][pt];
t->skip_eob_node = 0;
++t;
*tp = t;
if (!dry_run) {
-#if CONFIG_HYBRIDTRANSFORM
if (tx_type != DCT_DCT)
++cpi->hybrid_coef_counts[type][band][pt][DCT_EOB_TOKEN];
else
-#endif
++cpi->coef_counts[type][band][pt][DCT_EOB_TOKEN];
}
pt = 0; /* 0 <-> all coeff data is zero */
@@ -1288,8 +1238,8 @@ static void vp8_stuff_mb_8x8_4x4uv(VP8_COMP *cpi, MACROBLOCKD *xd,
A + vp8_block2above_8x8[b],
L + vp8_block2left_8x8[b],
cpi, dry_run);
- *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
- *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
+ A[vp8_block2above_8x8[b] + 1] = A[vp8_block2above_8x8[b]];
+ L[vp8_block2left_8x8[b] + 1] = L[vp8_block2left_8x8[b]];
}
for (b = 16; b < 24; b++)
@@ -1308,7 +1258,8 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
if (tx_size == TX_16X16) {
vp8_stuff_mb_16x16(cpi, xd, t, dry_run);
} else if (tx_size == TX_8X8) {
- if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) {
vp8_stuff_mb_8x8_4x4uv(cpi, xd, t, dry_run);
} else {
vp8_stuff_mb_8x8(cpi, xd, t, dry_run);
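
The tokenize.c hunks above follow two related patterns. First, vp8_stuff_mb and the first-order tokenizer now send SPLITMV macroblocks down the same path as I8X8_PRED when the 8x8 transform is selected, so their chroma blocks are still tokenized and stuffed with 4x4 coefficient contexts. Second, whenever an 8x8 block is processed, its above/left entropy context is copied into the neighbouring 4x4 slot, because an 8x8 transform spans two 4x4 context positions; the diff only rewrites the old pointer arithmetic as equivalent array indexing. A minimal sketch of that context update follows; it is not part of the patch, ENTROPY_CONTEXT and the block2above/block2left maps are simplified stand-ins, and mark_8x8_block() is a hypothetical helper name.

/* Hedged sketch of the context-propagation idiom used in the hunks above. */
typedef char ENTROPY_CONTEXT;

static void mark_8x8_block(ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                           const unsigned char *block2above_8x8,
                           const unsigned char *block2left_8x8, int b) {
  /* An 8x8 transform block covers two 4x4 context slots in each direction,
   * so whatever was written for slot [b] is mirrored into slot [b] + 1. */
  A[block2above_8x8[b] + 1] = A[block2above_8x8[b]];
  L[block2left_8x8[b] + 1]  = L[block2left_8x8[b]];
}
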
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index a2fadfc4c..cdeb390c3 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -12,507 +12,73 @@
#ifndef VARIANCE_H
#define VARIANCE_H
-#include "vpx_config.h"
-
-#define prototype_sad(sym)\
- unsigned int (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- int max_sad\
- )
-
-#define prototype_sad_multi_same_address(sym)\
- void (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned int *sad_array\
- )
-
-#define prototype_sad_multi_same_address_1(sym)\
- void (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned short *sad_array\
- )
-
-#define prototype_sad_multi_dif_address(sym)\
- void (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- unsigned char *ref_ptr[4], \
- int ref_stride, \
- unsigned int *sad_array\
- )
-
-#define prototype_variance(sym) \
- unsigned int (sym) \
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned int *sse\
- )
-
-#define prototype_variance2(sym) \
- unsigned int (sym) \
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride, \
- unsigned int *sse,\
- int *sum\
- )
-
-#define prototype_subpixvariance(sym) \
- unsigned int (sym) \
- ( \
- const unsigned char *src_ptr, \
- int source_stride, \
- int xoffset, \
- int yoffset, \
- const unsigned char *ref_ptr, \
- int Refstride, \
- unsigned int *sse \
- );
-
-#define prototype_ssimpf(sym) \
- void (sym) \
- ( \
- unsigned char *s, \
- int sp, \
- unsigned char *r, \
- int rp, \
- unsigned long *sum_s, \
- unsigned long *sum_r, \
- unsigned long *sum_sq_s, \
- unsigned long *sum_sq_r, \
- unsigned long *sum_sxr \
- );
-
-#define prototype_getmbss(sym) unsigned int (sym)(const short *)
-
-#define prototype_get16x16prederror(sym)\
- unsigned int (sym)\
- (\
- const unsigned char *src_ptr, \
- int source_stride, \
- const unsigned char *ref_ptr, \
- int ref_stride \
- )
-
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/variance_x86.h"
-#endif
-
-#if ARCH_ARM
-#include "arm/variance_arm.h"
-#endif
-
-#ifndef vp8_variance_sad4x4
-#define vp8_variance_sad4x4 vp8_sad4x4_c
-#endif
-extern prototype_sad(vp8_variance_sad4x4);
-
-#ifndef vp8_variance_sad8x8
-#define vp8_variance_sad8x8 vp8_sad8x8_c
-#endif
-extern prototype_sad(vp8_variance_sad8x8);
-
-#ifndef vp8_variance_sad8x16
-#define vp8_variance_sad8x16 vp8_sad8x16_c
-#endif
-extern prototype_sad(vp8_variance_sad8x16);
-
-#ifndef vp8_variance_sad16x8
-#define vp8_variance_sad16x8 vp8_sad16x8_c
-#endif
-extern prototype_sad(vp8_variance_sad16x8);
-
-#ifndef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_c
-#endif
-extern prototype_sad(vp8_variance_sad16x16);
-
-#ifndef vp8_variance_sad32x32
-#define vp8_variance_sad32x32 vp8_sad32x32_c
-#endif
-extern prototype_sad(vp8_variance_sad32x32);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_sad32x32x3
-#define vp8_variance_sad32x32x3 vp8_sad32x32x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad32x32x3);
-
-#ifndef vp8_variance_sad16x16x3
-#define vp8_variance_sad16x16x3 vp8_sad16x16x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad16x16x3);
-
-#ifndef vp8_variance_sad16x8x3
-#define vp8_variance_sad16x8x3 vp8_sad16x8x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad16x8x3);
-
-#ifndef vp8_variance_sad8x8x3
-#define vp8_variance_sad8x8x3 vp8_sad8x8x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad8x8x3);
-
-#ifndef vp8_variance_sad8x16x3
-#define vp8_variance_sad8x16x3 vp8_sad8x16x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
-
-#ifndef vp8_variance_sad4x4x3
-#define vp8_variance_sad4x4x3 vp8_sad4x4x3_c
-#endif
-extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
-
-#ifndef vp8_variance_sad32x32x8
-#define vp8_variance_sad32x32x8 vp8_sad32x32x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad32x32x8);
-
-#ifndef vp8_variance_sad16x16x8
-#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8);
-
-#ifndef vp8_variance_sad16x8x8
-#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8);
-
-#ifndef vp8_variance_sad8x8x8
-#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8);
-
-#ifndef vp8_variance_sad8x16x8
-#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8);
-
-#ifndef vp8_variance_sad4x4x8
-#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c
-#endif
-extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_sad32x32x4d
-#define vp8_variance_sad32x32x4d vp8_sad32x32x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad32x32x4d);
-
-#ifndef vp8_variance_sad16x16x4d
-#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad16x16x4d);
-
-#ifndef vp8_variance_sad16x8x4d
-#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad16x8x4d);
-
-#ifndef vp8_variance_sad8x8x4d
-#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad8x8x4d);
-
-#ifndef vp8_variance_sad8x16x4d
-#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad8x16x4d);
-
-#ifndef vp8_variance_sad4x4x4d
-#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_c
-#endif
-extern prototype_sad_multi_dif_address(vp8_variance_sad4x4x4d);
-
-#if ARCH_X86 || ARCH_X86_64
-#ifndef vp8_variance_copy32xn
-#define vp8_variance_copy32xn vp8_copy32xn_c
-#endif
-extern prototype_sad(vp8_variance_copy32xn);
-#endif
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_var4x4
-#define vp8_variance_var4x4 vp8_variance4x4_c
-#endif
-extern prototype_variance(vp8_variance_var4x4);
-
-#ifndef vp8_variance_var8x8
-#define vp8_variance_var8x8 vp8_variance8x8_c
-#endif
-extern prototype_variance(vp8_variance_var8x8);
-
-#ifndef vp8_variance_var8x16
-#define vp8_variance_var8x16 vp8_variance8x16_c
-#endif
-extern prototype_variance(vp8_variance_var8x16);
-
-#ifndef vp8_variance_var16x8
-#define vp8_variance_var16x8 vp8_variance16x8_c
-#endif
-extern prototype_variance(vp8_variance_var16x8);
-
-#ifndef vp8_variance_var16x16
-#define vp8_variance_var16x16 vp8_variance16x16_c
-#endif
-extern prototype_variance(vp8_variance_var16x16);
-
-#ifndef vp8_variance_var32x32
-#define vp8_variance_var32x32 vp8_variance32x32_c
-#endif
-extern prototype_variance(vp8_variance_var32x32);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_subpixvar4x4
-#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar4x4);
-
-#ifndef vp8_variance_subpixvar8x8
-#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar8x8);
-
-#ifndef vp8_variance_subpixvar8x16
-#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar8x16);
-
-#ifndef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
-
-#ifndef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
-
-#ifndef vp8_variance_subpixvar32x32
-#define vp8_variance_subpixvar32x32 vp8_sub_pixel_variance32x32_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixvar32x32);
-
-#ifndef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar16x16_h);
-
-#ifndef vp8_variance_halfpixvar32x32_h
-#define vp8_variance_halfpixvar32x32_h vp8_variance_halfpixvar32x32_h_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar32x32_h);
-
-#ifndef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar16x16_v);
-
-#ifndef vp8_variance_halfpixvar32x32_v
-#define vp8_variance_halfpixvar32x32_v vp8_variance_halfpixvar32x32_v_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar32x32_v);
-
-#ifndef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
-
-#ifndef vp8_variance_halfpixvar32x32_hv
-#define vp8_variance_halfpixvar32x32_hv vp8_variance_halfpixvar32x32_hv_c
-#endif
-extern prototype_variance(vp8_variance_halfpixvar32x32_hv);
-
-#ifndef vp8_variance_subpixmse16x16
-#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixmse16x16);
-
-#ifndef vp8_variance_subpixmse32x32
-#define vp8_variance_subpixmse32x32 vp8_sub_pixel_mse32x32_c
-#endif
-extern prototype_subpixvariance(vp8_variance_subpixmse32x32);
-
-// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
-
-#ifndef vp8_variance_getmbss
-#define vp8_variance_getmbss vp8_get_mb_ss_c
-#endif
-extern prototype_getmbss(vp8_variance_getmbss);
-
-#ifndef vp8_variance_mse16x16
-#define vp8_variance_mse16x16 vp8_mse16x16_c
-#endif
-extern prototype_variance(vp8_variance_mse16x16);
-
-#ifndef vp8_ssimpf_8x8
-#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c
-#endif
-extern prototype_ssimpf(vp8_ssimpf_8x8)
-
-#ifndef vp8_ssimpf_16x16
-#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c
-#endif
-extern prototype_ssimpf(vp8_ssimpf_16x16)
-
-#ifndef vp8_variance_satd16x16
-#define vp8_variance_satd16x16 vp8_satd16x16_c
-#endif
-extern prototype_variance(vp8_variance_satd16x16);
-
-typedef prototype_sad(*vp8_sad_fn_t);
-typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
-typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
-typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
-typedef prototype_variance(*vp8_variance_fn_t);
-typedef prototype_variance2(*vp8_variance2_fn_t);
-typedef prototype_subpixvariance(*vp8_subpixvariance_fn_t);
-typedef prototype_getmbss(*vp8_getmbss_fn_t);
-typedef prototype_ssimpf(*vp8_ssimpf_fn_t);
-typedef prototype_get16x16prederror(*vp8_get16x16prederror_fn_t);
-
-typedef struct {
- vp8_sad_fn_t sad4x4;
- vp8_sad_fn_t sad8x8;
- vp8_sad_fn_t sad8x16;
- vp8_sad_fn_t sad16x8;
- vp8_sad_fn_t sad16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_sad_fn_t sad32x32;
-#endif
-
- vp8_variance_fn_t var4x4;
- vp8_variance_fn_t var8x8;
- vp8_variance_fn_t var8x16;
- vp8_variance_fn_t var16x8;
- vp8_variance_fn_t var16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_variance_fn_t var32x32;
-#endif
-
- vp8_subpixvariance_fn_t subpixvar4x4;
- vp8_subpixvariance_fn_t subpixvar8x8;
- vp8_subpixvariance_fn_t subpixvar8x16;
- vp8_subpixvariance_fn_t subpixvar16x8;
- vp8_subpixvariance_fn_t subpixvar16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_subpixvariance_fn_t subpixvar32x32;
-#endif
- vp8_variance_fn_t halfpixvar16x16_h;
- vp8_variance_fn_t halfpixvar32x32_h;
- vp8_variance_fn_t halfpixvar16x16_v;
-#if CONFIG_SUPERBLOCKS
- vp8_variance_fn_t halfpixvar32x32_v;
-#endif
- vp8_variance_fn_t halfpixvar16x16_hv;
-#if CONFIG_SUPERBLOCKS
- vp8_variance_fn_t halfpixvar32x32_hv;
-#endif
- vp8_subpixvariance_fn_t subpixmse16x16;
-#if CONFIG_SUPERBLOCKS
- vp8_subpixvariance_fn_t subpixmse32x32;
-#endif
-
- vp8_getmbss_fn_t getmbss;
- vp8_variance_fn_t mse16x16;
-
-#if CONFIG_SUPERBLOCKS
- vp8_sad_multi_fn_t sad32x32x3;
-#endif
- vp8_sad_multi_fn_t sad16x16x3;
- vp8_sad_multi_fn_t sad16x8x3;
- vp8_sad_multi_fn_t sad8x16x3;
- vp8_sad_multi_fn_t sad8x8x3;
- vp8_sad_multi_fn_t sad4x4x3;
-
-#if CONFIG_SUPERBLOCKS
- vp8_sad_multi1_fn_t sad32x32x8;
-#endif
- vp8_sad_multi1_fn_t sad16x16x8;
- vp8_sad_multi1_fn_t sad16x8x8;
- vp8_sad_multi1_fn_t sad8x16x8;
- vp8_sad_multi1_fn_t sad8x8x8;
- vp8_sad_multi1_fn_t sad4x4x8;
-
-#if CONFIG_SUPERBLOCKS
- vp8_sad_multi_d_fn_t sad32x32x4d;
-#endif
- vp8_sad_multi_d_fn_t sad16x16x4d;
- vp8_sad_multi_d_fn_t sad16x8x4d;
- vp8_sad_multi_d_fn_t sad8x16x4d;
- vp8_sad_multi_d_fn_t sad8x8x4d;
- vp8_sad_multi_d_fn_t sad4x4x4d;
-
-#if ARCH_X86 || ARCH_X86_64
- vp8_sad_fn_t copy32xn;
-#endif
-
-#if CONFIG_INTERNAL_STATS
- vp8_ssimpf_fn_t ssimpf_8x8;
- vp8_ssimpf_fn_t ssimpf_16x16;
-#endif
-
- vp8_variance_fn_t satd16x16;
-} vp8_variance_rtcd_vtable_t;
-
-typedef struct {
- vp8_sad_fn_t sdf;
- vp8_variance_fn_t vf;
- vp8_subpixvariance_fn_t svf;
- vp8_variance_fn_t svf_halfpix_h;
- vp8_variance_fn_t svf_halfpix_v;
- vp8_variance_fn_t svf_halfpix_hv;
- vp8_sad_multi_fn_t sdx3f;
- vp8_sad_multi1_fn_t sdx8f;
- vp8_sad_multi_d_fn_t sdx4df;
-#if ARCH_X86 || ARCH_X86_64
- vp8_sad_fn_t copymem;
-#endif
+typedef unsigned int(*vp8_sad_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad);
+
+typedef void (*vp8_copy32xn_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int n);
+
+typedef void (*vp8_sad_multi_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int *sad_array);
+
+typedef void (*vp8_sad_multi1_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array);
+
+typedef void (*vp8_sad_multi_d_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char * const ref_ptr[],
+ int ref_stride, unsigned int *sad_array);
+
+typedef unsigned int (*vp8_variance_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int *sse);
+
+typedef unsigned int (*vp8_subpixvariance_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ int xoffset,
+ int yoffset,
+ const unsigned char *ref_ptr,
+ int Refstride,
+ unsigned int *sse);
+
+typedef void (*vp8_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r,
+ int rp, unsigned long *sum_s,
+ unsigned long *sum_r, unsigned long *sum_sq_s,
+ unsigned long *sum_sq_r,
+ unsigned long *sum_sxr);
+
+typedef unsigned int (*vp8_getmbss_fn_t)(const short *);
+
+typedef unsigned int (*vp8_get16x16prederror_fn_t)(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride);
+
+typedef struct variance_vtable {
+ vp8_sad_fn_t sdf;
+ vp8_variance_fn_t vf;
+ vp8_subpixvariance_fn_t svf;
+ vp8_variance_fn_t svf_halfpix_h;
+ vp8_variance_fn_t svf_halfpix_v;
+ vp8_variance_fn_t svf_halfpix_hv;
+ vp8_sad_multi_fn_t sdx3f;
+ vp8_sad_multi1_fn_t sdx8f;
+ vp8_sad_multi_d_fn_t sdx4df;
+ vp8_copy32xn_fn_t copymem;
} vp8_variance_fn_ptr_t;
-#if CONFIG_RUNTIME_CPU_DETECT
-#define VARIANCE_INVOKE(ctx,fn) (ctx)->fn
-#define SSIMPF_INVOKE(ctx,fn) (ctx)->ssimpf_##fn
-#else
-#define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
-#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn
-#endif
-
-#if CONFIG_NEWBESTREFMV
-unsigned int vp8_sad2x16_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad);
-unsigned int vp8_sad16x2_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad);
-#endif
-
#endif
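
With the prototype_* macros, the per-platform #include blocks, and the VARIANCE_INVOKE override machinery removed, variance.h now declares only plain function-pointer types plus the per-block-size variance_vtable handed to the motion search code. Below is a hedged usage sketch that assumes nothing beyond the typedefs and struct defined above; the dummy SAD kernel and the calling function are hypothetical stand-ins, not libvpx kernels.

#include <stdlib.h>   /* abs() */
#include <limits.h>   /* UINT_MAX */

/* Stand-in 16x16 SAD matching the vp8_sad_fn_t signature above. */
static unsigned int dummy_sad16x16(const unsigned char *src_ptr,
                                   int source_stride,
                                   const unsigned char *ref_ptr,
                                   int ref_stride,
                                   unsigned int max_sad) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < 16; ++r) {
    for (c = 0; c < 16; ++c)
      sad += (unsigned int)abs(src_ptr[c] - ref_ptr[c]);
    if (sad > max_sad)          /* early-out once the best known SAD is beaten */
      return sad;
    src_ptr += source_stride;
    ref_ptr += ref_stride;
  }
  return sad;
}

/* Fill one vtable slot and call through it, the way the motion search code
 * uses its vp8_variance_fn_ptr_t argument. */
static unsigned int sad_via_vtable(const unsigned char *src, int src_stride,
                                   const unsigned char *ref, int ref_stride) {
  vp8_variance_fn_ptr_t fn_ptr;
  fn_ptr.sdf = dummy_sad16x16;
  return fn_ptr.sdf(src, src_stride, ref, ref_stride, UINT_MAX);
}
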
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
deleted file mode 100644
index 0971f11b0..000000000
--- a/vp8/encoder/x86/variance_x86.h
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VARIANCE_X86_H
-#define VARIANCE_X86_H
-
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-#if HAVE_MMX
-extern prototype_sad(vp8_sad4x4_mmx);
-extern prototype_sad(vp8_sad8x8_mmx);
-extern prototype_sad(vp8_sad8x16_mmx);
-extern prototype_sad(vp8_sad16x8_mmx);
-extern prototype_sad(vp8_sad16x16_mmx);
-extern prototype_variance(vp8_variance4x4_mmx);
-extern prototype_variance(vp8_variance8x8_mmx);
-extern prototype_variance(vp8_variance8x16_mmx);
-extern prototype_variance(vp8_variance16x8_mmx);
-extern prototype_variance(vp8_variance16x16_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx);
-extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx);
-extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx);
-extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
-extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
-extern prototype_getmbss(vp8_get_mb_ss_mmx);
-extern prototype_variance(vp8_mse16x16_mmx);
-extern prototype_variance2(vp8_get8x8var_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad4x4
-#define vp8_variance_sad4x4 vp8_sad4x4_mmx
-
-#undef vp8_variance_sad8x8
-#define vp8_variance_sad8x8 vp8_sad8x8_mmx
-
-#undef vp8_variance_sad8x16
-#define vp8_variance_sad8x16 vp8_sad8x16_mmx
-
-#undef vp8_variance_sad16x8
-#define vp8_variance_sad16x8 vp8_sad16x8_mmx
-
-#undef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_mmx
-
-#undef vp8_variance_var4x4
-#define vp8_variance_var4x4 vp8_variance4x4_mmx
-
-#undef vp8_variance_var8x8
-#define vp8_variance_var8x8 vp8_variance8x8_mmx
-
-#undef vp8_variance_var8x16
-#define vp8_variance_var8x16 vp8_variance8x16_mmx
-
-#undef vp8_variance_var16x8
-#define vp8_variance_var16x8 vp8_variance16x8_mmx
-
-#undef vp8_variance_var16x16
-#define vp8_variance_var16x16 vp8_variance16x16_mmx
-
-#undef vp8_variance_subpixvar4x4
-#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_mmx
-
-#undef vp8_variance_subpixvar8x8
-#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_mmx
-
-#undef vp8_variance_subpixvar8x16
-#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_mmx
-
-#undef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_mmx
-
-#undef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx
-
-#undef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx
-
-#undef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx
-
-#undef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx
-
-#undef vp8_variance_subpixmse16x16
-#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx
-
-#undef vp8_variance_getmbss
-#define vp8_variance_getmbss vp8_get_mb_ss_mmx
-
-#undef vp8_variance_mse16x16
-#define vp8_variance_mse16x16 vp8_mse16x16_mmx
-
-#endif
-#endif
-
-
-#if HAVE_SSE2
-extern prototype_sad(vp8_sad4x4_wmt);
-extern prototype_sad(vp8_sad8x8_wmt);
-extern prototype_sad(vp8_sad8x16_wmt);
-extern prototype_sad(vp8_sad16x8_wmt);
-extern prototype_sad(vp8_sad16x16_wmt);
-extern prototype_sad(vp8_copy32xn_sse2);
-extern prototype_variance(vp8_variance4x4_wmt);
-extern prototype_variance(vp8_variance8x8_wmt);
-extern prototype_variance(vp8_variance8x16_wmt);
-extern prototype_variance(vp8_variance16x8_wmt);
-extern prototype_variance(vp8_variance16x16_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt);
-extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt);
-extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt);
-extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
-extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
-extern prototype_getmbss(vp8_get_mb_ss_sse2);
-extern prototype_variance(vp8_mse16x16_wmt);
-extern prototype_variance2(vp8_get8x8var_sse2);
-extern prototype_variance2(vp8_get16x16var_sse2);
-extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2)
-extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2)
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad4x4
-#define vp8_variance_sad4x4 vp8_sad4x4_wmt
-
-#undef vp8_variance_sad8x8
-#define vp8_variance_sad8x8 vp8_sad8x8_wmt
-
-#undef vp8_variance_sad8x16
-#define vp8_variance_sad8x16 vp8_sad8x16_wmt
-
-#undef vp8_variance_sad16x8
-#define vp8_variance_sad16x8 vp8_sad16x8_wmt
-
-#undef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_wmt
-
-#undef vp8_variance_copy32xn
-#define vp8_variance_copy32xn vp8_copy32xn_sse2
-
-#undef vp8_variance_var4x4
-#define vp8_variance_var4x4 vp8_variance4x4_wmt
-
-#undef vp8_variance_var8x8
-#define vp8_variance_var8x8 vp8_variance8x8_wmt
-
-#undef vp8_variance_var8x16
-#define vp8_variance_var8x16 vp8_variance8x16_wmt
-
-#undef vp8_variance_var16x8
-#define vp8_variance_var16x8 vp8_variance16x8_wmt
-
-#undef vp8_variance_var16x16
-#define vp8_variance_var16x16 vp8_variance16x16_wmt
-
-#undef vp8_variance_subpixvar4x4
-#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_wmt
-
-#undef vp8_variance_subpixvar8x8
-#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_wmt
-
-#undef vp8_variance_subpixvar8x16
-#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_wmt
-
-#undef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_wmt
-
-#undef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt
-
-#undef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt
-
-#undef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt
-
-#undef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt
-
-#undef vp8_variance_subpixmse16x16
-#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt
-
-#undef vp8_variance_getmbss
-#define vp8_variance_getmbss vp8_get_mb_ss_sse2
-
-#undef vp8_variance_mse16x16
-#define vp8_variance_mse16x16 vp8_mse16x16_wmt
-
-#if ARCH_X86_64
-#undef vp8_ssimpf_8x8
-#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2
-
-#undef vp8_ssimpf_16x16
-#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2
-#endif
-
-#endif
-#endif
-
-
-#if HAVE_SSE3
-extern prototype_sad(vp8_sad16x16_sse3);
-extern prototype_sad(vp8_sad16x8_sse3);
-extern prototype_sad_multi_same_address(vp8_sad16x16x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad16x8x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad8x16x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad8x8x3_sse3);
-extern prototype_sad_multi_same_address(vp8_sad4x4x3_sse3);
-
-extern prototype_sad_multi_dif_address(vp8_sad16x16x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad16x8x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad8x16x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad8x8x4d_sse3);
-extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
-extern prototype_sad(vp8_copy32xn_sse3);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp8_variance_sad16x16
-#define vp8_variance_sad16x16 vp8_sad16x16_sse3
-
-#undef vp8_variance_sad16x16x3
-#define vp8_variance_sad16x16x3 vp8_sad16x16x3_sse3
-
-#undef vp8_variance_sad16x8x3
-#define vp8_variance_sad16x8x3 vp8_sad16x8x3_sse3
-
-#undef vp8_variance_sad8x16x3
-#define vp8_variance_sad8x16x3 vp8_sad8x16x3_sse3
-
-#undef vp8_variance_sad8x8x3
-#define vp8_variance_sad8x8x3 vp8_sad8x8x3_sse3
-
-#undef vp8_variance_sad4x4x3
-#define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3
-
-#undef vp8_variance_sad16x16x4d
-#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3
-
-#undef vp8_variance_sad16x8x4d
-#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3
-
-#undef vp8_variance_sad8x16x4d
-#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_sse3
-
-#undef vp8_variance_sad8x8x4d
-#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_sse3
-
-#undef vp8_variance_sad4x4x4d
-#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_sse3
-
-#undef vp8_variance_copy32xn
-#define vp8_variance_copy32xn vp8_copy32xn_sse3
-
-#endif
-#endif
-
-
-#if HAVE_SSSE3
-extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
-extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3);
-extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad16x16x3
-#define vp8_variance_sad16x16x3 vp8_sad16x16x3_ssse3
-
-#undef vp8_variance_sad16x8x3
-#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
-
-#undef vp8_variance_subpixvar16x8
-#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3
-
-#undef vp8_variance_subpixvar16x16
-#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
-
-#endif
-#endif
-
-
-#if HAVE_SSE4_1
-extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4);
-extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_variance_sad16x16x8
-#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4
-
-#undef vp8_variance_sad16x8x8
-#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4
-
-#undef vp8_variance_sad8x16x8
-#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4
-
-#undef vp8_variance_sad8x8x8
-#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4
-
-#undef vp8_variance_sad4x4x8
-#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4
-
-#endif
-#endif
-
-#endif
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 71c51c14f..a169b493e 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -90,31 +90,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) {
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
if (flags & HAS_MMX) {
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_mmx;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_mmx;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_mmx;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_mmx;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_mmx;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_mmx;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_mmx;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_mmx;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_mmx;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_mmx;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_mmx;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx;
- cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
- cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx;
- cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx;
- cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx;
-
cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx;
@@ -126,32 +101,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) {
#if HAVE_SSE2
if (flags & HAS_SSE2) {
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_wmt;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_wmt;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_wmt;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_wmt;
- cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse2;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_wmt;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_wmt;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_wmt;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_wmt;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_wmt;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_wmt;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_wmt;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt;
- cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
- cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt;
- cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt;
- cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2;
-
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
@@ -160,54 +109,20 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) {
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
- cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2;
- cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2;
-#endif
-#endif
}
#endif
#if HAVE_SSE3
if (flags & HAS_SSE3) {
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3;
- cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_sse3;
- cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_sse3;
- cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3;
- cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3;
- cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3;
cpi->rtcd.search.full_search = vp8_full_search_sadx3;
- cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3;
- cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3;
- cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3;
- cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3;
- cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3;
- cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse3;
cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4;
cpi->rtcd.search.refining_search = vp8_refining_search_sadx4;
}
#endif
-#if HAVE_SSSE3
- if (flags & HAS_SSSE3) {
- cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
- cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
-
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
- }
-#endif
-
-
#if HAVE_SSE4_1
if (flags & HAS_SSE4_1) {
- cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
- cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4;
- cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
- cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
- cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
}
#endif
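
With the variance assignments deleted, vp8_arch_x86_encoder_init only rewires the encodemb, search, and temporal-filter pointers, but its dispatch pattern is unchanged: read the CPU feature flags once, then replace default C function pointers with the fastest implementation the CPU supports. A minimal sketch of that pattern follows; every name in it is hypothetical, and the real code tests flags such as HAS_SSE2 and assigns cpi->rtcd.* entries as shown in the hunks above.

/* Hedged sketch of the runtime CPU-detection pattern. */
typedef int (*encoder_kernel_fn)(const short *coeff, const short *dqcoeff);

static int kernel_c(const short *coeff, const short *dqcoeff) {
  (void)coeff; (void)dqcoeff;
  return 0;                               /* portable C fallback */
}

static int kernel_sse2(const short *coeff, const short *dqcoeff) {
  (void)coeff; (void)dqcoeff;
  return 0;                               /* SIMD stand-in */
}

static encoder_kernel_fn pick_kernel(int cpu_flags, int sse2_mask) {
  encoder_kernel_fn fn = kernel_c;        /* default: plain C implementation */
  if (cpu_flags & sse2_mask)
    fn = kernel_sse2;                     /* override when the CPU supports it */
  return fn;
}
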