diff options
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/blockd.h | 70 | ||||
-rw-r--r-- | vp8/common/coefupdateprobs.h | 2 | ||||
-rw-r--r-- | vp8/common/default_coef_probs.h | 761 | ||||
-rw-r--r-- | vp8/common/entropy.c | 120 | ||||
-rw-r--r-- | vp8/common/entropy.h | 13 | ||||
-rw-r--r-- | vp8/common/entropymv.c | 476 | ||||
-rw-r--r-- | vp8/common/entropymv.h | 119 | ||||
-rw-r--r-- | vp8/common/findnearmv.c | 159 | ||||
-rw-r--r-- | vp8/common/findnearmv.h | 1 | ||||
-rw-r--r-- | vp8/common/generic/systemdependent.c | 2 | ||||
-rw-r--r-- | vp8/common/idct.h | 6 | ||||
-rw-r--r-- | vp8/common/idctllm.c | 98 | ||||
-rw-r--r-- | vp8/common/invtrans.c | 2 | ||||
-rw-r--r-- | vp8/common/invtrans.h | 2 | ||||
-rw-r--r-- | vp8/common/loopfilter.c | 8 | ||||
-rw-r--r-- | vp8/common/mvref_common.c | 349 | ||||
-rw-r--r-- | vp8/common/mvref_common.h | 33 | ||||
-rw-r--r-- | vp8/common/onyxc_int.h | 49 | ||||
-rw-r--r-- | vp8/common/recon.h | 8 | ||||
-rw-r--r-- | vp8/common/reconinter.c | 7 | ||||
-rw-r--r-- | vp8/common/reconinter.h | 9 | ||||
-rw-r--r-- | vp8/common/reconintra.c | 246 | ||||
-rw-r--r-- | vp8/common/rtcd_defs.sh | 8 | ||||
-rw-r--r-- | vp8/common/treecoder.c | 12 | ||||
-rw-r--r-- | vp8/common/treecoder.h | 1 | ||||
-rw-r--r-- | vp8/common/x86/filter_sse2.c | 289 | ||||
-rw-r--r-- | vp8/common/x86/filter_sse4.c | 3 |
27 files changed, 2481 insertions, 372 deletions
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 4e5d9e813..6f980ad65 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -44,6 +44,9 @@ void vpx_log(const char *format, ...); /* Segment Feature Masks */ #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 +#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF +#define MAX_MV_REFS 19 +#endif typedef struct { int r, c; @@ -86,10 +89,6 @@ typedef enum #endif } INTERPOLATIONFILTERTYPE; -#if 0//CONFIG_SWITCHABLE_INTERP -#define VP8_SWITCHABLE_FILTERS 2 /* number of switchable filters */ -#endif - typedef enum { DC_PRED, /* average of above and left pixels */ @@ -130,13 +129,13 @@ typedef enum { typedef enum { TX_4X4, // 4x4 dct transform TX_8X8, // 8x8 dct transform -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 TX_16X16, // 16x16 dct transform #endif TX_SIZE_MAX // Number of different transforms available } TX_SIZE; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 typedef enum { DCT_DCT = 0, // DCT in both horizontal and vertical ADST_DCT = 1, // ADST in horizontal, DCT in vertical @@ -152,10 +151,14 @@ typedef enum { #define VP8_MVREFS (1 + SPLITMV - NEARESTMV) -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 #define ACTIVE_HT 110 // quantization stepsize threshold #endif +#if CONFIG_HYBRIDTRANSFORM16X16 +#define ACTIVE_HT16 300 +#endif + typedef enum { B_DC_PRED, /* average of above and left pixels */ B_TM_PRED, @@ -179,7 +182,7 @@ typedef enum { B_MODE_COUNT } B_PREDICTION_MODE; -#if CONFIG_HYBRIDTRANSFORM8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 // convert MB_PREDICTION_MODE to B_PREDICTION_MODE static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { B_PREDICTION_MODE b_mode; @@ -233,7 +236,7 @@ static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { union b_mode_info { struct { B_PREDICTION_MODE 
first; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 B_PREDICTION_MODE test; TX_TYPE tx_type; #endif @@ -258,7 +261,7 @@ typedef enum { typedef struct { MB_PREDICTION_MODE mode, uv_mode; -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 MB_PREDICTION_MODE mode_rdopt; #endif @@ -270,7 +273,10 @@ typedef struct { int_mv mv[2]; // for each reference frame used #if CONFIG_NEWBESTREFMV int_mv ref_mv, second_ref_mv; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; + int mv_ref_index[MAX_REF_FRAMES]; #endif + unsigned char partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ unsigned char need_to_clamp_mvs; @@ -419,7 +425,7 @@ typedef struct MacroBlockD { int corrupted; -#if ARCH_X86 || ARCH_X86_64 +#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) /* This is an intermediate buffer currently used in sub-pixel motion search * to keep a copy of the reference area. This buffer can be used for other * purpose. 
@@ -432,19 +438,21 @@ typedef struct MacroBlockD { #endif int mb_index; // Index of the MB in the SB (0..3) + #if CONFIG_NEWBESTREFMV - int_mv ref_mv[4]; + int_mv ref_mv[MAX_MV_REFS]; #endif -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 int q_index; #endif } MACROBLOCKD; -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 // transform mapping static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) { + // map transform type switch (bmode) { case B_TM_PRED : case B_RD_PRED : @@ -467,6 +475,40 @@ static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) { break; } } + +static TX_TYPE get_tx_type(MACROBLOCKD *xd, const BLOCKD *b) { + TX_TYPE tx_type = DCT_DCT; + int ib = (b - xd->block); + if (ib >= 16) return tx_type; +#if CONFIG_HYBRIDTRANSFORM16X16 + if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) { + if (xd->mode_info_context->mbmi.mode < I8X8_PRED && + xd->q_index < ACTIVE_HT16) + tx_type = b->bmi.as_mode.tx_type; + return tx_type; + } +#endif +#if CONFIG_HYBRIDTRANSFORM8X8 + if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { + BLOCKD *bb; + ib = (ib & 8) + ((ib & 4) >> 1); + bb = xd->block + ib; + if (xd->mode_info_context->mbmi.mode == I8X8_PRED) + tx_type = bb->bmi.as_mode.tx_type; + return tx_type; + } +#endif +#if CONFIG_HYBRIDTRANSFORM + if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { + if (xd->mode_info_context->mbmi.mode == B_PRED && + xd->q_index < ACTIVE_HT) { + tx_type = b->bmi.as_mode.tx_type; + } + return tx_type; + } +#endif + return tx_type; +} #endif extern void vp8_build_block_doffsets(MACROBLOCKD *xd); diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index 0fb25cc94..0610356cc 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -13,7 +13,7 @@ Generated file included by entropy.c */ #define COEF_UPDATE_PROB 252 #define 
COEF_UPDATE_PROB_8X8 252 -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 #define COEF_UPDATE_PROB_16X16 252 #endif diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h index 940e971b7..717cef78b 100644 --- a/vp8/common/default_coef_probs.h +++ b/vp8/common/default_coef_probs.h @@ -253,8 +253,253 @@ static const vp8_prob default_coef_probs [BLOCK_TYPES] } } }; + +#if CONFIG_HYBRIDTRANSFORM +static const vp8_prob default_hybrid_coef_probs [BLOCK_TYPES] +[COEF_BANDS] +[PREV_COEF_CONTEXTS] +[ENTROPY_NODES] = { + { + /* Block Type ( 0 ) */ + { + /* Coeff Band ( 0 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, + { 90, 116, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + { 64, 128, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + { 64, 100, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, + { 28, 110, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 
204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, + { 90, 90, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, + { 64, 120, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + } + }, + { + /* Block Type ( 1 ) */ + { + /* Coeff Band ( 0 )*/ + { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, + { 48, 32, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, + { 66, 90, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, + { 18, 80, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, + { 36, 120, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 
225, 251, 218, 190, 255, 255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, + { 18, 90, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, + { 28, 70, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, + { 40, 90, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + } + }, + { + /* Block Type ( 2 ) */ + { + /* Coeff Band ( 0 )*/ + { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, + { 64, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, + { 140, 70, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, + { 60, 40, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 
128, 128, 128, 128, 128, 128 }, + { 132, 118, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 48, 85, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + } + }, + { + /* Block Type ( 3 ) */ + { + /* Coeff Band ( 0 )*/ + { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 63, 48, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, + { 54, 40, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, + }, + { + /* Coeff Band ( 1 )*/ + { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 44, 84, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, + { 32, 70, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, + }, + { + /* Coeff Band ( 2 )*/ + { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 
170, 243, 182, 255, 255, 128 }, + }, + { + /* Coeff Band ( 3 )*/ + { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, + { 26, 104, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, + }, + { + /* Coeff Band ( 4 )*/ + { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, + }, + { + /* Coeff Band ( 5 )*/ + { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, + }, + { + /* Coeff Band ( 6 )*/ + { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, + }, + { + /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + } + } +}; +#endif + static const vp8_prob -vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] +default_coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = { @@ -433,9 +678,8 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}, { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128} } - } + }, #if CONFIG_HYBRIDTRANSFORM8X8 - , { /* block Type 3 */ { /* Coeff Band 0 */ { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255}, @@ -488,156 +732,183 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] } #endif }; -#if CONFIG_TX16X16 + +#if 
CONFIG_HYBRIDTRANSFORM8X8 static const vp8_prob -vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = -{ - { /* block Type 0 */ - { /* Coeff Band 0 */ +default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { + { + /* block Type 0 */ + { + /* Coeff Band 0 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 1 */ + { + /* Coeff Band 1 */ { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} }, - { /* Coeff Band 2 */ + { + /* Coeff Band 2 */ { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} }, - { /* Coeff Band 3 */ + { + /* Coeff Band 3 */ { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} }, - { /* Coeff Band 4 */ + { + /* Coeff Band 4 */ { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} }, - { /* Coeff Band 5 */ + { + /* Coeff Band 5 */ { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} }, - { /* Coeff Band 6 */ + { + /* 
Coeff Band 6 */ { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} }, - { /* Coeff Band 7 */ + { + /* Coeff Band 7 */ { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} } }, - { /* block Type 1 */ - { /* Coeff Band 0 */ + { + /* block Type 1 */ + { + /* Coeff Band 0 */ { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128}, { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128}, { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128} }, - { /* Coeff Band 1 */ + { + /* Coeff Band 1 */ { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128}, { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128}, { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}, { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128} }, - { /* Coeff Band 2 */ + { + /* Coeff Band 2 */ { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128}, { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128}, { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}, { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128} }, - { /* Coeff Band 3 */ + { + /* Coeff Band 3 */ { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128}, { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128}, { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}, { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128} }, - { /* Coeff Band 4 */ + { + /* Coeff Band 4 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 5 */ + { + /* Coeff Band 5 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 6 */ + { + /* Coeff Band 6 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} }, - { /* Coeff Band 7 */ + { + /* Coeff Band 7 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} } }, - { /* block Type 2 */ - { /* Coeff Band 0 */ + { + /* block Type 2 */ + { + /* Coeff Band 0 */ { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128}, { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128}, { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}, { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128} }, - { /* Coeff Band 1 */ + { + /* Coeff Band 1 */ { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128}, { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128}, { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}, { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128} }, - { /* Coeff Band 2 */ + { + /* Coeff Band 2 */ { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128}, { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128}, { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}, { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128} }, - { /* Coeff Band 3 */ + { + /* Coeff Band 3 */ { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128}, { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128}, { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}, { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128} }, - { /* Coeff Band 4 */ + { + /* Coeff Band 4 */ { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128}, { 1, 136, 180, 
251, 181, 201, 255, 246, 128, 128, 128}, { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}, { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128} }, - { /* Coeff Band 5 */ + { + /* Coeff Band 5 */ { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128}, { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128}, { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}, { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128} }, - { /* Coeff Band 6 */ + { + /* Coeff Band 6 */ { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128}, { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128}, { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}, { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128} }, - { /* Coeff Band 7 */ + { + /* Coeff Band 7 */ { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128}, { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128}, { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}, @@ -646,6 +917,423 @@ vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] }, { /* block Type 3 */ { /* Coeff Band 0 */ + { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255}, + { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255}, + { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128}, + { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128}, + { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128}, + { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128} + }, + { /* Coeff Band 2 */ + { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128}, + { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128}, + { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128}, + { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128} + }, + { /* Coeff Band 3 */ + { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128}, + { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128}, + { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128}, + { 1, 44, 81, 192, 130, 148, 
240, 180, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128}, + { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128}, + { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128}, + { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128} + }, + { /* Coeff Band 5 */ + { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128}, + { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128}, + { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128}, + { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128} + }, + { /* Coeff Band 6 */ + { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128}, + { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128}, + { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128}, + { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128}, + { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128}, + { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + } + } +}; +#endif + +#if CONFIG_TX16X16 +static const vp8_prob + default_coef_probs_16x16[BLOCK_TYPES_16X16] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, + { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} + }, + { /* Coeff Band 2 */ + { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, + { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, + { 1, 133, 203, 
255, 210, 220, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 3 */ + { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, + { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 4 */ + { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, + { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, + { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, + { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, + { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 
252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff 
Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ + { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184}, + { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200}, + { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 193, 221, 246, 198, 194, 244, 176, 255, 192, 128}, + { 112, 160, 209, 244, 196, 194, 243, 175, 255, 209, 128}, + { 45, 123, 175, 240, 184, 195, 
239, 178, 255, 218, 255}, + { 16, 53, 75, 169, 119, 152, 209, 146, 255, 219, 255} + }, + { /* Coeff Band 2 */ + { 1, 141, 183, 240, 176, 187, 246, 198, 255, 218, 128}, + { 36, 97, 150, 231, 161, 180, 243, 191, 255, 217, 255}, + { 8, 65, 111, 210, 143, 166, 230, 167, 255, 224, 255}, + { 2, 35, 61, 157, 113, 149, 208, 142, 255, 217, 255} + }, + { /* Coeff Band 3 */ + { 1, 173, 196, 245, 184, 191, 252, 211, 255, 240, 128}, + { 35, 119, 175, 242, 177, 187, 252, 209, 255, 235, 128}, + { 4, 88, 141, 234, 161, 180, 249, 200, 255, 228, 128}, + { 1, 57, 95, 203, 133, 161, 235, 167, 255, 231, 255} + }, + { /* Coeff Band 4 */ + { 1, 208, 227, 249, 209, 204, 248, 188, 255, 248, 128}, + { 28, 162, 211, 247, 203, 200, 252, 188, 255, 232, 128}, + { 5, 114, 174, 238, 182, 189, 245, 184, 255, 238, 128}, + { 1, 61, 100, 205, 136, 164, 235, 163, 255, 239, 128} + }, + { /* Coeff Band 5 */ + { 1, 195, 218, 252, 208, 207, 250, 205, 255, 245, 128}, + { 22, 141, 196, 249, 198, 201, 250, 202, 255, 244, 128}, + { 2, 105, 163, 240, 178, 189, 246, 191, 255, 246, 128}, + { 1, 70, 112, 206, 144, 167, 232, 162, 255, 239, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 215, 251, 204, 203, 255, 222, 255, 225, 128}, + { 15, 140, 194, 249, 194, 199, 254, 221, 255, 253, 128}, + { 1, 95, 153, 243, 172, 188, 254, 213, 255, 248, 128}, + { 1, 59, 99, 216, 135, 166, 247, 190, 255, 237, 255} + }, + { /* Coeff Band 7 */ + { 1, 7, 231, 255, 227, 223, 255, 240, 255, 255, 128}, + { 15, 157, 217, 255, 218, 219, 255, 239, 255, 255, 128}, + { 1, 114, 182, 252, 198, 207, 255, 235, 255, 255, 128}, + { 1, 71, 122, 238, 154, 181, 255, 216, 255, 255, 128} + } + } +}; + +#if CONFIG_HYBRIDTRANSFORM16X16 +static const vp8_prob + default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = { + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, + { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, + { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} + }, + { /* Coeff Band 2 */ + { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, + { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, + { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 3 */ + { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, + { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, + { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 4 */ + { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, + { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, + { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, + { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, + { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, + { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, + { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, + { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 
235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 
128, 128, 128} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, + { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, + { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, + { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, + { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, + { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} + }, + { /* Coeff Band 2 */ + { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, + { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, + { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, + { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} + }, + { /* Coeff Band 3 */ + { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, + { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, + { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, + { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, + { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, + { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, + { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 5 */ + { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, + { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, + { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, + { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} + }, + { /* Coeff Band 6 */ + { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, + { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, + { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, + { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, + { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, + 
{ 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, + { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184}, { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200}, { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247}, @@ -696,3 +1384,4 @@ vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] } }; #endif +#endif diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index cbe798289..67242d5ea 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -28,6 +28,8 @@ typedef vp8_prob Prob; #include "coefupdateprobs.h" +const int vp8_i8x8_block[4] = {0, 2, 8, 10}; + DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = { 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -95,7 +97,7 @@ DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = { 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, }; -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 // Table can be optimized. 
DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, @@ -208,13 +210,26 @@ vp8_extra_bit_struct vp8_extra_bits[12] = { void vp8_default_coef_probs(VP8_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(pc->fc.coef_probs)); +#if CONFIG_HYBRIDTRANSFORM + vpx_memcpy(pc->fc.hybrid_coef_probs, default_hybrid_coef_probs, + sizeof(pc->fc.hybrid_coef_probs)); +#endif - vpx_memcpy(pc->fc.coef_probs_8x8, vp8_default_coef_probs_8x8, + vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8, sizeof(pc->fc.coef_probs_8x8)); +#if CONFIG_HYBRIDTRANSFORM8X8 + vpx_memcpy(pc->fc.hybrid_coef_probs_8x8, default_hybrid_coef_probs_8x8, + sizeof(pc->fc.hybrid_coef_probs_8x8)); +#endif #if CONFIG_TX16X16 - vpx_memcpy(pc->fc.coef_probs_16x16, vp8_default_coef_probs_16x16, + vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16, sizeof(pc->fc.coef_probs_16x16)); +#if CONFIG_HYBRIDTRANSFORM16X16 + vpx_memcpy(pc->fc.hybrid_coef_probs_16x16, + default_hybrid_coef_probs_16x16, + sizeof(pc->fc.hybrid_coef_probs_16x16)); +#endif #endif } @@ -263,7 +278,8 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { printf(" {\n"); for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { printf(" {"); - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) printf("%d, ", cm->fc.coef_counts[i][j][k][t]); + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + printf("%d, ", cm->fc.coef_counts[i][j][k][t]); printf("},\n"); } printf(" },\n"); @@ -280,7 +296,26 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { printf(" {\n"); for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { printf(" {"); - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) printf("%d, ", cm->fc.coef_counts_8x8[i][j][k][t]); + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + printf("%d, ", cm->fc.coef_counts_8x8[i][j][k][t]); + printf("},\n"); + } + printf(" },\n"); + } + printf(" },\n"); + } + printf("};\n"); + printf("static const unsigned int\nhybrid_coef_counts" + "[BLOCK_TYPES] [COEF_BANDS]" + "[PREV_COEF_CONTEXTS] 
[MAX_ENTROPY_TOKENS] = {\n"); + for (i = 0; i < BLOCK_TYPES; ++i) { + printf(" {\n"); + for (j = 0; j < COEF_BANDS; ++j) { + printf(" {\n"); + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + printf(" {"); + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + printf("%d, ", cm->fc.hybrid_coef_counts[i][j][k][t]); printf("},\n"); } printf(" },\n"); @@ -313,6 +348,30 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { } } +#if CONFIG_HYBRIDTRANSFORM + for (i = 0; i < BLOCK_TYPES; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, cm->fc.hybrid_coef_counts [i][j][k], + 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_hybrid_coef_probs[i][j][k][t] * (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.hybrid_coef_probs[i][j][k][t] = 1; + else if (prob > 255) cm->fc.hybrid_coef_probs[i][j][k][t] = 255; + else cm->fc.hybrid_coef_probs[i][j][k][t] = prob; + } + } +#endif + for (i = 0; i < BLOCK_TYPES_8X8; ++i) for (j = 0; j < COEF_BANDS; ++j) for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -335,6 +394,31 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { } } +#if CONFIG_HYBRIDTRANSFORM8X8 + for (i = 0; i < BLOCK_TYPES_8X8; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, cm->fc.hybrid_coef_counts_8x8 [i][j][k], + 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? 
count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_hybrid_coef_probs_8x8[i][j][k][t] * + (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 1; + else if (prob > 255) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 255; + else cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = prob; + } + } +#endif + #if CONFIG_TX16X16 for (i = 0; i < BLOCK_TYPES_16X16; ++i) for (j = 0; j < COEF_BANDS; ++j) @@ -349,12 +433,36 @@ void vp8_adapt_coef_probs(VP8_COMMON *cm) { count = branch_ct[t][0] + branch_ct[t][1]; count = count > count_sat ? count_sat : count; factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_coef_probs_16x16[i][j][k][t] * (256 - factor) + + prob = ((int)cm->fc.pre_coef_probs_16x16[i][j][k][t] * + (256 - factor) + (int)coef_probs[t] * factor + 128) >> 8; if (prob <= 0) cm->fc.coef_probs_16x16[i][j][k][t] = 1; else if (prob > 255) cm->fc.coef_probs_16x16[i][j][k][t] = 255; else cm->fc.coef_probs_16x16[i][j][k][t] = prob; } } + +#if CONFIG_HYBRIDTRANSFORM16X16 + for (i = 0; i < BLOCK_TYPES_16X16; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + coef_probs, branch_ct, cm->fc.hybrid_coef_counts_16x16[i][j][k], 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? 
count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_hybrid_coef_probs_16x16[i][j][k][t] * (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 1; + else if (prob > 255) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 255; + else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob; + } + } +#endif #endif } diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index a435448e6..b3d3eff9f 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -21,6 +21,8 @@ //#define SUBMVREF_COUNT 5 //#define VP8_NUMMBSPLITS 4 +extern const int vp8_i8x8_block[4]; + /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ @@ -62,11 +64,8 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ /* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ #define BLOCK_TYPES 4 -#if CONFIG_HYBRIDTRANSFORM8X8 #define BLOCK_TYPES_8X8 4 -#else -#define BLOCK_TYPES_8X8 3 -#endif + #define BLOCK_TYPES_16X16 4 /* Middle dimension is a coarsening of the coefficient's @@ -75,7 +74,7 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define COEF_BANDS 8 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands[16]); extern DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]); -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]); #endif @@ -100,8 +99,6 @@ extern DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]); #define SUBEXP_PARAM 4 /* Subexponential code parameter */ #define MODULUS_PARAM 13 /* Modulus parameter */ -#define COEFUPDATETYPE 1 /* coef update type to use (1/2) */ - extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); @@ -118,7 +115,7 @@ extern short vp8_default_zig_zag_mask[16]; extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]); void 
vp8_coef_tree_initialize(void); -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d_16x16[256]); #endif void vp8_adapt_coef_probs(struct VP8Common *); diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c index 1664b2899..6c31236ec 100644 --- a/vp8/common/entropymv.c +++ b/vp8/common/entropymv.c @@ -12,6 +12,449 @@ #include "onyxc_int.h" #include "entropymv.h" +//#define MV_COUNT_TESTING + +#if CONFIG_NEWMVENTROPY + +#define MV_COUNT_SAT 16 +#define MV_MAX_UPDATE_FACTOR 160 + +/* Integer pel reference mv threshold for use of high-precision 1/8 mv */ +#define COMPANDED_MVREF_THRESH 8 + +/* Smooth or bias the mv-counts before prob computation */ +/* #define SMOOTH_MV_COUNTS */ + +const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2] = { + -MV_JOINT_ZERO, 2, + -MV_JOINT_HNZVZ, 4, + -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ +}; +struct vp8_token_struct vp8_mv_joint_encodings[MV_JOINTS]; + +const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2] = { + -MV_CLASS_0, 2, + -MV_CLASS_1, 4, + 6, 8, + -MV_CLASS_2, -MV_CLASS_3, + 10, 12, + -MV_CLASS_4, -MV_CLASS_5, + -MV_CLASS_6, -MV_CLASS_7, +}; +struct vp8_token_struct vp8_mv_class_encodings[MV_CLASSES]; + +const vp8_tree_index vp8_mv_class0_tree [2 * CLASS0_SIZE - 2] = { + -0, -1, +}; +struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE]; + +const vp8_tree_index vp8_mv_fp_tree [2 * 4 - 2] = { + -0, 2, + -1, 4, + -2, -3 +}; +struct vp8_token_struct vp8_mv_fp_encodings[4]; + +const nmv_context vp8_default_nmv_context = { + {32, 64, 96}, + { + { /* vert component */ + 128, /* sign */ + {224, 144, 192, 168, 192, 176, 192}, /* class */ + {216}, /* class0 */ + {136, 140, 148, 160, 176, 192, 224}, /* bits */ + {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ + {64, 96, 64}, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + }, + { /* hor component */ + 128, /* sign */ + {216, 128, 176, 160, 176, 176, 192}, /* class */ + {208}, /* class0 
*/ + {136, 140, 148, 160, 176, 192, 224}, /* bits */ + {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ + {64, 96, 64}, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + } + }, +}; + +MV_JOINT_TYPE vp8_get_mv_joint(MV mv) { + if (mv.row == 0 && mv.col == 0) return MV_JOINT_ZERO; + else if (mv.row == 0 && mv.col != 0) return MV_JOINT_HNZVZ; + else if (mv.row != 0 && mv.col == 0) return MV_JOINT_HZVNZ; + else return MV_JOINT_HNZVNZ; +} + +#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0) + +MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset) { + MV_CLASS_TYPE c; + if (z < CLASS0_SIZE * 8) c = MV_CLASS_0; + else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1; + else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2; + else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3; + else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4; + else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5; + else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6; + else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7; + else assert(0); + if (offset) + *offset = z - mv_class_base(c); + return c; +} + +int vp8_use_nmv_hp(const MV *ref) { + if ((abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && + (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH) + return 1; + else + return 0; +} + +int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset) { + return mv_class_base(c) + offset; +} + +static void increment_nmv_component_count(int v, + nmv_component_counts *mvcomp, + int incr, + int usehp) { + assert (v != 0); /* should not be zero */ + mvcomp->mvcount[MV_MAX + v] += incr; +} + +static void increment_nmv_component(int v, + nmv_component_counts *mvcomp, + int incr, + int usehp) { + int s, z, c, o, d, e, f; + assert (v != 0); /* should not be zero */ + s = v < 0; + mvcomp->sign[s] += incr; + z = (s ? 
-v : v) - 1; /* magnitude - 1 */ + + c = vp8_get_mv_class(z, &o); + mvcomp->classes[c] += incr; + + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + if (c == MV_CLASS_0) { + mvcomp->class0[d] += incr; + } else { + int i, b; + b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) + mvcomp->bits[i][((d >> i) & 1)] += incr; + } + + /* Code the fractional pel bits */ + if (c == MV_CLASS_0) { + mvcomp->class0_fp[d][f] += incr; + } else { + mvcomp->fp[f] += incr; + } + + /* Code the high precision bit */ + if (usehp) { + if (c == MV_CLASS_0) { + mvcomp->class0_hp[e] += incr; + } else { + mvcomp->hp[e] += incr; + } + } +} + +#ifdef SMOOTH_MV_COUNTS +static void smooth_counts(nmv_component_counts *mvcomp) { + static const int flen = 3; // (filter_length + 1) / 2 + static const int fval[] = {8, 3, 1}; + static const int fvalbits = 4; + int i; + unsigned int smvcount[MV_VALS]; + vpx_memcpy(smvcount, mvcomp->mvcount, sizeof(smvcount)); + smvcount[MV_MAX] = (smvcount[MV_MAX - 1] + smvcount[MV_MAX + 1]) >> 1; + for (i = flen - 1; i <= MV_VALS - flen; ++i) { + int j, s = smvcount[i] * fval[0]; + for (j = 1; j < flen; ++j) + s += (smvcount[i - j] + smvcount[i + j]) * fval[j]; + mvcomp->mvcount[i] = (s + (1 << (fvalbits - 1))) >> fvalbits; + } +} +#endif + +static void counts_to_context(nmv_component_counts *mvcomp, int usehp) { + int v; + vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount)); + for (v = 1; v <= MV_MAX; v++) { + increment_nmv_component(-v, mvcomp, mvcomp->mvcount[MV_MAX - v], usehp); + increment_nmv_component( v, mvcomp, mvcomp->mvcount[MV_MAX + v], usehp); + } +} + +void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, + int usehp) { + MV_JOINT_TYPE j = vp8_get_mv_joint(*mv); + mvctx->joints[j]++; + usehp = usehp && vp8_use_nmv_hp(ref); + if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + 
increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp); + } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp); + } +} + +static void adapt_prob(vp8_prob *dest, vp8_prob prep, vp8_prob newp, + unsigned int ct[2]) { + int factor; + int prob; + int count = ct[0] + ct[1]; + if (count) { + count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; + factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); + prob = ((int)prep * (256 - factor) + (int)(newp) * factor + 128) >> 8; + prob += !prob; + prob = (prob > 255 ? 255 : prob); + *dest = prob; + } +} + +void vp8_counts_to_nmv_context( + nmv_context_counts *NMVcount, + nmv_context *prob, + int usehp, + unsigned int (*branch_ct_joint)[2], + unsigned int (*branch_ct_sign)[2], + unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2], + unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2], + unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2], + unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2], + unsigned int (*branch_ct_fp)[4 - 1][2], + unsigned int (*branch_ct_class0_hp)[2], + unsigned int (*branch_ct_hp)[2]) { + int i, j, k; + counts_to_context(&NMVcount->comps[0], usehp); + counts_to_context(&NMVcount->comps[1], usehp); + vp8_tree_probs_from_distribution(MV_JOINTS, + vp8_mv_joint_encodings, + vp8_mv_joint_tree, + prob->joints, + branch_ct_joint, + NMVcount->joints, + 256, 1); + for (i = 0; i < 2; ++i) { + prob->comps[i].sign = + vp8_bin_prob_from_distribution(NMVcount->comps[i].sign); + branch_ct_sign[i][0] = NMVcount->comps[i].sign[0]; + branch_ct_sign[i][1] = NMVcount->comps[i].sign[1]; + vp8_tree_probs_from_distribution(MV_CLASSES, + vp8_mv_class_encodings, + vp8_mv_class_tree, + prob->comps[i].classes, + branch_ct_classes[i], + NMVcount->comps[i].classes, + 256, 1); + vp8_tree_probs_from_distribution(CLASS0_SIZE, + vp8_mv_class0_encodings, + vp8_mv_class0_tree, + prob->comps[i].class0, + branch_ct_class0[i], + 
NMVcount->comps[i].class0, + 256, 1); + for (j = 0; j < MV_OFFSET_BITS; ++j) { + prob->comps[i].bits[j] = vp8_bin_prob_from_distribution( + NMVcount->comps[i].bits[j]); + branch_ct_bits[i][j][0] = NMVcount->comps[i].bits[j][0]; + branch_ct_bits[i][j][1] = NMVcount->comps[i].bits[j][1]; + } + } + for (i = 0; i < 2; ++i) { + for (k = 0; k < CLASS0_SIZE; ++k) { + vp8_tree_probs_from_distribution(4, + vp8_mv_fp_encodings, + vp8_mv_fp_tree, + prob->comps[i].class0_fp[k], + branch_ct_class0_fp[i][k], + NMVcount->comps[i].class0_fp[k], + 256, 1); + } + vp8_tree_probs_from_distribution(4, + vp8_mv_fp_encodings, + vp8_mv_fp_tree, + prob->comps[i].fp, + branch_ct_fp[i], + NMVcount->comps[i].fp, + 256, 1); + } + if (usehp) { + for (i = 0; i < 2; ++i) { + prob->comps[i].class0_hp = vp8_bin_prob_from_distribution( + NMVcount->comps[i].class0_hp); + branch_ct_class0_hp[i][0] = NMVcount->comps[i].class0_hp[0]; + branch_ct_class0_hp[i][1] = NMVcount->comps[i].class0_hp[1]; + + prob->comps[i].hp = + vp8_bin_prob_from_distribution(NMVcount->comps[i].hp); + branch_ct_hp[i][0] = NMVcount->comps[i].hp[0]; + branch_ct_hp[i][1] = NMVcount->comps[i].hp[1]; + } + } +} + +void vp8_adapt_nmv_probs(VP8_COMMON *cm, int usehp) { + int i, j, k; + nmv_context prob; + unsigned int branch_ct_joint[MV_JOINTS - 1][2]; + unsigned int branch_ct_sign[2][2]; + unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; + unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; + unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; + unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2]; + unsigned int branch_ct_fp[2][4 - 1][2]; + unsigned int branch_ct_class0_hp[2][2]; + unsigned int branch_ct_hp[2][2]; +#ifdef MV_COUNT_TESTING + printf("joints count: "); + for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]); + printf("\n"); fflush(stdout); + printf("signs count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].sign[0], cm->fc.NMVcount.comps[i].sign[1]); + 
printf("\n"); fflush(stdout); + printf("classes count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < MV_CLASSES; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].classes[j]); + printf("\n"); fflush(stdout); + } + printf("class0 count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].class0[j]); + printf("\n"); fflush(stdout); + } + printf("bits count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < MV_OFFSET_BITS; ++j) + printf("%d/%d ", cm->fc.NMVcount.comps[i].bits[j][0], + cm->fc.NMVcount.comps[i].bits[j][1]); + printf("\n"); fflush(stdout); + } + printf("class0_fp count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + printf("{"); + for (k = 0; k < 4; ++k) + printf("%d ", cm->fc.NMVcount.comps[i].class0_fp[j][k]); + printf("}, "); + } + printf("\n"); fflush(stdout); + } + printf("fp count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < 4; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].fp[j]); + printf("\n"); fflush(stdout); + } + if (usehp) { + printf("class0_hp count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].class0_hp[0], + cm->fc.NMVcount.comps[i].class0_hp[1]); + printf("\n"); fflush(stdout); + printf("hp count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].hp[0], + cm->fc.NMVcount.comps[i].hp[1]); + printf("\n"); fflush(stdout); + } +#endif +#ifdef SMOOTH_MV_COUNTS + smooth_counts(&cm->fc.NMVcount.comps[0]); + smooth_counts(&cm->fc.NMVcount.comps[1]); +#endif + vp8_counts_to_nmv_context(&cm->fc.NMVcount, + &prob, + usehp, + branch_ct_joint, + branch_ct_sign, + branch_ct_classes, + branch_ct_class0, + branch_ct_bits, + branch_ct_class0_fp, + branch_ct_fp, + branch_ct_class0_hp, + branch_ct_hp); + + for (j = 0; j < MV_JOINTS - 1; ++j) { + adapt_prob(&cm->fc.nmvc.joints[j], + cm->fc.pre_nmvc.joints[j], + prob.joints[j], + branch_ct_joint[j]); + } + for (i = 0; i < 2; ++i) { + 
adapt_prob(&cm->fc.nmvc.comps[i].sign, + cm->fc.pre_nmvc.comps[i].sign, + prob.comps[i].sign, + branch_ct_sign[i]); + for (j = 0; j < MV_CLASSES - 1; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].classes[j], + cm->fc.pre_nmvc.comps[i].classes[j], + prob.comps[i].classes[j], + branch_ct_classes[i][j]); + } + for (j = 0; j < CLASS0_SIZE - 1; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].class0[j], + cm->fc.pre_nmvc.comps[i].class0[j], + prob.comps[i].class0[j], + branch_ct_class0[i][j]); + } + for (j = 0; j < MV_OFFSET_BITS; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].bits[j], + cm->fc.pre_nmvc.comps[i].bits[j], + prob.comps[i].bits[j], + branch_ct_bits[i][j]); + } + } + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + for (k = 0; k < 3; ++k) { + adapt_prob(&cm->fc.nmvc.comps[i].class0_fp[j][k], + cm->fc.pre_nmvc.comps[i].class0_fp[j][k], + prob.comps[i].class0_fp[j][k], + branch_ct_class0_fp[i][j][k]); + } + } + for (j = 0; j < 3; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].fp[j], + cm->fc.pre_nmvc.comps[i].fp[j], + prob.comps[i].fp[j], + branch_ct_fp[i][j]); + } + } + if (usehp) { + for (i = 0; i < 2; ++i) { + adapt_prob(&cm->fc.nmvc.comps[i].class0_hp, + cm->fc.pre_nmvc.comps[i].class0_hp, + prob.comps[i].class0_hp, + branch_ct_class0_hp[i]); + adapt_prob(&cm->fc.nmvc.comps[i].hp, + cm->fc.pre_nmvc.comps[i].hp, + prob.comps[i].hp, + branch_ct_hp[i]); + } + } +} + +#else /* CONFIG_NEWMVENTROPY */ + +#define MV_COUNT_SAT 16 +#define MV_MAX_UPDATE_FACTOR 128 + const MV_CONTEXT_HP vp8_mv_update_probs_hp[2] = { {{ 237, @@ -266,14 +709,6 @@ static void compute_component_probs_hp( } } -void vp8_entropy_mv_init() { - vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); - vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp); -} - -// #define MV_COUNT_TESTING -#define MV_COUNT_SAT 16 -#define MV_MAX_UPDATE_FACTOR 128 void vp8_adapt_mv_probs(VP8_COMMON *cm) { int i, t, count, factor; #ifdef MV_COUNT_TESTING @@ -400,3 +835,28 @@ void 
vp8_adapt_mv_probs(VP8_COMMON *cm) { } } } + +#endif /* CONFIG_NEWMVENTROPY */ + +void vp8_entropy_mv_init() { +#if CONFIG_NEWMVENTROPY + vp8_tokens_from_tree(vp8_mv_joint_encodings, vp8_mv_joint_tree); + vp8_tokens_from_tree(vp8_mv_class_encodings, vp8_mv_class_tree); + vp8_tokens_from_tree(vp8_mv_class0_encodings, vp8_mv_class0_tree); + vp8_tokens_from_tree(vp8_mv_fp_encodings, vp8_mv_fp_tree); +#else + vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); + vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp); +#endif +} + +void vp8_init_mv_probs(VP8_COMMON *cm) { +#if CONFIG_NEWMVENTROPY + vpx_memcpy(&cm->fc.nmvc, &vp8_default_nmv_context, sizeof(nmv_context)); +#else + vpx_memcpy(cm->fc.mvc, + vp8_default_mv_context, sizeof(vp8_default_mv_context)); + vpx_memcpy(cm->fc.mvc_hp, + vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp)); +#endif +} diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h index 535d9b8ac..1a193b172 100644 --- a/vp8/common/entropymv.h +++ b/vp8/common/entropymv.h @@ -16,6 +16,121 @@ #include "vpx_config.h" #include "blockd.h" +struct VP8Common; + +void vp8_entropy_mv_init(); +void vp8_init_mv_probs(struct VP8Common *cm); +void vp8_adapt_mv_probs(struct VP8Common *cm); + +#if CONFIG_NEWMVENTROPY +void vp8_adapt_nmv_probs(struct VP8Common *cm, int usehp); +void vp8_lower_mv_precision(MV *mv); +int vp8_use_nmv_hp(const MV *ref); + +#define VP8_NMV_UPDATE_PROB 255 +//#define MV_GROUP_UPDATE + +#define LOW_PRECISION_MV_UPDATE /* Use 7 bit forward update */ + +/* Symbols for coding which components are zero jointly */ +#define MV_JOINTS 4 +typedef enum { + MV_JOINT_ZERO = 0, /* Zero vector */ + MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ + MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ + MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ +} MV_JOINT_TYPE; + +extern const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2]; +extern struct vp8_token_struct vp8_mv_joint_encodings [MV_JOINTS]; + +/* 
Symbols for coding magnitude class of nonzero components */ +#define MV_CLASSES 8 +typedef enum { + MV_CLASS_0 = 0, /* (0, 2] integer pel */ + MV_CLASS_1 = 1, /* (2, 4] integer pel */ + MV_CLASS_2 = 2, /* (4, 8] integer pel */ + MV_CLASS_3 = 3, /* (8, 16] integer pel */ + MV_CLASS_4 = 4, /* (16, 32] integer pel */ + MV_CLASS_5 = 5, /* (32, 64] integer pel */ + MV_CLASS_6 = 6, /* (64, 128] integer pel */ + MV_CLASS_7 = 7, /* (128, 256] integer pel */ +} MV_CLASS_TYPE; + +extern const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2]; +extern struct vp8_token_struct vp8_mv_class_encodings [MV_CLASSES]; + +#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ +#define CLASS0_SIZE (1 << CLASS0_BITS) +#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) + +#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) +#define MV_MAX ((1 << MV_MAX_BITS) - 1) +#define MV_VALS ((MV_MAX << 1) + 1) + +extern const vp8_tree_index vp8_mv_class0_tree[2 * CLASS0_SIZE - 2]; +extern struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE]; + +extern const vp8_tree_index vp8_mv_fp_tree[2 * 4 - 2]; +extern struct vp8_token_struct vp8_mv_fp_encodings[4]; + +typedef struct { + vp8_prob sign; + vp8_prob classes[MV_CLASSES - 1]; + vp8_prob class0[CLASS0_SIZE - 1]; + vp8_prob bits[MV_OFFSET_BITS]; + vp8_prob class0_fp[CLASS0_SIZE][4 - 1]; + vp8_prob fp[4 - 1]; + vp8_prob class0_hp; + vp8_prob hp; +} nmv_component; + +typedef struct { + vp8_prob joints[MV_JOINTS - 1]; + nmv_component comps[2]; +} nmv_context; + +MV_JOINT_TYPE vp8_get_mv_joint(MV mv); +MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset); +int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset); + + +typedef struct { + unsigned int mvcount[MV_VALS]; + unsigned int sign[2]; + unsigned int classes[MV_CLASSES]; + unsigned int class0[CLASS0_SIZE]; + unsigned int bits[MV_OFFSET_BITS][2]; + unsigned int class0_fp[CLASS0_SIZE][4]; + unsigned int fp[4]; + unsigned int class0_hp[2]; + unsigned int hp[2]; +} nmv_component_counts; + 
+typedef struct { + unsigned int joints[MV_JOINTS]; + nmv_component_counts comps[2]; +} nmv_context_counts; + +void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, + int usehp); +extern const nmv_context vp8_default_nmv_context; +void vp8_counts_to_nmv_context( + nmv_context_counts *NMVcount, + nmv_context *prob, + int usehp, + unsigned int (*branch_ct_joint)[2], + unsigned int (*branch_ct_sign)[2], + unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2], + unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2], + unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2], + unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2], + unsigned int (*branch_ct_fp)[4 - 1][2], + unsigned int (*branch_ct_class0_hp)[2], + unsigned int (*branch_ct_hp)[2]); + +#else /* CONFIG_NEWMVENTROPY */ + enum { mv_max = 1023, /* max absolute value of a MV component */ MVvals = (2 * mv_max) + 1, /* # possible values "" */ @@ -73,8 +188,6 @@ extern struct vp8_token_struct vp8_small_mvencodings [8]; extern const vp8_tree_index vp8_small_mvtree_hp[]; extern struct vp8_token_struct vp8_small_mvencodings_hp [16]; -void vp8_entropy_mv_init(); -struct VP8Common; -void vp8_adapt_mv_probs(struct VP8Common *cm); +#endif /* CONFIG_NEWMVENTROPY */ #endif diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c index 6f7361dd0..235ca46ce 100644 --- a/vp8/common/findnearmv.c +++ b/vp8/common/findnearmv.c @@ -20,15 +20,20 @@ const unsigned char vp8_mbsplit_offset[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} }; -static void lower_mv_precision(int_mv *mv) +static void lower_mv_precision(int_mv *mv, int usehp) { - if (mv->as_mv.row & 1) - mv->as_mv.row += (mv->as_mv.row > 0 ? -1 : 1); - if (mv->as_mv.col & 1) - mv->as_mv.col += (mv->as_mv.col > 0 ? -1 : 1); +#if CONFIG_NEWMVENTROPY + if (!usehp || !vp8_use_nmv_hp(&mv->as_mv)) { +#else + if (!usehp) { +#endif + if (mv->as_mv.row & 1) + mv->as_mv.row += (mv->as_mv.row > 0 ? 
-1 : 1); + if (mv->as_mv.col & 1) + mv->as_mv.col += (mv->as_mv.col > 0 ? -1 : 1); + } } - /* Predict motion vectors using those from already-decoded nearby blocks. Note that we only consider one 4x4 subblock from each candidate 16x16 macroblock. */ @@ -173,11 +178,9 @@ void vp8_find_near_mvs /* Make sure that the 1/8th bits of the Mvs are zero if high_precision * is not being used, by truncating the last bit towards 0 */ - if (!xd->allow_high_precision_mv) { - lower_mv_precision(best_mv); - lower_mv_precision(nearest); - lower_mv_precision(nearby); - } + lower_mv_precision(best_mv, xd->allow_high_precision_mv); + lower_mv_precision(nearest, xd->allow_high_precision_mv); + lower_mv_precision(nearby, xd->allow_high_precision_mv); // TODO: move clamp outside findnearmv vp8_clamp_mv2(nearest, xd); @@ -200,75 +203,109 @@ vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc, * above and a number cols of pixels in the left to select the one with best * score to use as ref motion vector */ + void vp8_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *ref_y_buffer, int ref_y_stride, + int_mv *mvlist, int_mv *best_mv, int_mv *nearest, int_mv *near) { - int_mv *ref_mv = xd->ref_mv; - int bestsad = INT_MAX; - int i; + int i, j; unsigned char *above_src; unsigned char *left_src; unsigned char *above_ref; unsigned char *left_ref; int sad; + int sad_scores[MAX_MV_REFS]; + int_mv sorted_mvs[MAX_MV_REFS]; + int zero_seen = FALSE; - above_src = xd->dst.y_buffer - xd->dst.y_stride * 2; - left_src = xd->dst.y_buffer - 2; - above_ref = ref_y_buffer - ref_y_stride * 2; - left_ref = ref_y_buffer - 2; + // Default all to 0,0 if nothing else available + best_mv->as_int = nearest->as_int = near->as_int = 0; + vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs)); - bestsad = vp8_sad16x2_c(above_src, xd->dst.y_stride, - above_ref, ref_y_stride, - INT_MAX); - bestsad += vp8_sad2x16_c(left_src, xd->dst.y_stride, - left_ref, ref_y_stride, - INT_MAX); - best_mv->as_int = 0; + above_src = xd->dst.y_buffer - 
xd->dst.y_stride * 3; + left_src = xd->dst.y_buffer - 3; + above_ref = ref_y_buffer - ref_y_stride * 3; + left_ref = ref_y_buffer - 3; + //for(i = 0; i < MAX_MV_REFS; ++i) { + // Limit search to the predicted best 4 for(i = 0; i < 4; ++i) { - if (ref_mv[i].as_int) { - int_mv this_mv; - int offset=0; - int row_offset, col_offset; - this_mv.as_int = ref_mv[i].as_int; - vp8_clamp_mv(&this_mv, - xd->mb_to_left_edge - LEFT_TOP_MARGIN + 16, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); - - row_offset = (this_mv.as_mv.row > 0) ? - ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3); - col_offset = (this_mv.as_mv.col > 0) ? - ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3); - offset = ref_y_stride * row_offset + col_offset; - - sad = vp8_sad16x2_c(above_src, xd->dst.y_stride, - above_ref + offset, ref_y_stride, INT_MAX); - - sad += vp8_sad2x16_c(left_src, xd->dst.y_stride, - left_ref + offset, ref_y_stride, INT_MAX); - - if (sad < bestsad) { - bestsad = sad; - best_mv->as_int = this_mv.as_int; - } + int_mv this_mv; + int offset=0; + int row_offset, col_offset; + + this_mv.as_int = mvlist[i].as_int; + + // If we see a 0,0 vector for a second time we have reached the end of + // the list of valid candidate vectors. + if (!this_mv.as_int) + if (zero_seen) + break; + else + zero_seen = TRUE; + + vp8_clamp_mv(&this_mv, + xd->mb_to_left_edge - LEFT_TOP_MARGIN + 16, + xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, + xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16, + xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); + + row_offset = (this_mv.as_mv.row > 0) ? + ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3); + col_offset = (this_mv.as_mv.col > 0) ? 
+ ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3); + offset = ref_y_stride * row_offset + col_offset; + + sad = vp8_sad16x3_c(above_src, xd->dst.y_stride, + above_ref + offset, ref_y_stride, INT_MAX); + + sad += vp8_sad3x16_c(left_src, xd->dst.y_stride, + left_ref + offset, ref_y_stride, INT_MAX); + + // Add the entry to our list and then resort the list on score. + sad_scores[i] = sad; + sorted_mvs[i].as_int = this_mv.as_int; + j = i; + while (j > 0) { + if (sad_scores[j] < sad_scores[j-1]) { + sad_scores[j] = sad_scores[j-1]; + sorted_mvs[j].as_int = sorted_mvs[j-1].as_int; + sad_scores[j-1] = sad; + sorted_mvs[j-1].as_int = this_mv.as_int; + j--; + } else + break; } } - if (!xd->allow_high_precision_mv) - lower_mv_precision(best_mv); - vp8_clamp_mv2(best_mv, xd); + // Set the best mv to the first entry in the sorted list + best_mv->as_int = sorted_mvs[0].as_int; - if (best_mv->as_int != 0 && - (best_mv->as_mv.row >> 3) != (nearest->as_mv.row >>3 ) && - (best_mv->as_mv.col >> 3) != (nearest->as_mv.col >>3 )) { - near->as_int = nearest->as_int; - nearest->as_int = best_mv->as_int; + // Provided that there are non zero vectors available there will not + // be more than one 0,0 entry in the sorted list. + // The best ref mv is always set to the first entry (which gave the best + // results. The nearest is set to the first non zero vector if available and + // near to the second non zero vector if avaialable. + // We do not use 0,0 as a nearest or near as 0,0 has its own mode. 
+ if ( sorted_mvs[0].as_int ) { + nearest->as_int = sorted_mvs[0].as_int; + if ( sorted_mvs[1].as_int ) + near->as_int = sorted_mvs[1].as_int; + else + near->as_int = sorted_mvs[2].as_int; + } else { + nearest->as_int = sorted_mvs[1].as_int; + near->as_int = sorted_mvs[2].as_int; } + + // Copy back the re-ordered mv list + vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs)); + lower_mv_precision(best_mv, xd->allow_high_precision_mv); + + vp8_clamp_mv2(best_mv, xd); } -#endif +#endif // CONFIG_NEWBESTREFMV diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h index e3cdab5ce..cd7b87adf 100644 --- a/vp8/common/findnearmv.h +++ b/vp8/common/findnearmv.h @@ -26,6 +26,7 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *ref_y_buffer, int ref_y_stride, + int_mv *mvlist, int_mv *best_mv, int_mv *nearest, int_mv *near); diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index d28024cda..32b5e5a6c 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -33,7 +33,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) { rtcd->idct.idct8 = vp8_short_idct8x8_c; rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c; rtcd->idct.ihaar2 = vp8_short_ihaar2x2_c; -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 rtcd->idct.idct16x16 = vp8_short_idct16x16_c; #endif rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; diff --git a/vp8/common/idct.h b/vp8/common/idct.h index 2a410c34e..a4246c2a7 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -43,7 +43,7 @@ #define Y2_WHT_UPSCALE_FACTOR 2 #endif -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 #ifndef vp8_idct_idct16x16 #define vp8_idct_idct16x16 vp8_short_idct16x16_c #endif @@ -111,7 +111,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c); extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c); #endif -#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM 
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 #include "vp8/common/blockd.h" void vp8_ihtllm_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim); @@ -136,7 +136,7 @@ typedef struct { vp8_idct_fn_t ihaar2; vp8_idct_fn_t ihaar2_1; -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 vp8_idct_fn_t idct16x16; #endif } vp8_idct_rtcd_vtable_t; diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index bf019af06..b4475c628 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -37,7 +37,7 @@ static const int rounding = 0; // TODO: these transforms can be further converted into integer forms // for complexity optimization -#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 +#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16 float idct_4[16] = { 0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099, 0.500000000000000, 0.270598050073099, -0.500000000000000, -0.653281482438188, @@ -89,11 +89,85 @@ float iadst_8[64] = { 0.483002021635509, -0.466553967085785, 0.434217976756762, -0.387095214016348, 0.326790388032145, -0.255357107325375, 0.175227946595736, -0.089131608307532 }; +#endif + +#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 +float idct_16[256] = { + 0.250000, 0.351851, 0.346760, 0.338330, 0.326641, 0.311806, 0.293969, 0.273300, + 0.250000, 0.224292, 0.196424, 0.166664, 0.135299, 0.102631, 0.068975, 0.034654, + 0.250000, 0.338330, 0.293969, 0.224292, 0.135299, 0.034654, -0.068975, -0.166664, + -0.250000, -0.311806, -0.346760, -0.351851, -0.326641, -0.273300, -0.196424, -0.102631, + 0.250000, 0.311806, 0.196424, 0.034654, -0.135299, -0.273300, -0.346760, -0.338330, + -0.250000, -0.102631, 0.068975, 0.224292, 0.326641, 0.351851, 0.293969, 0.166664, + 0.250000, 0.273300, 0.068975, -0.166664, -0.326641, -0.338330, -0.196424, 0.034654, + 0.250000, 0.351851, 0.293969, 0.102631, 
-0.135299, -0.311806, -0.346760, -0.224292, + 0.250000, 0.224292, -0.068975, -0.311806, -0.326641, -0.102631, 0.196424, 0.351851, + 0.250000, -0.034654, -0.293969, -0.338330, -0.135299, 0.166664, 0.346760, 0.273300, + 0.250000, 0.166664, -0.196424, -0.351851, -0.135299, 0.224292, 0.346760, 0.102631, + -0.250000, -0.338330, -0.068975, 0.273300, 0.326641, 0.034654, -0.293969, -0.311806, + 0.250000, 0.102631, -0.293969, -0.273300, 0.135299, 0.351851, 0.068975, -0.311806, + -0.250000, 0.166664, 0.346760, 0.034654, -0.326641, -0.224292, 0.196424, 0.338330, + 0.250000, 0.034654, -0.346760, -0.102631, 0.326641, 0.166664, -0.293969, -0.224292, + 0.250000, 0.273300, -0.196424, -0.311806, 0.135299, 0.338330, -0.068975, -0.351851, + 0.250000, -0.034654, -0.346760, 0.102631, 0.326641, -0.166664, -0.293969, 0.224292, + 0.250000, -0.273300, -0.196424, 0.311806, 0.135299, -0.338330, -0.068975, 0.351851, + 0.250000, -0.102631, -0.293969, 0.273300, 0.135299, -0.351851, 0.068975, 0.311806, + -0.250000, -0.166664, 0.346760, -0.034654, -0.326641, 0.224292, 0.196424, -0.338330, + 0.250000, -0.166664, -0.196424, 0.351851, -0.135299, -0.224292, 0.346760, -0.102631, + -0.250000, 0.338330, -0.068975, -0.273300, 0.326641, -0.034654, -0.293969, 0.311806, + 0.250000, -0.224292, -0.068975, 0.311806, -0.326641, 0.102631, 0.196424, -0.351851, + 0.250000, 0.034654, -0.293969, 0.338330, -0.135299, -0.166664, 0.346760, -0.273300, + 0.250000, -0.273300, 0.068975, 0.166664, -0.326641, 0.338330, -0.196424, -0.034654, + 0.250000, -0.351851, 0.293969, -0.102631, -0.135299, 0.311806, -0.346760, 0.224292, + 0.250000, -0.311806, 0.196424, -0.034654, -0.135299, 0.273300, -0.346760, 0.338330, + -0.250000, 0.102631, 0.068975, -0.224292, 0.326641, -0.351851, 0.293969, -0.166664, + 0.250000, -0.338330, 0.293969, -0.224292, 0.135299, -0.034654, -0.068975, 0.166664, + -0.250000, 0.311806, -0.346760, 0.351851, -0.326641, 0.273300, -0.196424, 0.102631, + 0.250000, -0.351851, 0.346760, -0.338330, 0.326641, 
-0.311806, 0.293969, -0.273300, + 0.250000, -0.224292, 0.196424, -0.166664, 0.135299, -0.102631, 0.068975, -0.034654 +}; + +float iadst_16[256] = { + 0.033094, 0.098087, 0.159534, 0.215215, 0.263118, 0.301511, 0.329007, 0.344612, + 0.347761, 0.338341, 0.316693, 0.283599, 0.240255, 0.188227, 0.129396, 0.065889, + 0.065889, 0.188227, 0.283599, 0.338341, 0.344612, 0.301511, 0.215215, 0.098087, + -0.033094, -0.159534, -0.263118, -0.329007, -0.347761, -0.316693, -0.240255, -0.129396, + 0.098087, 0.263118, 0.344612, 0.316693, 0.188227, 0.000000, -0.188227, -0.316693, + -0.344612, -0.263118, -0.098087, 0.098087, 0.263118, 0.344612, 0.316693, 0.188227, + 0.129396, 0.316693, 0.329007, 0.159534, -0.098087, -0.301511, -0.338341, -0.188227, + 0.065889, 0.283599, 0.344612, 0.215215, -0.033094, -0.263118, -0.347761, -0.240255, + 0.159534, 0.344612, 0.240255, -0.065889, -0.316693, -0.301511, -0.033094, 0.263118, + 0.338341, 0.129396, -0.188227, -0.347761, -0.215215, 0.098087, 0.329007, 0.283599, + 0.188227, 0.344612, 0.098087, -0.263118, -0.316693, -0.000000, 0.316693, 0.263118, + -0.098087, -0.344612, -0.188227, 0.188227, 0.344612, 0.098087, -0.263118, -0.316693, + 0.215215, 0.316693, -0.065889, -0.347761, -0.098087, 0.301511, 0.240255, -0.188227, + -0.329007, 0.033094, 0.344612, 0.129396, -0.283599, -0.263118, 0.159534, 0.338341, + 0.240255, 0.263118, -0.215215, -0.283599, 0.188227, 0.301511, -0.159534, -0.316693, + 0.129396, 0.329007, -0.098087, -0.338341, 0.065889, 0.344612, -0.033094, -0.347761, + 0.263118, 0.188227, -0.316693, -0.098087, 0.344612, 0.000000, -0.344612, 0.098087, + 0.316693, -0.188227, -0.263118, 0.263118, 0.188227, -0.316693, -0.098087, 0.344612, + 0.283599, 0.098087, -0.347761, 0.129396, 0.263118, -0.301511, -0.065889, 0.344612, + -0.159534, -0.240255, 0.316693, 0.033094, -0.338341, 0.188227, 0.215215, -0.329007, + 0.301511, 0.000000, -0.301511, 0.301511, 0.000000, -0.301511, 0.301511, 0.000000, + -0.301511, 0.301511, 0.000000, -0.301511, 0.301511, 
0.000000, -0.301511, 0.301511, + 0.316693, -0.098087, -0.188227, 0.344612, -0.263118, -0.000000, 0.263118, -0.344612, + 0.188227, 0.098087, -0.316693, 0.316693, -0.098087, -0.188227, 0.344612, -0.263118, + 0.329007, -0.188227, -0.033094, 0.240255, -0.344612, 0.301511, -0.129396, -0.098087, + 0.283599, -0.347761, 0.263118, -0.065889, -0.159534, 0.316693, -0.338341, 0.215215, + 0.338341, -0.263118, 0.129396, 0.033094, -0.188227, 0.301511, -0.347761, 0.316693, + -0.215215, 0.065889, 0.098087, -0.240255, 0.329007, -0.344612, 0.283599, -0.159534, + 0.344612, -0.316693, 0.263118, -0.188227, 0.098087, 0.000000, -0.098087, 0.188227, + -0.263118, 0.316693, -0.344612, 0.344612, -0.316693, 0.263118, -0.188227, 0.098087, + 0.347761, -0.344612, 0.338341, -0.329007, 0.316693, -0.301511, 0.283599, -0.263118, + 0.240255, -0.215215, 0.188227, -0.159534, 0.129396, -0.098087, 0.065889, -0.033094 +}; +#endif +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16 void vp8_ihtllm_c(short *input, short *output, int pitch, TX_TYPE tx_type, int tx_dim) { int i, j, k; - float bufa[64], bufb[64]; // buffers are for floating-point test purpose + float bufa[256], bufb[256]; // buffers are for floating-point test purpose // the implementation could be simplified in // conjunction with integer transform @@ -126,11 +200,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch, switch(tx_type) { case ADST_ADST : case ADST_DCT : - ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; + ptv = (tx_dim == 4) ? &iadst_4[0] : + ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]); break; default : - ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; + ptv = (tx_dim == 4) ? &idct_4[0] : + ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); break; } @@ -155,11 +231,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch, switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : + ((tx_dim == 8) ? 
&iadst_8[0] : &iadst_16[0]); break; default : - pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : + ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); break; } @@ -178,11 +256,13 @@ void vp8_ihtllm_c(short *input, short *output, int pitch, switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : + ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]); break; default : - pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : + ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); break; } } @@ -692,7 +772,7 @@ void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) { } -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 #if 0 // Keep a really bad float version as reference for now. void vp8_short_idct16x16_c(short *input, short *output, int pitch) { diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index 1d63f465a..9fc94eab8 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -171,7 +171,7 @@ void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, } -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch) { diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index 76258d435..2097c368c 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -30,7 +30,7 @@ extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MAC extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd); extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd); -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 extern void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch); diff 
--git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index d9c4b54be..727326cba 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -336,7 +336,7 @@ void vp8_loop_filter_frame (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { @@ -361,7 +361,7 @@ void vp8_loop_filter_frame (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { @@ -479,7 +479,7 @@ void vp8_loop_filter_frame_yonly (y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { @@ -497,7 +497,7 @@ void vp8_loop_filter_frame_yonly (y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf -#if CONFIG_TX16X16 +#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 && tx_type != TX_16X16 #endif ) { diff --git a/vp8/common/mvref_common.c b/vp8/common/mvref_common.c new file mode 100644 index 000000000..b6040cd59 --- /dev/null +++ b/vp8/common/mvref_common.c @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "mvref_common.h" + +#if CONFIG_NEWBESTREFMV + +#define MVREF_NEIGHBOURS 8 +static int mv_ref_search[MVREF_NEIGHBOURS][2] = + { {0,-1},{-1,0},{-1,-1},{0,-2},{-2,0},{-1,-2},{-2,-1},{-2,-2} }; +static int ref_distance_weight[MVREF_NEIGHBOURS] = + { 3,3,2,1,1,1,1,1 }; + +// clamp_mv +#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units +static void clamp_mv(const MACROBLOCKD *xd, int_mv *mv) { + + if (mv->as_mv.col < (xd->mb_to_left_edge - MV_BORDER)) + mv->as_mv.col = xd->mb_to_left_edge - MV_BORDER; + else if (mv->as_mv.col > xd->mb_to_right_edge + MV_BORDER) + mv->as_mv.col = xd->mb_to_right_edge + MV_BORDER; + + if (mv->as_mv.row < (xd->mb_to_top_edge - MV_BORDER)) + mv->as_mv.row = xd->mb_to_top_edge - MV_BORDER; + else if (mv->as_mv.row > xd->mb_to_bottom_edge + MV_BORDER) + mv->as_mv.row = xd->mb_to_bottom_edge + MV_BORDER; +} + +// Code for selecting / building and entropy coding a motion vector reference +// Returns a seperation value for two vectors. +// This is taken as the sum of the abs x and y difference. +unsigned int mv_distance(int_mv *mv1, int_mv *mv2) { + return (abs(mv1->as_mv.row - mv2->as_mv.row) + + abs(mv1->as_mv.col - mv2->as_mv.col)); +} + +// Gets a best matching candidate refenence motion vector +// from the given mode info structure (if available) +int get_candidate_mvref( + const MODE_INFO *candidate_mi, + MV_REFERENCE_FRAME ref_frame, + MV_REFERENCE_FRAME *c_ref_frame, + int_mv *c_mv, + MV_REFERENCE_FRAME *c2_ref_frame, + int_mv *c2_mv +) { + + int ret_val = FALSE; + c2_mv->as_int = 0; + *c2_ref_frame = INTRA_FRAME; + + // Target ref frame matches candidate first ref frame + if (ref_frame == candidate_mi->mbmi.ref_frame) { + c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + *c_ref_frame = ref_frame; + ret_val = TRUE; + + // Is there a second non zero vector we can use. 
+ if ((candidate_mi->mbmi.second_ref_frame != INTRA_FRAME) && + (candidate_mi->mbmi.mv[1].as_int != 0) && + (candidate_mi->mbmi.mv[1].as_int != c_mv->as_int)) { + c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c2_ref_frame = candidate_mi->mbmi.second_ref_frame; + } + + // Target ref frame matches candidate second ref frame + } else if (ref_frame == candidate_mi->mbmi.second_ref_frame) { + c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c_ref_frame = ref_frame; + ret_val = TRUE; + + // Is there a second non zero vector we can use. + if ((candidate_mi->mbmi.ref_frame != INTRA_FRAME) && + (candidate_mi->mbmi.mv[0].as_int != 0) && + (candidate_mi->mbmi.mv[0].as_int != c_mv->as_int)) { + c2_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + *c2_ref_frame = candidate_mi->mbmi.ref_frame; + } + + // No ref frame matches so use first ref mv as first choice + } else if (candidate_mi->mbmi.ref_frame != INTRA_FRAME) { + c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + *c_ref_frame = candidate_mi->mbmi.ref_frame; + ret_val = TRUE; + + // Is there a second non zero vector we can use. + if ((candidate_mi->mbmi.second_ref_frame != INTRA_FRAME) && + (candidate_mi->mbmi.mv[1].as_int != 0) && + (candidate_mi->mbmi.mv[1].as_int != c_mv->as_int)) { + c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c2_ref_frame = candidate_mi->mbmi.second_ref_frame; + } + + // If only the second ref mv is valid:- (Should not trigger in current code + // base given current possible compound prediction options). + } else if (candidate_mi->mbmi.second_ref_frame != INTRA_FRAME) { + c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + *c_ref_frame = candidate_mi->mbmi.second_ref_frame; + ret_val = TRUE; + } + + return ret_val; +} + +// Performs mv adjustment based on reference frame and clamps the MV +// if it goes off the edge of the buffer. 
+void scale_mv( + MACROBLOCKD *xd, + MV_REFERENCE_FRAME this_ref_frame, + MV_REFERENCE_FRAME candidate_ref_frame, + int_mv *candidate_mv, + int *ref_sign_bias +) { + + if (candidate_ref_frame != this_ref_frame) { + + //int frame_distances[MAX_REF_FRAMES]; + //int last_distance = 1; + //int gf_distance = xd->frames_since_golden; + //int arf_distance = xd->frames_till_alt_ref_frame; + + // Sign inversion where appropriate. + if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) { + candidate_mv->as_mv.row = -candidate_mv->as_mv.row; + candidate_mv->as_mv.col = -candidate_mv->as_mv.col; + } + + // Scale based on frame distance if the reference frames not the same. + /*frame_distances[INTRA_FRAME] = 1; // should never be used + frame_distances[LAST_FRAME] = 1; + frame_distances[GOLDEN_FRAME] = + (xd->frames_since_golden) ? xd->frames_since_golden : 1; + frame_distances[ALTREF_FRAME] = + (xd->frames_till_alt_ref_frame) ? xd->frames_till_alt_ref_frame : 1; + + if (frame_distances[this_ref_frame] && + frame_distances[candidate_ref_frame]) { + candidate_mv->as_mv.row = + (short)(((int)(candidate_mv->as_mv.row) * + frame_distances[this_ref_frame]) / + frame_distances[candidate_ref_frame]); + + candidate_mv->as_mv.col = + (short)(((int)(candidate_mv->as_mv.col) * + frame_distances[this_ref_frame]) / + frame_distances[candidate_ref_frame]); + } + */ + } + + // Clamp the MV so it does not point out of the frame buffer + clamp_mv(xd, candidate_mv); +} + +// Adds a new candidate reference vector to the list if indeed it is new. +// If it is not new then the score of the existing candidate that it matches +// is increased and the list is resorted. +void addmv_and_shuffle( + int_mv *mv_list, + int *mv_scores, + int *index, + int_mv candidate_mv, + int weight +) { + + int i = *index; + int duplicate_found = FALSE; + + // Check for duplicates. If there is one increment its score. + // Duplicate defined as being the same full pel vector with rounding. 
+ while (i > 0) { + i--; + + if (candidate_mv.as_int == mv_list[i].as_int) { + duplicate_found = TRUE; + mv_scores[i] += weight; + break; + } + } + + // If no duplicate was found add the new vector and give it a weight + if (!duplicate_found) { + mv_list[*index].as_int = candidate_mv.as_int; + mv_scores[*index] = weight; + i = *index; + (*index)++; + } + + // Reshuffle the list so that highest scoring mvs at the top. + while (i > 0) { + if (mv_scores[i] > mv_scores[i-1]) { + int tmp_score = mv_scores[i-1]; + int_mv tmp_mv = mv_list[i-1]; + + mv_scores[i-1] = mv_scores[i]; + mv_list[i-1] = mv_list[i]; + mv_scores[i] = tmp_score; + mv_list[i] = tmp_mv; + i--; + } else + break; + } +} + +// This function searches the neighbourhood of a given MB/SB and populates a +// list of candidate reference vectors. +// +void find_mv_refs( + MACROBLOCKD *xd, + MODE_INFO *here, + MODE_INFO *lf_here, + MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int *ref_sign_bias +) { + + int i; + MODE_INFO *candidate_mi; + int_mv candidate_mvs[MAX_MV_REFS]; + int_mv c_refmv; + MV_REFERENCE_FRAME c_ref_frame; + int_mv c2_refmv; + MV_REFERENCE_FRAME c2_ref_frame; + int candidate_scores[MAX_MV_REFS]; + int index = 0; + int ref_weight = 0; + int valid_mv_ref; + + // Blank the reference vector lists and other local structures. + vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REFS); + vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REFS); + vpx_memset(candidate_scores, 0, sizeof(candidate_scores)); + + // Populate a list with candidate reference vectors from the + // spatial neighbours. 
+ for (i = 0; i < 2; ++i) { + if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) && + ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { + + candidate_mi = here + mv_ref_search[i][0] + + (mv_ref_search[i][1] * xd->mode_info_stride); + + valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + // If there is a valid MV candidate then add it to the list + if (valid_mv_ref) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, ref_weight); + + // If there is a second valid mv then add it as well. + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c2_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, ref_weight); + } + } + } + } + + // Look at the corresponding vector in the last frame + candidate_mi = lf_here; + valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + // If there is a valid MV candidate then add it to the list + if (valid_mv_ref) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); + ref_weight = 2 + ((c_ref_frame == ref_frame) << 4); + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, ref_weight); + + // If there is a second valid mv then add it as well. + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c2_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, ref_weight); + } + } + + // Populate a list with candidate reference vectors from the + // spatial neighbours. 
+ for (i = 2; i < MVREF_NEIGHBOURS; ++i) { + if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) && + ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { + + candidate_mi = here + mv_ref_search[i][0] + + (mv_ref_search[i][1] * xd->mode_info_stride); + + valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + // If there is a valid MV candidate then add it to the list + if (valid_mv_ref) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, ref_weight); + + // If there is a second valid mv then add it as well. + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); + ref_weight = ref_distance_weight[i] + + ((c2_ref_frame == ref_frame) << 4); + + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, ref_weight); + } + } + } + } + + // 0,0 is always a valid reference. + for (i = 0; i < index; ++i) + if (candidate_mvs[i].as_int == 0) + break; + if (i == index) { + c_refmv.as_int = 0; + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, candidate_scores[3]+1 ); + } + + // Copy over the candidate list. + vpx_memcpy(mv_ref_list, candidate_mvs, sizeof(candidate_mvs)); +} + +#endif diff --git a/vp8/common/mvref_common.h b/vp8/common/mvref_common.h new file mode 100644 index 000000000..3f19ddbdb --- /dev/null +++ b/vp8/common/mvref_common.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "onyxc_int.h" +#include "blockd.h" + +// MR reference entropy header file. +#if CONFIG_NEWBESTREFMV + +#ifndef __INC_MVREF_COMMON_H +#define __INC_MVREF_COMMON_H + +unsigned int mv_distance(int_mv *mv1, int_mv *mv2); + +void find_mv_refs( + MACROBLOCKD *xd, + MODE_INFO *here, + MODE_INFO *lf_here, + MV_REFERENCE_FRAME ref_frame, + int_mv * mv_ref_list, + int *ref_sign_bias +); + +#endif + +#endif diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index b7a543220..7c6093b41 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -51,14 +51,29 @@ typedef struct frame_contexts { vp8_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP8_SUBMVREFS - 1]; vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM + vp8_prob hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif vp8_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM8X8 + vp8_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif #if CONFIG_TX16X16 vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM16X16 + vp8_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; #endif +#endif + +#if CONFIG_NEWMVENTROPY + nmv_context nmvc; + nmv_context pre_nmvc; +#else MV_CONTEXT mvc[2]; MV_CONTEXT_HP mvc_hp[2]; MV_CONTEXT pre_mvc[2]; MV_CONTEXT_HP pre_mvc_hp[2]; +#endif vp8_prob pre_bmode_prob [VP8_BINTRAMODES - 1]; vp8_prob pre_ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */ vp8_prob pre_uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1]; @@ -74,22 +89,56 @@ typedef struct frame_contexts { vp8_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM + vp8_prob 
pre_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif + vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM8X8 + vp8_prob pre_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif + #if CONFIG_TX16X16 vp8_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_HYBRIDTRANSFORM16X16 + vp8_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; #endif +#endif + unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_HYBRIDTRANSFORM + unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif + unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_HYBRIDTRANSFORM8X8 + unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif + #if CONFIG_TX16X16 unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_HYBRIDTRANSFORM16X16 + unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif #endif + +#if CONFIG_NEWMVENTROPY + nmv_context_counts NMVcount; +#else unsigned int MVcount [2] [MVvals]; unsigned int MVcount_hp [2] [MVvals_hp]; +#endif #if CONFIG_SWITCHABLE_INTERP vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS+1] [VP8_SWITCHABLE_FILTERS-1]; diff --git a/vp8/common/recon.h b/vp8/common/recon.h index 3527fc14d..0bb5c8863 100644 --- a/vp8/common/recon.h +++ b/vp8/common/recon.h @@ -262,4 +262,12 @@ typedef struct vp8_recon_rtcd_vtable { void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *xd); + +#if CONFIG_SUPERBLOCKS +extern void 
vp8_recon_mby_s_c(const vp8_recon_rtcd_vtable_t *rtcd, + MACROBLOCKD *xd, uint8_t *dst); +extern void vp8_recon_mbuv_s_c(const vp8_recon_rtcd_vtable_t *rtcd, + MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst); +#endif + #endif diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 0212c92c7..647b3ada7 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -723,9 +723,9 @@ void vp8_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, // Sub-pel filter xd->subpixel_predict8x8(pTemp, len, - _o16x16mv.as_mv.col & 15, - _o16x16mv.as_mv.row & 15, - pDst, dst_uvstride); + _o16x16mv.as_mv.col & 15, + _o16x16mv.as_mv.row & 15, + pDst, dst_uvstride); } else { filter_mb(pSrc, pre_stride, pDst, dst_uvstride, 8, 8); } @@ -750,7 +750,6 @@ void vp8_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, } - void vp8_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, unsigned char *dst_y, unsigned char *dst_u, diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h index 7ad0adbd4..37e34b5e1 100644 --- a/vp8/common/reconinter.h +++ b/vp8/common/reconinter.h @@ -45,6 +45,15 @@ extern void vp8_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd, int dst_ystride, int dst_uvstride); +#if CONFIG_SUPERBLOCKS +extern void vp8_build_inter32x32_predictors_sb(MACROBLOCKD *x, + unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_ystride, + int dst_uvstride); +#endif + extern void vp8_build_inter_predictors_mb(MACROBLOCKD *xd); extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c index e391fa9be..cad9652b7 100644 --- a/vp8/common/reconintra.c +++ b/vp8/common/reconintra.c @@ -207,10 +207,10 @@ void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, } } -void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, - unsigned char *src, int src_stride, +void vp8_build_intra_predictors_internal(unsigned char *src, int src_stride, unsigned char *ypred_ptr, - 
int y_stride, int mode, int bsize) { + int y_stride, int mode, int bsize, + int up_available, int left_available) { unsigned char *yabove_row = src - src_stride; unsigned char yleft_col[32]; @@ -218,7 +218,7 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, int r, c, i; for (i = 0; i < bsize; i++) { - yleft_col[i] = xd->dst.y_buffer [i * src_stride - 1]; + yleft_col[i] = src[i * src_stride - 1]; } /* for Y */ @@ -230,8 +230,10 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, int average = 0; int log2_bsize_minus_1; - assert(bsize == 8 || bsize == 16 || bsize == 32); - if (bsize == 8) { + assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32); + if (bsize == 4) { + log2_bsize_minus_1 = 1; + } else if (bsize == 8) { log2_bsize_minus_1 = 2; } else if (bsize == 16) { log2_bsize_minus_1 = 3; @@ -239,19 +241,19 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, log2_bsize_minus_1 = 4; } - if (xd->up_available || xd->left_available) { - if (xd->up_available) { + if (up_available || left_available) { + if (up_available) { for (i = 0; i < bsize; i++) { average += yabove_row[i]; } } - if (xd->left_available) { + if (left_available) { for (i = 0; i < bsize; i++) { average += yleft_col[i]; } } - shift = log2_bsize_minus_1 + xd->up_available + xd->left_available; + shift = log2_bsize_minus_1 + up_available + left_available; expected_dc = (average + (1 << (shift - 1))) >> shift; } else { expected_dc = 128; @@ -332,22 +334,25 @@ void vp8_build_intra_predictors_internal(MACROBLOCKD *xd, } void vp8_build_intra_predictors_mby(MACROBLOCKD *xd) { - vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, xd->predictor, 16, - xd->mode_info_context->mbmi.mode, 16); + xd->mode_info_context->mbmi.mode, 16, + xd->up_available, xd->left_available); } void vp8_build_intra_predictors_mby_s(MACROBLOCKD *xd) { - vp8_build_intra_predictors_internal(xd, 
xd->dst.y_buffer, xd->dst.y_stride, + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride, - xd->mode_info_context->mbmi.mode, 16); + xd->mode_info_context->mbmi.mode, 16, + xd->up_available, xd->left_available); } #if CONFIG_SUPERBLOCKS -void vp8_build_intra_predictors_sby_s(MACROBLOCKD *x) { - vp8_build_intra_predictors_internal(x, x->dst.y_buffer, x->dst.y_stride, - x->dst.y_buffer, x->dst.y_stride, - x->mode_info_context->mbmi.mode, 32); +void vp8_build_intra_predictors_sby_s(MACROBLOCKD *xd) { + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + xd->mode_info_context->mbmi.mode, 32, + xd->up_available, xd->left_available); } #endif @@ -356,14 +361,16 @@ void vp8_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { unsigned char predictor[2][256]; int i; - vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, predictor[0], 16, xd->mode_info_context->mbmi.mode, - 16); - vp8_build_intra_predictors_internal(xd, xd->dst.y_buffer, xd->dst.y_stride, + 16, xd->up_available, + xd->left_available); + vp8_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, predictor[1], 16, xd->mode_info_context->mbmi.second_mode, - 16); + 16, xd->up_available, + xd->left_available); for (i = 0; i < 256; i++) { xd->predictor[i] = (predictor[0][i] + predictor[1][i] + 1) >> 1; @@ -376,10 +383,12 @@ void vp8_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd, unsigned char *vpred_ptr, int uv_stride, int mode, int bsize) { - vp8_build_intra_predictors_internal(xd, xd->dst.u_buffer, xd->dst.uv_stride, - upred_ptr, uv_stride, mode, bsize); - vp8_build_intra_predictors_internal(xd, xd->dst.v_buffer, xd->dst.uv_stride, - vpred_ptr, uv_stride, mode, bsize); + vp8_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride, + upred_ptr, uv_stride, mode, bsize, + 
xd->up_available, xd->left_available); + vp8_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride, + vpred_ptr, uv_stride, mode, bsize, + xd->up_available, xd->left_available); } void vp8_build_intra_predictors_mbuv(MACROBLOCKD *xd) { @@ -428,95 +437,9 @@ void vp8_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { void vp8_intra8x8_predict(BLOCKD *xd, int mode, unsigned char *predictor) { - - unsigned char *yabove_row = *(xd->base_dst) + xd->dst - xd->dst_stride; - unsigned char yleft_col[8]; - unsigned char ytop_left = yabove_row[-1]; - int r, c, i; - - for (i = 0; i < 8; i++) { - yleft_col[i] = (*(xd->base_dst))[xd->dst - 1 + i * xd->dst_stride]; - } - switch (mode) { - case DC_PRED: { - int expected_dc = 0; - - for (i = 0; i < 8; i++) { - expected_dc += yabove_row[i]; - expected_dc += yleft_col[i]; - } - expected_dc = (expected_dc + 8) >> 4; - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - predictor[c] = expected_dc; - } - predictor += 16; - } - } - break; - case V_PRED: { - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - predictor[c] = yabove_row[c]; - } - predictor += 16; - } - - } - break; - case H_PRED: { - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - predictor[c] = yleft_col[r]; - } - predictor += 16; - } - } - break; - case TM_PRED: { - /* prediction similar to true_motion prediction */ - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - int pred = yabove_row[c] - ytop_left + yleft_col[r]; - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - predictor[c] = pred; - } - - predictor += 16; - } - } - break; - case D45_PRED: { - d45_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D135_PRED: { - d135_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D117_PRED: { - d117_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D153_PRED: { - d153_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D27_PRED: { - 
d27_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - case D63_PRED: { - d63_predictor(predictor, 16, 8, yabove_row, yleft_col); - } - break; - } + vp8_build_intra_predictors_internal(*(xd->base_dst) + xd->dst, + xd->dst_stride, predictor, 16, + mode, 8, 1, 1); } #if CONFIG_COMP_INTRA_PRED @@ -540,96 +463,9 @@ void vp8_comp_intra8x8_predict(BLOCKD *xd, void vp8_intra_uv4x4_predict(BLOCKD *xd, int mode, unsigned char *predictor) { - - unsigned char *above_row = *(xd->base_dst) + xd->dst - xd->dst_stride; - unsigned char left_col[4]; - unsigned char top_left = above_row[-1]; - int r, c, i; - - for (i = 0; i < 4; i++) { - left_col[i] = (*(xd->base_dst))[xd->dst - 1 + i * xd->dst_stride]; - } - switch (mode) { - case DC_PRED: { - int expected_dc = 0; - - for (i = 0; i < 4; i++) { - expected_dc += above_row[i]; - expected_dc += left_col[i]; - } - expected_dc = (expected_dc + 4) >> 3; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = expected_dc; - } - predictor += 8; - } - } - break; - case V_PRED: { - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - - predictor[c] = above_row[c]; - } - predictor += 8; - } - - } - break; - case H_PRED: { - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - predictor[c] = left_col[r]; - } - predictor += 8; - } - } - break; - case TM_PRED: { - /* prediction similar to true_motion prediction */ - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - int pred = above_row[c] - top_left + left_col[r]; - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - predictor[c] = pred; - } - - predictor += 8; - } - } - break; - case D45_PRED: { - d45_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D135_PRED: { - d135_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D117_PRED: { - d117_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D153_PRED: { - d153_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case 
D27_PRED: { - d27_predictor(predictor, 8, 4, above_row, left_col); - } - break; - case D63_PRED: { - d63_predictor(predictor, 8, 4, above_row, left_col); - } - break; - } + vp8_build_intra_predictors_internal(*(xd->base_dst) + xd->dst, + xd->dst_stride, predictor, 8, + mode, 4, 1, 1); } #if CONFIG_COMP_INTRA_PRED diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index 1cb5de311..66029f88e 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -14,8 +14,8 @@ prototype void vp8_filter_block2d_16x16_8 "const unsigned char *src_ptr, const u # compiles warning free but a dissassembly of generated code show bugs. To be # on the safe side, only enabled when compiled with 'gcc'. if [ "$CONFIG_GCC" = "yes" ]; then - specialize vp8_filter_block2d_4x4_8 sse4_1 - specialize vp8_filter_block2d_8x4_8 sse4_1 - specialize vp8_filter_block2d_8x8_8 sse4_1 - specialize vp8_filter_block2d_16x16_8 sse4_1 + specialize vp8_filter_block2d_4x4_8 sse4_1 sse2 + specialize vp8_filter_block2d_8x4_8 sse4_1 sse2 + specialize vp8_filter_block2d_8x8_8 sse4_1 sse2 + specialize vp8_filter_block2d_16x16_8 sse4_1 sse2 fi diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c index def4caa04..adf291bef 100644 --- a/vp8/common/treecoder.c +++ b/vp8/common/treecoder.c @@ -124,3 +124,15 @@ void vp8_tree_probs_from_distribution( probs[t] = vp8_prob_half; } while (++t < tree_len); } + +vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]) { + int tot_count = counts[0] + counts[1]; + vp8_prob prob; + if (tot_count) { + prob = (counts[0] * 255 + (tot_count >> 1)) / tot_count; + prob += !prob; + } else { + prob = 128; + } + return prob; +} diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h index c4d0aa6ee..b7fa17df9 100644 --- a/vp8/common/treecoder.h +++ b/vp8/common/treecoder.h @@ -85,5 +85,6 @@ void vp8bc_tree_probs_from_distribution( c_bool_coder_spec *s ); +vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]); #endif diff --git 
a/vp8/common/x86/filter_sse2.c b/vp8/common/x86/filter_sse2.c
new file mode 100644
index 000000000..fe57b4e0b
--- /dev/null
+++ b/vp8/common/x86/filter_sse2.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h> // for alignment checks
+#include <emmintrin.h> // SSE2
+#include "vp8/common/filter.h"
+#include "vpx_ports/mem.h" // for DECLARE_ALIGNED
+#include "vpx_rtcd.h"
+
+// TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
+// just a quick partial snapshot so that others can already use some
+// speedup.
+// TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap
+// filtering.
+// TODO(cd): Add some comments, better variable naming.
+// TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coefficients (no sum
+// of positive above 128), or have higher precision filter
+// coefficients. 
+
+DECLARE_ALIGNED(16, static const unsigned int, rounding_c[4]) = {
+ VP8_FILTER_WEIGHT >> 1,
+ VP8_FILTER_WEIGHT >> 1,
+ VP8_FILTER_WEIGHT >> 1,
+ VP8_FILTER_WEIGHT >> 1,
+};
+
+// Creating a macro to do more than four pixels at once to hide instruction
+// latency is actually slower :-(
+#define DO_FOUR_PIXELS(result, src_ptr, offset) \
+ { \
+ /* Do shifted load to achieve required shuffles through unpacking */ \
+ const __m128i src0 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 0)); \
+ const __m128i src1 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 1)); \
+ const __m128i src2 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 2)); \
+ const __m128i src3 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 3)); \
+ const __m128i src01 = _mm_unpacklo_epi8(src0, src1); \
+ const __m128i src01_16 = _mm_unpacklo_epi8(src01, zero); \
+ const __m128i src23 = _mm_unpacklo_epi8(src2, src3); \
+ const __m128i src23_16 = _mm_unpacklo_epi8(src23, zero); \
+ /* Shift by 4 bytes through shuffle to get additional shifted loads */ \
+ const __m128i src4 = _mm_shuffle_epi32(src0, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src5 = _mm_shuffle_epi32(src1, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src6 = _mm_shuffle_epi32(src2, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src7 = _mm_shuffle_epi32(src3, _MM_SHUFFLE(3, 3, 2, 1)); \
+ const __m128i src45 = _mm_unpacklo_epi8(src4, src5); \
+ const __m128i src45_16 = _mm_unpacklo_epi8(src45, zero); \
+ const __m128i src67 = _mm_unpacklo_epi8(src6, src7); \
+ const __m128i src67_16 = _mm_unpacklo_epi8(src67, zero); \
+ /* multiply accumulate them */ \
+ const __m128i mad01 = _mm_madd_epi16(src01_16, fil01); \
+ const __m128i mad23 = _mm_madd_epi16(src23_16, fil23); \
+ const __m128i mad45 = _mm_madd_epi16(src45_16, fil45); \
+ const __m128i mad67 = _mm_madd_epi16(src67_16, fil67); \
+ const __m128i mad0123 = _mm_add_epi32(mad01, mad23); \
+ const __m128i mad4567 = _mm_add_epi32(mad45, mad67); \
+ __m128i mad_all 
= _mm_add_epi32(mad0123, mad4567); \ + mad_all = _mm_add_epi32(mad_all, rounding); \ + result = _mm_srai_epi32(mad_all, VP8_FILTER_SHIFT); \ + } + +void vp8_filter_block2d_4x4_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short *HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + __m128i intermediateA, intermediateB, intermediateC; + + const int kInterp_Extend = 4; + + const __m128i zero = _mm_set1_epi16(0); + const __m128i rounding = _mm_load_si128((const __m128i *)rounding_c); + + // check alignment + assert(0 == ((long)HFilter_aligned16)%16); + assert(0 == ((long)VFilter_aligned16)%16); + + { + __m128i transpose3_0; + __m128i transpose3_1; + __m128i transpose3_2; + __m128i transpose3_3; + + // Horizontal pass (src -> intermediate). + { + const __m128i HFilter = _mm_load_si128((const __m128i *)HFilter_aligned16); + // get first two columns filter coefficients + __m128i fil01 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i fil23 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i fil45 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i fil67 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(3, 3, 3, 3)); + src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); + + { + __m128i mad_all0; + __m128i mad_all1; + __m128i mad_all2; + __m128i mad_all3; + DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride) + DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride) + DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride) + DO_FOUR_PIXELS(mad_all3, src_ptr, 3*src_stride) + mad_all0 = _mm_packs_epi32(mad_all0, mad_all1); + mad_all2 = _mm_packs_epi32(mad_all2, mad_all3); + intermediateA = _mm_packus_epi16(mad_all0, mad_all2); + // -- + src_ptr += src_stride*4; + // -- + DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride) + DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride) + DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride) + DO_FOUR_PIXELS(mad_all3, src_ptr, 
3*src_stride) + mad_all0 = _mm_packs_epi32(mad_all0, mad_all1); + mad_all2 = _mm_packs_epi32(mad_all2, mad_all3); + intermediateB = _mm_packus_epi16(mad_all0, mad_all2); + // -- + src_ptr += src_stride*4; + // -- + DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride) + DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride) + DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride) + mad_all0 = _mm_packs_epi32(mad_all0, mad_all1); + mad_all2 = _mm_packs_epi32(mad_all2, mad_all2); + intermediateC = _mm_packus_epi16(mad_all0, mad_all2); + } + } + + // Transpose result (intermediate -> transpose3_x) + { + // 00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33 + // 40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73 + // 80 81 82 83 90 91 92 93 A0 A1 A2 A3 xx xx xx xx + const __m128i transpose0_0 = _mm_unpacklo_epi8(intermediateA, intermediateB); + const __m128i transpose0_1 = _mm_unpackhi_epi8(intermediateA, intermediateB); + const __m128i transpose0_2 = _mm_unpacklo_epi8(intermediateC, intermediateC); + const __m128i transpose0_3 = _mm_unpackhi_epi8(intermediateC, intermediateC); + // 00 40 01 41 02 42 03 43 10 50 11 51 12 52 13 53 + // 20 60 21 61 22 62 23 63 30 70 31 71 32 72 33 73 + // 80 xx 81 xx 82 xx 83 xx 90 xx 91 xx 92 xx 93 xx + // A0 xx A1 xx A2 xx A3 xx xx xx xx xx xx xx xx xx + const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1); + const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1); + const __m128i transpose1_2 = _mm_unpacklo_epi8(transpose0_2, transpose0_3); + const __m128i transpose1_3 = _mm_unpackhi_epi8(transpose0_2, transpose0_3); + // 00 20 40 60 01 21 41 61 02 22 42 62 03 23 43 63 + // 10 30 50 70 11 31 51 71 12 32 52 72 13 33 53 73 + // 80 A0 xx xx 81 A1 xx xx 82 A2 xx xx 83 A3 xx xx + // 90 xx xx xx 91 xx xx xx 92 xx xx xx 93 xx xx xx + const __m128i transpose2_0 = _mm_unpacklo_epi8(transpose1_0, transpose1_1); + const __m128i transpose2_1 = _mm_unpackhi_epi8(transpose1_0, transpose1_1); + const __m128i transpose2_2 = 
_mm_unpacklo_epi8(transpose1_2, transpose1_3); + const __m128i transpose2_3 = _mm_unpackhi_epi8(transpose1_2, transpose1_3); + // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 + // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 + // 80 90 A0 xx xx xx xx xx 81 91 A1 xx xx xx xx xx + // 82 92 A2 xx xx xx xx xx 83 93 A3 xx xx xx xx xx + transpose3_0 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0), + _mm_castsi128_ps(transpose2_2), + _MM_SHUFFLE(1, 0, 1, 0))); + transpose3_1 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0), + _mm_castsi128_ps(transpose2_2), + _MM_SHUFFLE(3, 2, 3, 2))); + transpose3_2 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1), + _mm_castsi128_ps(transpose2_3), + _MM_SHUFFLE(1, 0, 1, 0))); + transpose3_3 = _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1), + _mm_castsi128_ps(transpose2_3), + _MM_SHUFFLE(3, 2, 3, 2))); + // 00 10 20 30 40 50 60 70 80 90 A0 xx xx xx xx xx + // 01 11 21 31 41 51 61 71 81 91 A1 xx xx xx xx xx + // 02 12 22 32 42 52 62 72 82 92 A2 xx xx xx xx xx + // 03 13 23 33 43 53 63 73 83 93 A3 xx xx xx xx xx + } + + // Vertical pass (transpose3_x -> dst). 
+ { + const __m128i VFilter = _mm_load_si128((const __m128i *)VFilter_aligned16); + // get first two columns filter coefficients + __m128i fil01 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i fil23 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i fil45 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i fil67 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i col0, col1, col2, col3; + DECLARE_ALIGNED(16, unsigned char, temp[32]); + { + _mm_store_si128((__m128i *)temp, transpose3_0); + DO_FOUR_PIXELS(col0, temp, 0); + } + { + _mm_store_si128((__m128i *)temp, transpose3_1); + DO_FOUR_PIXELS(col1, temp, 0); + } + { + _mm_store_si128((__m128i *)temp, transpose3_2); + DO_FOUR_PIXELS(col2, temp, 0); + } + { + _mm_store_si128((__m128i *)temp, transpose3_3); + DO_FOUR_PIXELS(col3, temp, 0); + } + // transpose + { + __m128i T0 = _mm_unpacklo_epi32(col0, col1); + __m128i T1 = _mm_unpacklo_epi32(col2, col3); + __m128i T2 = _mm_unpackhi_epi32(col0, col1); + __m128i T3 = _mm_unpackhi_epi32(col2, col3); + col0 = _mm_unpacklo_epi64(T0, T1); + col1 = _mm_unpackhi_epi64(T0, T1); + col2 = _mm_unpacklo_epi64(T2, T3); + col3 = _mm_unpackhi_epi64(T2, T3); + } + // saturate to 8 bit + { + col0 = _mm_packs_epi32(col0, col0); + col0 = _mm_packus_epi16(col0, col0); + col1 = _mm_packs_epi32(col1, col1); + col1 = _mm_packus_epi16(col1, col1); + col2 = _mm_packs_epi32 (col2, col2); + col2 = _mm_packus_epi16(col2, col2); + col3 = _mm_packs_epi32 (col3, col3); + col3 = _mm_packus_epi16(col3, col3); + } + // store + { + *((unsigned int *)&dst_ptr[dst_stride * 0]) = _mm_cvtsi128_si32(col0); + *((unsigned int *)&dst_ptr[dst_stride * 1]) = _mm_cvtsi128_si32(col1); + *((unsigned int *)&dst_ptr[dst_stride * 2]) = _mm_cvtsi128_si32(col2); + *((unsigned int *)&dst_ptr[dst_stride * 3]) = _mm_cvtsi128_si32(col3); + } + } + } +} + +void vp8_filter_block2d_8x4_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short 
*HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + int j; + for (j=0; j<8; j+=4) { + vp8_filter_block2d_4x4_8_sse2(src_ptr + j, src_stride, + HFilter_aligned16, VFilter_aligned16, + dst_ptr + j, dst_stride); + } +} + +void vp8_filter_block2d_8x8_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short *HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + int i, j; + for (i=0; i<8; i+=4) { + for (j=0; j<8; j+=4) { + vp8_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride, + HFilter_aligned16, VFilter_aligned16, + dst_ptr + j + i*dst_stride, dst_stride); + } + } +} + +void vp8_filter_block2d_16x16_8_sse2 +( + const unsigned char *src_ptr, const unsigned int src_stride, + const short *HFilter_aligned16, const short *VFilter_aligned16, + unsigned char *dst_ptr, unsigned int dst_stride +) { + int i, j; + for (i=0; i<16; i+=4) { + for (j=0; j<16; j+=4) { + vp8_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride, + HFilter_aligned16, VFilter_aligned16, + dst_ptr + j + i*dst_stride, dst_stride); + } + } +} diff --git a/vp8/common/x86/filter_sse4.c b/vp8/common/x86/filter_sse4.c index a037622e1..c461db173 100644 --- a/vp8/common/x86/filter_sse4.c +++ b/vp8/common/x86/filter_sse4.c @@ -25,9 +25,6 @@ // TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coeficients (no sum // of positive above 128), or have higher precision filter // coefficients. -// TODO(cd): Remove use of _mm_extract_epi32 and _mm_extract_epi64, to not -// require SSE4.1 -// TODO(cd): Remove use of _mm_shuffle_epi8 to not require SSSE3 DECLARE_ALIGNED(16, static const unsigned char, mask0123_c[16]) = { 0x00, 0x01, |