11 files changed, 427 insertions, 257 deletions
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 4f7b9df69..ae3ddd281 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -2145,60 +2145,61 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi)
 
 
 #if CONFIG_T8X8
-    do
+    /* skip the 8x8 coefficient statistics unless the 8x8 transform is allowed */
+    if(cpi->common.txfm_mode == ALLOW_8X8)
     {
-        int j = 0;
-
         do
         {
-            int k = 0;
-
+            int j = 0;
             do
             {
-                /* at every context */
-
-                /* calc probs and branch cts for this frame only */
-                //vp8_prob new_p           [ENTROPY_NODES];
-                //unsigned int branch_ct   [ENTROPY_NODES] [2];
-
-                int t = 0;      /* token/prob index */
-
-                vp8_tree_probs_from_distribution(
-                    MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
-                    cpi->frame_coef_probs_8x8 [i][j][k], cpi->frame_branch_ct_8x8 [i][j][k], cpi->coef_counts_8x8 [i][j][k],
-                    256, 1
-                );
-
+                int k = 0;
                 do
                 {
-                    const unsigned int *ct  = cpi->frame_branch_ct_8x8 [i][j][k][t];
-                    const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                    /* at every context */
+                    /* calc probs and branch cts for this frame only */
+                    //vp8_prob new_p           [ENTROPY_NODES];
+                    //unsigned int branch_ct   [ENTROPY_NODES] [2];
+                    int t = 0;      /* token/prob index */
+                    vp8_tree_probs_from_distribution(
+                        MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
+                        cpi->frame_coef_probs_8x8 [i][j][k],
+                        cpi->frame_branch_ct_8x8 [i][j][k],
+                        cpi->coef_counts_8x8 [i][j][k],
+                        256, 1
+                        );
 
-                    const vp8_prob old = cpi->common.fc.coef_probs_8x8 [i][j][k][t];
-                    const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
+                    do
+                    {
+                        const unsigned int *ct  = cpi->frame_branch_ct_8x8 [i][j][k][t];
+                        const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
 
-                    const int old_b = vp8_cost_branch(ct, old);
-                    const int new_b = vp8_cost_branch(ct, newp);
+                        const vp8_prob old = cpi->common.fc.coef_probs_8x8 [i][j][k][t];
+                        const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
 
-                    const int update_b = 8 +
-                                         ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8);
+                        const int old_b = vp8_cost_branch(ct, old);
+                        const int new_b = vp8_cost_branch(ct, newp);
 
-                    const int s = old_b - new_b - update_b;
+                        const int update_b = 8 +
+                            ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8);
 
-                    if (s > 0)
-                        savings += s;
+                        const int s = old_b - new_b - update_b;
 
+                        if (s > 0)
+                            savings += s;
 
-                }
-                while (++t < MAX_ENTROPY_TOKENS - 1);
 
+                    }
+                    while (++t < MAX_ENTROPY_TOKENS - 1);
 
+
+                }
+                while (++k < PREV_COEF_CONTEXTS);
             }
-            while (++k < PREV_COEF_CONTEXTS);
+            while (++j < COEF_BANDS);
         }
-        while (++j < COEF_BANDS);
+        while (++i < BLOCK_TYPES);
     }
-    while (++i < BLOCK_TYPES);
 #endif
 
 
@@ -2329,91 +2330,94 @@ static void update_coef_probs(VP8_COMP *cpi)
     while (++i < BLOCK_TYPES);
 
 #if CONFIG_T8X8
-    i = 0;
-    do
+    /* skip the 8x8 probability updates unless the 8x8 transform is allowed */
+    if(cpi->common.txfm_mode == ALLOW_8X8)
     {
-        int j = 0;
-
+        i = 0;
         do
         {
-            int k = 0;
+            int j = 0;
 
             do
             {
-                //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here.
-                /* at every context */
+                int k = 0;
 
-                /* calc probs and branch cts for this frame only */
-                //vp8_prob new_p           [ENTROPY_NODES];
-                //unsigned int branch_ct   [ENTROPY_NODES] [2];
+                do
+                {
+                    //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here.
+                    /* at every context */
 
-                int t = 0;      /* token/prob index */
+                    /* calc probs and branch cts for this frame only */
+                    //vp8_prob new_p           [ENTROPY_NODES];
+                    //unsigned int branch_ct   [ENTROPY_NODES] [2];
 
-                //vp8_tree_probs_from_distribution(
-                //    MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
-                //    new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k],
-                //    256, 1
-                //    );
+                    int t = 0;      /* token/prob index */
 
-                do
-                {
-                    const unsigned int *ct  = cpi->frame_branch_ct_8x8 [i][j][k][t];
-                    const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                    //vp8_tree_probs_from_distribution(
+                    //    MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
+                    //    new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k],
+                    //    256, 1
+                    //    );
 
-                    vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
-                    const vp8_prob old = *Pold;
-                    const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
+                    do
+                    {
+                        const unsigned int *ct  = cpi->frame_branch_ct_8x8 [i][j][k][t];
+                        const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
 
-                    const int old_b = vp8_cost_branch(ct, old);
-                    const int new_b = vp8_cost_branch(ct, newp);
+                        vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
+                        const vp8_prob old = *Pold;
+                        const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
 
-                    const int update_b = 8 +
-                                         ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8);
+                        const int old_b = vp8_cost_branch(ct, old);
+                        const int new_b = vp8_cost_branch(ct, newp);
 
-                    const int s = old_b - new_b - update_b;
-                    const int u = s > 0 ? 1 : 0;
+                        const int update_b = 8 +
+                            ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8);
 
-                    vp8_write(w, u, upd);
+                        const int s = old_b - new_b - update_b;
+                        const int u = s > 0 ? 1 : 0;
+
+                        vp8_write(w, u, upd);
 
 
 #ifdef ENTROPY_STATS
-                    ++ tree_update_hist_8x8 [i][j][k][t] [u];
+                        ++ tree_update_hist_8x8 [i][j][k][t] [u];
 #endif
 
-                    if (u)
-                    {
-                        /* send/use new probability */
+                        if (u)
+                        {
+                            /* send/use new probability */
 
-                        *Pold = newp;
-                        vp8_write_literal(w, newp, 8);
+                            *Pold = newp;
+                            vp8_write_literal(w, newp, 8);
 
-                        savings += s;
+                            savings += s;
 
-                    }
+                        }
 
-                }
-                while (++t < MAX_ENTROPY_TOKENS - 1);
+                    }
+                    while (++t < MAX_ENTROPY_TOKENS - 1);
 
-                /* Accum token counts for generation of default statistics */
+                    /* Accum token counts for generation of default statistics */
 #ifdef ENTROPY_STATS
-                t = 0;
+                    t = 0;
 
-                do
-                {
-                    context_counters_8x8 [i][j][k][t] += cpi->coef_counts_8x8 [i][j][k][t];
-                }
-                while (++t < MAX_ENTROPY_TOKENS);
+                    do
+                    {
+                        context_counters_8x8 [i][j][k][t] += cpi->coef_counts_8x8 [i][j][k][t];
+                    }
+                    while (++t < MAX_ENTROPY_TOKENS);
 
 #endif
-
+                }
+                while (++k < PREV_COEF_CONTEXTS);
             }
-            while (++k < PREV_COEF_CONTEXTS);
+            while (++j < COEF_BANDS);
         }
-        while (++j < COEF_BANDS);
+        while (++i < BLOCK_TYPES);
     }
-    while (++i < BLOCK_TYPES);
-#endif
 
+#endif
 }
 #ifdef PACKET_TESTING
 FILE *vpxlogc = 0;
@@ -2664,6 +2668,10 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
         }
     }
 
+#if CONFIG_T8X8
+    vp8_write_bit(bc, pc->txfm_mode);
+#endif
+
     // Encode the loop filter level and type
     vp8_write_bit(bc, pc->filter_type);
     vp8_write_literal(bc, pc->filter_level, 6);
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 3ac3d79f0..e19749052 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -47,6 +47,9 @@ typedef struct
     int src_stride;
 
     int eob_max_offset;
+#if CONFIG_T8X8
+    int eob_max_offset_8x8;
+#endif
 
 } BLOCK;
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index c45fe59c7..edbdf3fde 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -937,6 +937,11 @@ void encode_mb_row(VP8_COMP *cpi,
 
         x->active_ptr = cpi->active_map + map_index + mb_col;
 
+#if CONFIG_T8X8
+        /* force 4x4 transform for mode selection */
+        xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+#endif
+
         if (cm->frame_type == KEY_FRAME)
         {
             *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
@@ -1727,6 +1732,22 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
         vp8_update_zbin_extra(cpi, x);
     }
 
+#if CONFIG_T8X8
+    /* test code: set transform size based on mode selection */
+    if(cpi->common.txfm_mode == ALLOW_8X8
+        && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != B_PRED)
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+        cpi->t8x8_count++;
+    }
+    else
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+        cpi->t4x4_count ++;
+    }
+#endif
+
     if(x->e_mbd.mode_info_context->mbmi.mode == I8X8_PRED)
     {
         vp8_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x);
@@ -1741,16 +1762,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
     sum_intra_stats(cpi, x);
     vp8_tokenize_mb(cpi, &x->e_mbd, t);
-#if CONFIG_T8X8
-        if ( get_seg_tx_type(&x->e_mbd,
-                             x->e_mbd.mode_info_context->mbmi.segment_id)
-             == TX_8X8 )
-        {
-            cpi->t8x8_count++;
-        }
-        else
-            cpi->t4x4_count++;
-#endif
+
     return rate;
 }
 #ifdef SPEEDSTATS
@@ -1817,6 +1829,22 @@ int vp8cx_encode_inter_macroblock
                 cpi->dual_pred_count[pred_context]++;
         }
 
+#if CONFIG_T8X8
+        /* test code: set transform size based on mode selection */
+        if( cpi->common.txfm_mode == ALLOW_8X8
+            && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
+            && x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+            && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
+        {
+            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+            cpi->t8x8_count ++;
+        }
+        else
+        {
+            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+            cpi->t4x4_count++;
+        }
+#endif
         /* switch back to the regular quantizer for the encode */
         if (cpi->sf.improved_quant)
         {
@@ -1825,7 +1853,6 @@ int vp8cx_encode_inter_macroblock
             cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize,
                                                       quantb_pair);
         }
-
         /* restore cpi->zbin_mode_boost_enabled */
         cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
 
@@ -2015,14 +2042,6 @@ int vp8cx_encode_inter_macroblock
                                            xd->dst.y_stride, xd->dst.uv_stride);
         }
     }
-#if CONFIG_T8X8
-    if ( get_seg_tx_type( xd, *segment_id ) == TX_8X8 )
-    {
-        cpi->t8x8_count++;
-    }
-    else
-        cpi->t4x4_count++;
-#endif
 
     if (!x->skip)
     {
@@ -2066,6 +2085,5 @@ int vp8cx_encode_inter_macroblock
             cpi->skip_false_count ++;
         }
     }
-
     return rate;
 }
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 6f825ff59..062f31ca7 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -100,9 +100,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     BLOCK *b = &x->block[0];
 
 #if CONFIG_T8X8
-    int tx_type = get_seg_tx_type(&x->e_mbd,
-                                  x->e_mbd.mode_info_context->mbmi.segment_id);
-    x->e_mbd.mode_info_context->mbmi.txfm_size = tx_type;
+    int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
 #endif
 
     RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
@@ -179,11 +177,8 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
 #if CONFIG_T8X8
-    int tx_type = get_seg_tx_type(&x->e_mbd,
-                                  x->e_mbd.mode_info_context->mbmi.segment_id);
-    x->e_mbd.mode_info_context->mbmi.txfm_size = tx_type;
+    int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
 #endif
-
     RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd);
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 42af30408..270bdf585 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -1258,9 +1258,7 @@ void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
 #if CONFIG_T8X8
-    int tx_type = get_seg_tx_type(&x->e_mbd,
-                                  x->e_mbd.mode_info_context->mbmi.segment_id);
-    x->e_mbd.mode_info_context->mbmi.txfm_size = tx_type;
+    int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
 #endif
 
     vp8_build_inter_predictors_mb(&x->e_mbd);
@@ -1357,8 +1355,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
 #if CONFIG_T8X8
-    int tx_type = get_seg_tx_type(&x->e_mbd,
-                                  x->e_mbd.mode_info_context->mbmi.segment_id);
+    int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
 #endif
 
     BLOCK *b = &x->block[0];
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index f7a513df9..be0d21aa4 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -359,6 +359,9 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
         */
         z->block[i].src_stride      = x->block[i].src_stride;
         z->block[i].eob_max_offset  = x->block[i].eob_max_offset;
+#if CONFIG_T8X8
+        z->block[i].eob_max_offset_8x8  = x->block[i].eob_max_offset_8x8;
+#endif
     }
 
     {
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ff2493d9f..45b9dfac7 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -579,16 +579,11 @@ static void init_seg_features(VP8_COMP *cpi)
                     set_segdata( xd, 1, SEG_LVL_MODE, ZEROMV );
                     enable_segfeature(xd, 1, SEG_LVL_MODE);
 
-                    if ( !segfeature_active( xd, 1, SEG_LVL_TRANSFORM ) ||
-                         get_seg_tx_type( xd, 1 ) == TX_4X4 )
-                    {
-                        // EOB segment coding not fixed for 8x8 yet
-                        set_segdata( xd, 1, SEG_LVL_EOB, 0 );
-                        enable_segfeature(xd, 1, SEG_LVL_EOB);
-                    }
+                    // EOB segment coding not fixed for 8x8 yet
+                    set_segdata( xd, 1, SEG_LVL_EOB, 0 );
+                    enable_segfeature(xd, 1, SEG_LVL_EOB);
                 }
             }
-
             // Disable segmentation and clear down features if alt ref
             // is not active for this group
             else
@@ -627,23 +622,11 @@ static void init_seg_features(VP8_COMP *cpi)
             // Skip all MBs if high Q
             if ( high_q )
             {
-                // EOB segment coding not fixed for 8x8 yet
-                if ( !segfeature_active( xd, 0, SEG_LVL_TRANSFORM ) ||
-                     get_seg_tx_type( xd, 0 ) == TX_4X4 )
-                {
-                    enable_segfeature(xd, 0, SEG_LVL_EOB);
-                    set_segdata( xd, 0, SEG_LVL_EOB, 0 );
-                }
-
-                // EOB segment coding not fixed for 8x8 yet
-                if ( !segfeature_active( xd, 1, SEG_LVL_TRANSFORM ) ||
-                     get_seg_tx_type( xd, 1 ) == TX_4X4 )
-                {
-                    enable_segfeature(xd, 1, SEG_LVL_EOB);
-                    set_segdata( xd, 1, SEG_LVL_EOB, 0 );
-                }
+                enable_segfeature(xd, 0, SEG_LVL_EOB);
+                set_segdata( xd, 0, SEG_LVL_EOB, 0 );
+                enable_segfeature(xd, 1, SEG_LVL_EOB);
+                set_segdata( xd, 1, SEG_LVL_EOB, 0 );
             }
-
             // Enable data udpate
             xd->update_mb_segmentation_data = 1;
         }
@@ -653,25 +636,6 @@ static void init_seg_features(VP8_COMP *cpi)
             // No updeates.. leave things as they are.
             xd->update_mb_segmentation_map = 0;
             xd->update_mb_segmentation_data = 0;
-
-#if CONFIG_T8X8
-            {
-                vp8_disable_segmentation((VP8_PTR)cpi);
-                clearall_segfeatures(xd);
-                vp8_enable_segmentation((VP8_PTR)cpi);
-                // 8x8TX test code.
-                // This assignment does not necessarily make sense but is
-                // just to test the mechanism for now.
-                enable_segfeature(xd, 0, SEG_LVL_TRANSFORM);
-                set_segdata( xd, 0, SEG_LVL_TRANSFORM, TX_4X4 );
-                enable_segfeature(xd, 1, SEG_LVL_TRANSFORM);
-                set_segdata( xd, 1, SEG_LVL_TRANSFORM, TX_8X8 );
-                /* force every mb to use 8x8 transform for testing*/
-                vpx_memset(cpi->segmentation_map, 1,
-                    cpi->common.mb_rows * cpi->common.mb_cols);
-
-            }
-#endif
         }
     }
 }
@@ -2645,7 +2609,9 @@ void vp8_remove_compressor(VP8_PTR *ptr)
 #if CONFIG_INTERNAL_STATS
 
         vp8_clear_system_state();
-
+#if CONFIG_T8X8
+        printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count);
+#endif
         if (cpi->pass != 1)
         {
             FILE *f = fopen("opsnr.stt", "a");
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index b1603c39e..04939c7c7 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -472,50 +472,52 @@ void vp8_fast_quantize_b_2x2_c(BLOCK *b, BLOCKD *d)
 
 void vp8_fast_quantize_b_8x8_c(BLOCK *b, BLOCKD *d)
 {
-  int i, rc, eob;
-  int zbin;
-  int x, y, z, sz;
-  short *coeff_ptr  = b->coeff;
-  short *zbin_ptr   = b->zbin;
-  short *round_ptr  = b->round;
-  short *quant_ptr  = b->quant;
-  short *qcoeff_ptr = d->qcoeff;
-  short *dqcoeff_ptr = d->dqcoeff;
-  short *dequant_ptr = d->dequant;
-  //double q1st = 2;
-  vpx_memset(qcoeff_ptr, 0, 64*sizeof(short));
-  vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short));
-
-  eob = -1;
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *coeff_ptr  = b->coeff;
+    short *zbin_ptr   = b->zbin;
+    short *round_ptr  = b->round;
+    short *quant_ptr  = b->quant;
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = d->dequant;
+    //double q1st = 2;
+    vpx_memset(qcoeff_ptr, 0, 64*sizeof(short));
+    vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short));
 
-  for (i = 0; i < 64; i++)
-  {
+    eob = -1;
 
-    rc   = vp8_default_zig_zag1d_8x8[i];
-    z    = coeff_ptr[rc];
-    //zbin = zbin_ptr[rc!=0]/q1st ;
-    zbin = zbin_ptr[rc!=0] ;
 
-    sz = (z >> 31);                                 // sign of z
-    x  = (z ^ sz) - sz;                             // x = abs(z)
 
-    if (x >= zbin)
+    for (i = 0; i < 64; i++)
     {
-      //y  = ((int)((x + round_ptr[rc!=0] / q1st) * quant_ptr[rc!=0] * q1st)) >> 16;
-      y  = ((int)((x + round_ptr[rc!=0]) * quant_ptr[rc!=0])) >> 16;
-      x  = (y ^ sz) - sz;                         // get the sign back
-      qcoeff_ptr[rc] = x;                         // write to destination
-      //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st;        // dequantized value
-      dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0];        // dequantized value
-      dqcoeff_ptr[rc] = (dqcoeff_ptr[rc]+2)>>2;
 
-      if (y)
-      {
-        eob = i;                                // last nonzero coeffs
-      }
+        rc   = vp8_default_zig_zag1d_8x8[i];
+        z    = coeff_ptr[rc];
+        //zbin = zbin_ptr[rc!=0]/q1st ;
+        zbin = zbin_ptr[rc!=0] ;
+
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        if (x >= zbin)
+        {
+            //y  = ((int)((x + round_ptr[rc!=0] / q1st) * quant_ptr[rc!=0] * q1st)) >> 16;
+            y  = ((int)((x + round_ptr[rc!=0]) * quant_ptr[rc!=0])) >> 16;
+            x  = (y ^ sz) - sz;                         // get the sign back
+            qcoeff_ptr[rc] = x;                         // write to destination
+            //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st;        // dequantized value
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0];        // dequantized value
+            dqcoeff_ptr[rc] = (dqcoeff_ptr[rc]+2)>>2;
+
+            if (y)
+            {
+                eob = i;                                // last nonzero coeffs
+            }
+        }
     }
-  }
-  d->eob = eob + 1;
+    d->eob = eob + 1;
 }
 
 #endif //EXACT_FASTQUANT
@@ -542,7 +544,7 @@ void vp8_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d)
 
   eob = -1;
 
-  for (i = 0; i < 4; i++)
+  for (i = 0; i < b->eob_max_offset_8x8; i++)
   {
     rc   = vp8_default_zig_zag1d[i];
     z    = coeff_ptr[rc];
@@ -600,7 +602,7 @@ void vp8_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d)
 
   eob = -1;
 
-  for (i = 0; i < 64; i++)
+  for (i = 0; i < b->eob_max_offset_8x8; i++)
   {
 
     rc   = vp8_default_zig_zag1d_8x8[i];
@@ -1138,9 +1140,18 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
         {
             x->block[i].eob_max_offset =
                 get_segdata( xd, segment_id, SEG_LVL_EOB );
+#if CONFIG_T8X8
+            x->block[i].eob_max_offset_8x8 =
+                get_segdata( xd, segment_id, SEG_LVL_EOB );
+#endif
         }
         else
+        {
             x->block[i].eob_max_offset = 16;
+#if CONFIG_T8X8
+            x->block[i].eob_max_offset_8x8 = 64;
+#endif
+        }
     }
 
     // UV
@@ -1165,9 +1176,20 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
         {
             x->block[i].eob_max_offset =
                 get_segdata( xd, segment_id, SEG_LVL_EOB );
+#if CONFIG_T8X8
+            x->block[i].eob_max_offset_8x8 =
+                get_segdata( xd, segment_id, SEG_LVL_EOB );
+#endif
+
         }
         else
+        {
             x->block[i].eob_max_offset = 16;
+#if CONFIG_T8X8
+            x->block[i].eob_max_offset_8x8 = 64;
+#endif
+
+        }
     }
 
     // Y2
@@ -1191,9 +1213,18 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     {
         x->block[24].eob_max_offset =
             get_segdata( xd, segment_id, SEG_LVL_EOB );
+#if CONFIG_T8X8
+        x->block[24].eob_max_offset_8x8 =
+            get_segdata( xd, segment_id, SEG_LVL_EOB );
+#endif
     }
     else
+    {
         x->block[24].eob_max_offset = 16;
+#if CONFIG_T8X8
+        x->block[24].eob_max_offset_8x8 = 4;   /* Y2 goes through the 2x2 path: 4 coeffs max */
+#endif
+    }
 
     /* save this macroblock QIndex for vp8_update_zbin_extra() */
     x->q_index = QIndex;
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 83768b32f..5d4121fac 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -225,6 +225,10 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
 
     vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc));  //initialize pre_mvc to all zero.
 
+#if CONFIG_T8X8
+    cpi->common.txfm_mode = ONLY_4X4;
+#endif
+
     //cpi->common.filter_level = 0;      // Reset every key frame.
     cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ;
 
@@ -251,6 +255,13 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
 }
 void vp8_setup_inter_frame(VP8_COMP *cpi)
 {
+#if CONFIG_T8X8
+    if(cpi->common.Width * cpi->common.Height > 640*360)
+        cpi->common.txfm_mode = ALLOW_8X8;
+    else
+        cpi->common.txfm_mode = ONLY_4X4;
+#endif
+
     if(cpi->common.refresh_alt_ref_frame)
     {
         vpx_memcpy( &cpi->common.fc,
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 24084ce6a..10fe97e68 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -647,11 +647,147 @@ static void macro_block_yrd( MACROBLOCK *mb,
     d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff)<<2;
 
     *Distortion = (d >> 4);
-
     // rate
     *Rate = vp8_rdcost_mby(mb);
 }
 
+#if CONFIG_T8X8
+/* Sum the token costs of one 2x2 block (at most 4 coeffs) using token_costs_8x8. */
+static int cost_coeffs_2x2(MACROBLOCK *mb,
+                           BLOCKD *b, int type,
+                           ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
+{
+    int c = !type;              /* start at coef 0, unless Y with Y2 */
+    int eob = b->eob;
+    int pt ;    /* surrounding block/prev coef predictor */
+    int cost = 0;
+    short *qcoeff_ptr = b->qcoeff;
+
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+    assert(eob<=4);
+    /* coefficients are fetched through the vp8_default_zig_zag1d scan table */
+# define QC2X2( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
+
+    for (; c < eob; c++)
+    {
+        int v = QC2X2(c);
+        int t = vp8_dct_value_tokens_ptr[v].Token;
+        cost += mb->token_costs_8x8[type] [vp8_coef_bands[c]] [pt] [t];
+        cost += vp8_dct_value_cost_ptr[v];
+        pt = vp8_prev_token_class[t];   /* context for the next token */
+    }
+
+# undef QC2X2
+    if (c < 4)      /* block not full: add the cost of the EOB token */
+        cost += mb->token_costs_8x8 [type][vp8_coef_bands[c]]
+                                    [pt] [DCT_EOB_TOKEN];
+
+    pt = (c != !type); // 1 if at least one coefficient was costed, else 0
+    *a = *l = pt;      // written back through both context pointers
+    return cost;
+}
+
+/* Sum the token costs of one 8x8 block (up to 64 coeffs) using token_costs_8x8. */
+static int cost_coeffs_8x8(MACROBLOCK *mb,
+                           BLOCKD *b, int type,
+                           ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+                           ENTROPY_CONTEXT *a1, ENTROPY_CONTEXT *l1)
+{
+    int c = !type;              /* start at coef 0, unless Y with Y2 */
+    int eob = b->eob;
+    int pt ;    /* surrounding block/prev coef predictor */
+    int cost = 0;
+    short *qcoeff_ptr = b->qcoeff;
+
+    VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1);
+    /* coefficients are fetched through the vp8_default_zig_zag1d_8x8 scan table */
+# define QC8X8( I)  ( qcoeff_ptr [vp8_default_zig_zag1d_8x8[I]] )
+
+    for (; c < eob; c++)
+    {
+        int v = QC8X8(c);
+        int t = vp8_dct_value_tokens_ptr[v].Token;
+        cost += mb->token_costs_8x8[type] [vp8_coef_bands_8x8[c]] [pt] [t];
+        cost += vp8_dct_value_cost_ptr[v];
+        pt = vp8_prev_token_class[t];   /* context for the next token */
+    }
+
+# undef QC8X8
+    if (c < 64)     /* block not full: add the cost of the EOB token */
+        cost += mb->token_costs_8x8 [type][vp8_coef_bands_8x8[c]]
+                                    [pt] [DCT_EOB_TOKEN];
+
+    pt = (c != !type); // 1 if at least one coefficient was costed, else 0
+    *a = *l = pt;      // only *a and *l are written back here, not *a1 / *l1
+    return cost;
+}
+static int vp8_rdcost_mby_8x8(MACROBLOCK *mb)
+{
+    int cost = 0;   /* running total: four cost_coeffs_8x8 calls plus one cost_coeffs_2x2 */
+    int b;
+    MACROBLOCKD *x = &mb->e_mbd;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+    /* cost against local copies so the macroblock's own contexts are not modified */
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
+    /* blocks 0, 4, 8 and 12 are costed as 8x8 with type PLANE_TYPE_Y_NO_DC */
+    for (b = 0; b < 16; b+=4)
+        cost += cost_coeffs_8x8(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
+                    ta + vp8_block2above[b], tl + vp8_block2left[b],
+                    ta + vp8_block2above[b+4], tl + vp8_block2left[b+4]);
+    /* block 24 is costed as the 2x2 second-order (PLANE_TYPE_Y2) block */
+    cost += cost_coeffs_2x2(mb, x->block + 24, PLANE_TYPE_Y2,
+                ta + vp8_block2above[24], tl + vp8_block2left[24]);
+    return cost;
+}
+
+/* Luma rate/distortion of the current macroblock coded with the 8x8 transform.
+ * Stores the token cost in *Rate and the scaled coefficient error in *Distortion. */
+static void macro_block_yrd_8x8( MACROBLOCK *mb,
+                             int *Rate,
+                             int *Distortion,
+                             const VP8_ENCODER_RTCD *rtcd)
+{
+    MACROBLOCKD *const x = &mb->e_mbd;
+    BLOCK   *const mb_y2 = mb->block + 24;
+    BLOCKD *const x_y2  = x->block + 24;
+    int d;
+
+    ENCODEMB_INVOKE(&rtcd->encodemb, submby)
+        ( mb->src_diff, *(mb->block[0].base_src),
+        mb->e_mbd.predictor, mb->block[0].src_stride );
+
+    vp8_transform_mby_8x8(mb);
+    vp8_quantize_mby_8x8(mb);
+
+    /* remove 1st order dc to properly combine 1st/2nd order distortion */
+    mb->coeff[0] = 0;
+    mb->coeff[64] = 0;
+    mb->coeff[128] = 0;
+    mb->coeff[192] = 0;
+    mb->e_mbd.dqcoeff[0] = 0;
+    mb->e_mbd.dqcoeff[64] = 0;
+    mb->e_mbd.dqcoeff[128] = 0;
+    mb->e_mbd.dqcoeff[192] = 0;
+    d = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(mb, 0) << 2;
+
+    /* rtcd is the encoder-wide table here, so berr is reached via ->encodemb */
+#if CONFIG_EXTEND_QRANGE
+    d += ENCODEMB_INVOKE(&rtcd->encodemb, berr)(mb_y2->coeff, x_y2->dqcoeff)<<2;
+#else
+    d += ENCODEMB_INVOKE(&rtcd->encodemb, berr)(mb_y2->coeff, x_y2->dqcoeff);
+#endif
+
+    *Distortion = (d >> 4);
+    // rate
+    *Rate = vp8_rdcost_mby_8x8(mb);
+}
+#endif
 
 static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
 {
@@ -1044,8 +1180,7 @@ static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
                             int *distortion, int fullpixel)
 {
 #if CONFIG_T8X8
-    int tx_type = get_seg_tx_type(&x->e_mbd,
-                                  x->e_mbd.mode_info_context->mbmi.segment_id);
+    int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
 #endif
 
     ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
@@ -2247,16 +2382,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         {
             continue;
         }
-#if CONFIG_T8X8
-        // No 4x4 modes if segment flagged as 8x8
-        else if ( ( get_seg_tx_type( xd, segment_id ) == TX_8X8 ) &&
-                  ( (this_mode == B_PRED)
-                  ||(this_mode == I8X8_PRED)
-                  || (this_mode == SPLITMV) ) )
-        {
-            continue;
-        }
-#endif
 
         // Disable this drop out case if either the mode or ref frame
         // segment level feature is enabled for this segment. This is to
@@ -2404,7 +2529,14 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
             RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
                 (&x->e_mbd);
-            macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
+#if CONFIG_T8X8
+            if(cpi->common.txfm_mode == ALLOW_8X8)
+                macro_block_yrd_8x8(x, &rate_y, &distortion,
+                                IF_RTCD(&cpi->rtcd)) ;
+            else
+#endif
+                macro_block_yrd(x, &rate_y, &distortion,
+                                IF_RTCD(&cpi->rtcd.encodemb)) ;
             rate2 += rate_y;
             distortion2 += distortion;
             rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
@@ -2628,15 +2760,21 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                     }
                 }
             }
-
-
             //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts);   // Experimental debug code
 
             // Add in the Mv/mode cost
             rate2 += vp8_cost_mv_ref(&cpi->common, this_mode, mdcounts);
 
             // Y cost and distortion
-            macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
+#if CONFIG_T8X8
+            if(cpi->common.txfm_mode == ALLOW_8X8)
+                macro_block_yrd_8x8(x, &rate_y, &distortion,
+                                IF_RTCD(&cpi->rtcd));
+            else
+#endif
+                macro_block_yrd(x, &rate_y, &distortion,
+                                IF_RTCD(&cpi->rtcd.encodemb));
+
             rate2 += rate_y;
             distortion2 += distortion;
 
@@ -3027,42 +3165,18 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
                                             &rate16x16, &rate16x16_tokenonly,
                                             &dist16x16);
     mode16x16 = x->e_mbd.mode_info_context->mbmi.mode;
-#if CONFIG_T8X8
-    if ( get_seg_tx_type( xd,
-                          xd->mode_info_context->mbmi.segment_id ) == TX_8X8)
-    {
-        error8x8 = INT_MAX;
-    }
-    else
-#else
-    {
-        error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
-                                            &rate8x8, &rate8x8_tokenonly,
-                                            &dist8x8, error16x16);
-        mode8x8[0]= x->e_mbd.mode_info_context->bmi[0].as_mode;
-        mode8x8[1]= x->e_mbd.mode_info_context->bmi[2].as_mode;
-        mode8x8[2]= x->e_mbd.mode_info_context->bmi[8].as_mode;
-        mode8x8[3]= x->e_mbd.mode_info_context->bmi[10].as_mode;
-    }
-#endif
 
-#if CONFIG_T8X8
-    if ( get_seg_tx_type( xd,
-                          xd->mode_info_context->mbmi.segment_id ) == TX_4X4 )
-    {
-        error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
-                                             &rate4x4, &rate4x4_tokenonly,
-                                             &dist4x4, error16x16);
-    }
-    else
-    {
-        error4x4 = INT_MAX;
-    }
-#else
+    error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
+                &rate8x8, &rate8x8_tokenonly,
+                &dist8x8, error16x16);
+    mode8x8[0]= x->e_mbd.mode_info_context->bmi[0].as_mode;
+    mode8x8[1]= x->e_mbd.mode_info_context->bmi[2].as_mode;
+    mode8x8[2]= x->e_mbd.mode_info_context->bmi[8].as_mode;
+    mode8x8[3]= x->e_mbd.mode_info_context->bmi[10].as_mode;
+
     error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
                                          &rate4x4, &rate4x4_tokenonly,
                                          &dist4x4, error16x16);
-#endif
 
     if(error8x8> error16x16)
     {
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 2a17c312c..d753bb141 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -107,6 +107,7 @@ static void fill_value_tokens()
 #if CONFIG_T8X8
 static void tokenize2nd_order_b_8x8
 (
+     MACROBLOCKD *xd,
     const BLOCKD *const b,
     TOKENEXTRA **tp,
     const int type,     /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
@@ -122,6 +123,16 @@ static void tokenize2nd_order_b_8x8
     TOKENEXTRA *t = *tp;        /* store tokens starting here */
     int x;
     const short *qcoeff_ptr = b->qcoeff;
+
+    /* The 2nd order block has at most 4 coefficients (see the eob assert
+     * below); an active segment-level EOB feature replaces this limit. */
+    int seg_eob = 4;
+    int segment_id = xd->mode_info_context->mbmi.segment_id;
+    if ( segfeature_active( xd, segment_id, SEG_LVL_EOB ) )
+    {
+        seg_eob = get_segdata( xd, segment_id, SEG_LVL_EOB );
+    }
+
     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
 
     assert(eob<=4);
@@ -158,7 +169,7 @@ static void tokenize2nd_order_b_8x8
 
         ++cpi->coef_counts_8x8       [type] [band] [pt] [x];
     }
-    while (pt = vp8_prev_token_class[x], ++t, c < eob  &&  ++c < 4);
+    while (pt = vp8_prev_token_class[x], ++t, c < eob  &&  ++c <seg_eob);
 
     *tp = t;
     pt = (c != !type); /* 0 <-> all coeff data is zero */
@@ -239,6 +250,7 @@ static void tokenize2nd_order_b
 #if CONFIG_T8X8
 static void tokenize1st_order_b_8x8
 (
+     MACROBLOCKD *xd,
     const BLOCKD *const b,
     TOKENEXTRA **tp,
     const int type,     /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
@@ -256,6 +268,15 @@ static void tokenize1st_order_b_8x8
     TOKENEXTRA *t = *tp;        /* store tokens starting here */
     int x;
     const short *qcoeff_ptr = b->qcoeff;
+
+    int seg_eob = 64;
+    int segment_id = xd->mode_info_context->mbmi.segment_id;
+
+    if ( segfeature_active( xd, segment_id, SEG_LVL_EOB ) )
+    {
+        seg_eob = get_segdata( xd, segment_id, SEG_LVL_EOB );
+    }
+
     VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1);
 
     do
@@ -287,7 +308,7 @@ static void tokenize1st_order_b_8x8
 
         ++cpi->coef_counts_8x8       [type] [band] [pt] [x];
     }
-    while (pt = vp8_prev_token_class[x], ++t, c < eob  &&  ++c < 64);
+    while (pt = vp8_prev_token_class[x], ++t, c < eob  &&  ++c < seg_eob);
 
     *tp = t;
     pt = (c != !type); /* 0 <-> all coeff data is zero */
@@ -475,7 +496,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
     int b;
 
 #if CONFIG_T8X8
-    int tx_type = get_seg_tx_type(x, x->mode_info_context->mbmi.segment_id);
+    int tx_type = x->mode_info_context->mbmi.txfm_size;
 #endif
 
     // If the MB is going to be skipped because of a segment level flag
@@ -536,7 +557,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
         {
             ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
             ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
-            tokenize2nd_order_b_8x8(x->block + 24, t, 1, x->frame_type,
+            tokenize2nd_order_b_8x8(x,
+                        x->block + 24, t, 1, x->frame_type,
                        A + vp8_block2above[24], L + vp8_block2left[24], cpi);
         }
         else
@@ -553,7 +575,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
         ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
         for (b = 0; b < 16; b+=4)
         {
-            tokenize1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type,
+            tokenize1st_order_b_8x8(x,
+                                x->block + b, t, plane_type, x->frame_type,
                                 A + vp8_block2above[b],
                                 L + vp8_block2left[b],
                                 A + vp8_block2above[b+1],
@@ -592,7 +615,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
         }
 
         for (b = 16; b < 24; b+=4) {
-            tokenize1st_order_b_8x8(x->block + b, t, 2, x->frame_type,
+            tokenize1st_order_b_8x8(x,
+                                    x->block + b, t, 2, x->frame_type,
                                     A + vp8_block2above[b],
                                     L + vp8_block2left[b],
                                     A + vp8_block2above[b+1],