8 files changed, 27 insertions, 26 deletions
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index ffd7d235a..863350382 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -35,7 +35,7 @@ typedef void (*iht_t) (const int16_t *in, uint8_t *dst, int stride,
               int tx_type);
 
 void fdct8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
-  vp9_short_fdct8x8_c(in, out, stride);
+  vp9_fdct8x8_c(in, out, stride);
 }
 
 void fht8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
@@ -297,7 +297,7 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
     C, FwdTrans8x8DCT,
     ::testing::Values(
-        make_tuple(&vp9_short_fdct8x8_c, &vp9_idct8x8_64_add_c, 0)));
+        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0)));
 INSTANTIATE_TEST_CASE_P(
     C, FwdTrans8x8HT,
     ::testing::Values(
@@ -310,7 +310,7 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     SSE2, FwdTrans8x8DCT,
     ::testing::Values(
-        make_tuple(&vp9_short_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0)));
+        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0)));
 INSTANTIATE_TEST_CASE_P(
     SSE2, FwdTrans8x8HT,
     ::testing::Values(
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 7d9a8b989..1a03269fb 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -201,6 +201,7 @@ typedef struct macroblockd {
   // A NULL indicates that the 8x8 is not part of the image
   MODE_INFO **mi_8x8;
   MODE_INFO **prev_mi_8x8;
+  MODE_INFO *mi_stream;  // cursor into the current tile's mode info stream
 
   int up_available;
   int left_available;
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 9d9c48fbe..fc042ecb9 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -695,8 +695,8 @@ specialize vp9_short_fht8x8 sse2
 prototype void vp9_short_fht16x16 "int16_t *InputData, int16_t *OutputData, int pitch, int tx_type"
 specialize vp9_short_fht16x16 sse2
 
-prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int stride"
-specialize vp9_short_fdct8x8 sse2
+prototype void vp9_fdct8x8 "int16_t *input, int16_t *output, int stride"
+specialize vp9_fdct8x8 sse2
 
 prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int stride"
 specialize vp9_short_fdct4x4 sse2
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index cfec36b42..74de6c670 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -525,7 +525,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
 
   // 2-D
   for (i = 0; i < 2; i++) {
-    // 8x8 Transpose is copied from vp9_short_fdct8x8_sse2()
+    // 8x8 Transpose is copied from vp9_fdct8x8_sse2()
     TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                   in4, in5, in6, in7);
 
@@ -638,7 +638,7 @@ static void idct8_1d_sse2(__m128i *in) {
   in6 = in[6];
   in7 = in[7];
 
-  // 8x8 Transpose is copied from vp9_short_fdct8x8_sse2()
+  // 8x8 Transpose is copied from vp9_fdct8x8_sse2()
   TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                 in4, in5, in6, in7);
 
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 12ce82157..3ee8ba41d 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -260,7 +260,6 @@ static int decode_tokens(VP9_COMMON *const cm, MACROBLOCKD *const xd,
 }
 
 static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize,
-                        int tile_col,
                         int mi_row, int mi_col) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
@@ -274,9 +273,9 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize,
   xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset;
 
   // we are using the mode info context stream here
-  xd->mi_8x8[0] = pbi->mi_streams[tile_col];
+  xd->mi_8x8[0] = xd->mi_stream;  // point this block at the cursor entry
   xd->mi_8x8[0]->mbmi.sb_type = bsize;
-  pbi->mi_streams[tile_col]++;
+  ++xd->mi_stream;  // step the cursor past the entry just handed out
 
   // Special case: if prev_mi is NULL, the previous mode info context
   // cannot be used.
@@ -306,8 +305,7 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   xd->corrupted |= cfg->corrupted;
 }
 
-static void decode_modes_b(VP9D_COMP *pbi, int tile_col,
-                           int mi_row, int mi_col,
+static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
                            vp9_reader *r, BLOCK_SIZE bsize, int index) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
@@ -319,7 +317,7 @@ static void decode_modes_b(VP9D_COMP *pbi, int tile_col,
     if (index > 0)
       return;
 
-  set_offsets(pbi, bsize, tile_col, mi_row, mi_col);
+  set_offsets(pbi, bsize, mi_row, mi_col);
   vp9_read_mode_info(cm, xd, mi_row, mi_col, r);
 
   if (less8x8)
@@ -356,8 +354,7 @@ static void decode_modes_b(VP9D_COMP *pbi, int tile_col,
   xd->corrupted |= vp9_reader_has_error(r);
 }
 
-static void decode_modes_sb(VP9D_COMP *pbi, int tile_col,
-                            int mi_row, int mi_col,
+static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
                             vp9_reader* r, BLOCK_SIZE bsize, int index) {
   VP9_COMMON *const cm = &pbi->common;
   const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
@@ -393,23 +390,23 @@ static void decode_modes_sb(VP9D_COMP *pbi, int tile_col,
 
   switch (partition) {
     case PARTITION_NONE:
-      decode_modes_b(pbi, tile_col, mi_row, mi_col, r, subsize, 0);
+      decode_modes_b(pbi, mi_row, mi_col, r, subsize, 0);
       break;
     case PARTITION_HORZ:
-      decode_modes_b(pbi, tile_col, mi_row, mi_col, r, subsize, 0);
+      decode_modes_b(pbi, mi_row, mi_col, r, subsize, 0);
       if (mi_row + hbs < cm->mi_rows)
-        decode_modes_b(pbi, tile_col, mi_row + hbs, mi_col, r, subsize, 1);
+        decode_modes_b(pbi, mi_row + hbs, mi_col, r, subsize, 1);
       break;
     case PARTITION_VERT:
-      decode_modes_b(pbi, tile_col, mi_row, mi_col, r, subsize, 0);
+      decode_modes_b(pbi, mi_row, mi_col, r, subsize, 0);
       if (mi_col + hbs < cm->mi_cols)
-        decode_modes_b(pbi, tile_col, mi_row, mi_col + hbs, r, subsize, 1);
+        decode_modes_b(pbi, mi_row, mi_col + hbs, r, subsize, 1);
       break;
     case PARTITION_SPLIT: {
       int n;
       for (n = 0; n < 4; n++) {
         const int j = n >> 1, i = n & 1;
-        decode_modes_sb(pbi, tile_col, mi_row + j * hbs, mi_col + i * hbs,
+        decode_modes_sb(pbi, mi_row + j * hbs, mi_col + i * hbs,
                         r, subsize, n);
       }
     } break;
@@ -668,6 +665,9 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r, int tile_col) {
   VP9_COMMON *const cm = &pbi->common;
   int mi_row, mi_col;
   YV12_BUFFER_CONFIG *const fb = &cm->yv12_fb[cm->new_fb_idx];
+  MACROBLOCKD *const xd = &pbi->mb;
+  // NOTE(review): cursor is never stored back to mi_streams[] -- intended?
+  xd->mi_stream = pbi->mi_streams[tile_col];
 
   if (pbi->do_loopfilter_inline) {
     LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
   if (pbi->do_loopfilter_inline) {
     LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
@@ -686,7 +686,7 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r, int tile_col) {
     vp9_zero(cm->left_seg_context);
     for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
          mi_col += MI_BLOCK_SIZE)
-      decode_modes_sb(pbi, tile_col, mi_row, mi_col, r, BLOCK_64X64, 0);
+      decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64, 0);
 
     if (pbi->do_loopfilter_inline) {
       const int lf_start = mi_row - MI_BLOCK_SIZE;
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 37f8414a5..773a2b2e5 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -229,7 +229,7 @@ static void fdct8(const int16_t *input, int16_t *output) {
   output[7] = dct_const_round_shift(t3);
 }
 
-void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int stride) {
+void vp9_fdct8x8_c(int16_t *input, int16_t *final_output, int stride) {
   int i, j;
   int16_t intermediate[64];
 
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 0f5d3f4e3..76ca7d22f 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -391,7 +391,7 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
       xoff = 8 * (block & twmask);
       yoff = 8 * (block >> twl);
       src_diff = p->src_diff + 4 * bw * yoff + xoff;
-      vp9_short_fdct8x8(src_diff, coeff, bw * 4);
+      vp9_fdct8x8(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                      p->quant, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -584,7 +584,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
       if (tx_type != DCT_DCT)
         vp9_short_fht8x8(src_diff, coeff, bw * 4, tx_type);
       else
-        vp9_short_fdct8x8(src_diff, coeff, bw * 4);
+        vp9_fdct8x8(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                      p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index f67558982..e4baed41a 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -270,7 +270,7 @@ void vp9_short_fht4x4_sse2(int16_t *input, int16_t *output,
   write_buffer_4x4(output, in);
 }
 
-void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int stride) {
+void vp9_fdct8x8_sse2(int16_t *input, int16_t *output, int stride) {
   int pass;
   // Constants
   //    When we use them, in one case, they are all the same. In all others