13 files changed, 245 insertions, 206 deletions
diff --git a/examples/decode_to_md5.txt b/examples/decode_to_md5.txt
index b3dd56876..ea0eb69cd 100644
--- a/examples/decode_to_md5.txt
+++ b/examples/decode_to_md5.txt
@@ -34,8 +34,8 @@ MD5Init(&md5);
 for(plane=0; plane < 3; plane++) {
     unsigned char *buf =img->planes[plane];
 
-    for(y=0; y<img->d_h >> (plane?1:0); y++) {
-        MD5Update(&md5, buf, img->d_w >> (plane?1:0));
+    for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
+        MD5Update(&md5, buf, (plane ? (img->d_w + 1) >> 1 : img->d_w));
         buf += img->stride[plane];
     }
 }
diff --git a/examples/decoder_tmpl.txt b/examples/decoder_tmpl.txt
index 7dd05d1ff..92a2c3013 100644
--- a/examples/decoder_tmpl.txt
+++ b/examples/decoder_tmpl.txt
@@ -47,8 +47,9 @@ while((img = vpx_codec_get_frame(&codec, &iter))) {
 for(plane=0; plane < 3; plane++) {
     unsigned char *buf =img->planes[plane];
 
-    for(y=0; y<img->d_h >> (plane?1:0); y++) {
-        if(fwrite(buf, 1, img->d_w >> (plane?1:0), outfile));
+    for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
+        if(fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
+           outfile));
         buf += img->stride[plane];
     }
 }
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 65ad43559..d9558875a 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -215,6 +215,7 @@ typedef struct MacroBlockD
 
     /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
     BLOCKD block[25];
+    int fullpixel_mask; /* ANDed onto UV mvs; low 3 bits clear if full_pixel */
 
     YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
     YV12_BUFFER_CONFIG dst;
@@ -305,20 +306,4 @@ typedef struct MacroBlockD
 extern void vp8_build_block_doffsets(MACROBLOCKD *x);
 extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
 
-static void update_blockd_bmi(MACROBLOCKD *xd)
-{
-    int i;
-    int is_4x4;
-    is_4x4 = (xd->mode_info_context->mbmi.mode == SPLITMV) ||
-              (xd->mode_info_context->mbmi.mode == B_PRED);
-
-    if (is_4x4)
-    {
-        for (i = 0; i < 16; i++)
-        {
-            xd->block[i].bmi = xd->mode_info_context->bmi[i];
-        }
-    }
-}
-
 #endif  /* __INC_BLOCKD_H */
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 84cda1334..064a8355c 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -19,10 +19,6 @@
 #include "onyxc_int.h"
 #endif
 
-static const int bbb[4] = {0, 2, 8, 10};
-
-
-
 void vp8_copy_mem16x16_c(
     unsigned char *src,
     int src_stride,
@@ -203,54 +199,109 @@ static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
 
 
 /*encoder only*/
-void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
+void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
 {
-    int i;
+    unsigned char *uptr, *vptr;
+    unsigned char *upred_ptr = &x->predictor[256];
+    unsigned char *vpred_ptr = &x->predictor[320];
 
-    if (x->mode_info_context->mbmi.mode != SPLITMV)
-    {
-        unsigned char *uptr, *vptr;
-        unsigned char *upred_ptr = &x->predictor[256];
-        unsigned char *vpred_ptr = &x->predictor[320];
+    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+    int offset;
+    int pre_stride = x->block[16].pre_stride;
 
-        int mv_row = x->block[16].bmi.mv.as_mv.row;
-        int mv_col = x->block[16].bmi.mv.as_mv.col;
-        int offset;
-        int pre_stride = x->block[16].pre_stride;
+    /* uv mv = mbmi mv / 2 to nearest, ties away from zero */
+    if (mv_row < 0)
+        mv_row -= 1;
+    else
+        mv_row += 1;
 
-        offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
-        uptr = x->pre.u_buffer + offset;
-        vptr = x->pre.v_buffer + offset;
+    if (mv_col < 0)
+        mv_col -= 1;
+    else
+        mv_col += 1;
 
-        if ((mv_row | mv_col) & 7)
-        {
-            x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
-            x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
-        }
-        else
-        {
-            RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
-            RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
-        }
+    mv_row /= 2;
+    mv_col /= 2;
+
+    mv_row &= x->fullpixel_mask; /* clears the 3 sub-pel bits if full-pixel */
+    mv_col &= x->fullpixel_mask;
+
+    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+    uptr = x->pre.u_buffer + offset;
+    vptr = x->pre.v_buffer + offset;
+
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
+        x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
     }
     else
     {
-        for (i = 16; i < 24; i += 2)
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
+    }
+}
+
+/*encoder only*/
+void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
+{
+    int i, j;
+
+    /* uv mv: luma mvs y,y+1,y+4,y+5 summed, /8 to nearest (ties away from 0) */
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
         {
-            BLOCKD *d0 = &x->block[i];
-            BLOCKD *d1 = &x->block[i+1];
+            int yoffset = i * 8 + j * 2;
+            int uoffset = 16 + i * 2 + j;
+            int voffset = 20 + i * 2 + j;
 
-            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                build_inter_predictors2b(x, d0, 8);
-            else
-            {
-                vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
-                vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
-            }
+            int temp;
+
+            temp = x->block[yoffset  ].bmi.mv.as_mv.row
+                   + x->block[yoffset+1].bmi.mv.as_mv.row
+                   + x->block[yoffset+4].bmi.mv.as_mv.row
+                   + x->block[yoffset+5].bmi.mv.as_mv.row;
+
+            if (temp < 0) temp -= 4;
+            else temp += 4;
+
+            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
+
+            temp = x->block[yoffset  ].bmi.mv.as_mv.col
+                   + x->block[yoffset+1].bmi.mv.as_mv.col
+                   + x->block[yoffset+4].bmi.mv.as_mv.col
+                   + x->block[yoffset+5].bmi.mv.as_mv.col;
+
+            if (temp < 0) temp -= 4;
+            else temp += 4;
+
+            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
+
+            x->block[voffset].bmi.mv.as_mv.row =
+                x->block[uoffset].bmi.mv.as_mv.row ;
+            x->block[voffset].bmi.mv.as_mv.col =
+                x->block[uoffset].bmi.mv.as_mv.col ;
+        }
+    }
+
+    for (i = 16; i < 24; i += 2)
+    {
+        BLOCKD *d0 = &x->block[i];
+        BLOCKD *d1 = &x->block[i+1];
+
+        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
+            build_inter_predictors2b(x, d0, 8);
+        else
+        {
+            vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
+            vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
         }
     }
 }
 
+
 /*encoder only*/
 void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
 {
@@ -302,8 +353,23 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
         RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
     }
 
-    mv_row = x->block[16].bmi.mv.as_mv.row;
-    mv_col = x->block[16].bmi.mv.as_mv.col;
+    /* uv mv = current mv_row/mv_col halved to nearest, ties away from zero */
+    if (mv_row < 0)
+        mv_row -= 1;
+    else
+        mv_row += 1;
+
+    if (mv_col < 0)
+        mv_col -= 1;
+    else
+        mv_col += 1;
+
+    mv_row /= 2;
+    mv_col /= 2;
+
+    mv_row &= x->fullpixel_mask; /* clears the 3 sub-pel bits if full-pixel */
+    mv_col &= x->fullpixel_mask;
+
     pre_stride >>= 1;
     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
     uptr = x->pre.u_buffer + offset;
@@ -322,17 +388,21 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
 
 }
 
-void vp8_build_inter4x4_predictors_mb(MACROBLOCKD *x)
+static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
 {
     int i;
 
     if (x->mode_info_context->mbmi.partitioning < 3)
     {
-        for (i = 0; i < 4; i++)
-        {
-            BLOCKD *d = &x->block[bbb[i]];
-            build_inter_predictors4b(x, d, 16);
-        }
+        x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
+        x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
+        x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
+        x->block[10].bmi = x->mode_info_context->bmi[10];
+
+        build_inter_predictors4b(x, &x->block[ 0], 16);
+        build_inter_predictors4b(x, &x->block[ 2], 16);
+        build_inter_predictors4b(x, &x->block[ 8], 16);
+        build_inter_predictors4b(x, &x->block[10], 16);
     }
     else
     {
@@ -341,6 +411,9 @@ void vp8_build_inter4x4_predictors_mb(MACROBLOCKD *x)
             BLOCKD *d0 = &x->block[i];
             BLOCKD *d1 = &x->block[i+1];
 
+            x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
+            x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
+
             if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                 build_inter_predictors2b(x, d0, 16);
             else
@@ -368,98 +441,60 @@ void vp8_build_inter4x4_predictors_mb(MACROBLOCKD *x)
     }
 }
 
-void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
-{
-    if (x->mode_info_context->mbmi.mode != SPLITMV)
-    {
-        vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256],
-                                           &x->predictor[320], 16, 8);
-    }
-    else
-    {
-        vp8_build_inter4x4_predictors_mb(x);
-    }
-}
-
-void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
+/* Set the U/V block mvs (block[16..23]) from mode_info_context luma mvs. */
+static void build_4x4uvmvs(MACROBLOCKD *x)
 {
     int i, j;
 
-    if (x->mode_info_context->mbmi.mode == SPLITMV)
+    for (i = 0; i < 2; i++)
     {
-        for (i = 0; i < 2; i++)
+        for (j = 0; j < 2; j++)
         {
-            for (j = 0; j < 2; j++)
-            {
-                int yoffset = i * 8 + j * 2;
-                int uoffset = 16 + i * 2 + j;
-                int voffset = 20 + i * 2 + j;
-
-                int temp;
+            int yoffset = i * 8 + j * 2;
+            int uoffset = 16 + i * 2 + j;
+            int voffset = 20 + i * 2 + j;
 
-                temp = x->block[yoffset  ].bmi.mv.as_mv.row
-                       + x->block[yoffset+1].bmi.mv.as_mv.row
-                       + x->block[yoffset+4].bmi.mv.as_mv.row
-                       + x->block[yoffset+5].bmi.mv.as_mv.row;
+            int temp;
 
-                if (temp < 0) temp -= 4;
-                else temp += 4;
+            temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
+                 + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
+                 + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
+                 + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
 
-                x->block[uoffset].bmi.mv.as_mv.row = temp / 8;
+            if (temp < 0) temp -= 4;
+            else temp += 4;
 
-                if (fullpixel)
-                    x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & 0xfffffff8;
+            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
 
-                temp = x->block[yoffset  ].bmi.mv.as_mv.col
-                       + x->block[yoffset+1].bmi.mv.as_mv.col
-                       + x->block[yoffset+4].bmi.mv.as_mv.col
-                       + x->block[yoffset+5].bmi.mv.as_mv.col;
+            temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
+                 + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
+                 + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
+                 + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
 
-                if (temp < 0) temp -= 4;
-                else temp += 4;
+            if (temp < 0) temp -= 4;
+            else temp += 4;
 
-                x->block[uoffset].bmi.mv.as_mv.col = temp / 8;
+            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
 
-                if (fullpixel)
-                    x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & 0xfffffff8;
-
-                x->block[voffset].bmi.mv.as_mv.row = x->block[uoffset].bmi.mv.as_mv.row ;
-                x->block[voffset].bmi.mv.as_mv.col = x->block[uoffset].bmi.mv.as_mv.col ;
-            }
+            x->block[voffset].bmi.mv.as_mv.row =
+                x->block[uoffset].bmi.mv.as_mv.row ;
+            x->block[voffset].bmi.mv.as_mv.col =
+                x->block[uoffset].bmi.mv.as_mv.col ;
         }
     }
+}
+
+void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
+{
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256],
+                                           &x->predictor[320], 16, 8);
+    }
     else
     {
-        int mvrow = x->mode_info_context->mbmi.mv.as_mv.row;
-        int mvcol = x->mode_info_context->mbmi.mv.as_mv.col;
-
-        if (mvrow < 0)
-            mvrow -= 1;
-        else
-            mvrow += 1;
-
-        if (mvcol < 0)
-            mvcol -= 1;
-        else
-            mvcol += 1;
-
-        mvrow /= 2;
-        mvcol /= 2;
-
-        for (i = 0; i < 8; i++)
-        {
-            x->block[ 16 + i].bmi.mv.as_mv.row = mvrow;
-            x->block[ 16 + i].bmi.mv.as_mv.col = mvcol;
-
-            if (fullpixel)
-            {
-                x->block[ 16 + i].bmi.mv.as_mv.row = mvrow & 0xfffffff8;
-                x->block[ 16 + i].bmi.mv.as_mv.col = mvcol & 0xfffffff8;
-            }
-        }
+        build_4x4uvmvs(x);
+        build_inter4x4_predictors_mb(x);
     }
 }
 
-
-
-
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index a68e4aaba..456812ecd 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -22,8 +22,9 @@ extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
 
 
 extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
 extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf);
-extern void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x);
+
+extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
+extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
 
 #endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 69f2905d4..523352c69 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -306,8 +306,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
         for (i = 0; i < 16; i++)
         {
             BLOCKD *b = &xd->block[i];
+            int b_mode = xd->mode_info_context->bmi[i].as_mode;
+
             RECON_INVOKE(RTCD_VTABLE(recon), intra4x4_predict)
-                          (b, b->bmi.as_mode, b->predictor);
+                          (b, b_mode, b->predictor);
 
             if (xd->eobs[i] > 1)
             {
@@ -509,8 +511,6 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd)
         }
 #endif
 
-        update_blockd_bmi(xd);
-
         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
@@ -535,17 +535,9 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd)
             xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
         }
 
-        vp8_build_uvmvs(xd, pc->full_pixel);
-
 #ifdef DEC_DEBUG
         dec_debug = (pc->current_video_frame==5 && mb_row==2 && mb_col==3);
 #endif
-        /*
-        if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0)
-        pbi->debugoutput =1;
-        else
-        pbi->debugoutput =0;
-        */
         decode_macroblock(pbi, xd, mb_row * pc->mb_cols  + mb_col);
 
         /* check if the boolean decoder has suffered an error */
@@ -793,6 +785,11 @@ static void init_frame(VP8D_COMP *pbi)
     xd->mode_info_context->mbmi.mode = DC_PRED;
     xd->mode_info_stride = pc->mode_info_stride;
     xd->corrupted = 0; /* init without corruption */
+
+    xd->fullpixel_mask = ~0;      /* all mv bits kept */
+    if(pc->full_pixel)
+        xd->fullpixel_mask = ~7;  /* low 3 (sub-pel) bits cleared */
+
 }
 
 int vp8_decode_frame(VP8D_COMP *pbi)
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index b60b7e8bf..a1a165fd2 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -28,7 +28,6 @@
 
 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
 extern void clamp_mvs(MACROBLOCKD *xd);
-extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
@@ -91,6 +90,11 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
         {
             mbd->block[j].dequant = xd->block[j].dequant;
         }
+
+        mbd->fullpixel_mask = ~0;      /* all mv bits kept */
+        if(pc->full_pixel)
+            mbd->fullpixel_mask = ~7;  /* low 3 (sub-pel) bits cleared */
+
     }
 
     for (i=0; i< pc->mb_rows; i++)
@@ -220,8 +224,9 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
         for (i = 0; i < 16; i++)
         {
             BLOCKD *b = &xd->block[i];
+            int b_mode = xd->mode_info_context->bmi[i].as_mode;
 
-            vp8mt_predict_intra4x4(pbi, xd, b->bmi.as_mode, b->predictor, mb_row, mb_col, i);
+            vp8mt_predict_intra4x4(pbi, xd, b_mode, b->predictor, mb_row, mb_col, i);
 
             if (xd->eobs[i] > 1)
             {
@@ -321,8 +326,6 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
                             }
                         }
 
-                        update_blockd_bmi(xd);
-
                         /* Distance of MB to the various image edges.
                          * These are specified to 8th pel as they are always
                          * compared to values that are in 1/8th pel units.
@@ -386,7 +389,6 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
                             xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
                         }
 
-                        vp8_build_uvmvs(xd, pc->full_pixel);
                         decode_macroblock(pbi, xd, mb_row, mb_col);
 
                         /* check if the boolean decoder has suffered an error */
@@ -827,8 +829,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                     }
                 }
 
-                update_blockd_bmi(xd);
-
                 /* Distance of MB to the various image edges.
                  * These are specified to 8th pel as they are always compared to
                  * values that are in 1/8th pel units.
@@ -887,7 +887,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                     xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
                 }
 
-                vp8_build_uvmvs(xd, pc->full_pixel);
                 decode_macroblock(pbi, xd, mb_row, mb_col);
 
                 /* check if the boolean decoder has suffered an error */
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index a37727510..1ccf134af 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -564,7 +564,6 @@ void encode_mb_row(VP8_COMP *cpi,
                    int *segment_counts,
                    int *totalrate)
 {
-    int i;
     int recon_yoffset, recon_uvoffset;
     int mb_col;
     int ref_fb_idx = cm->lst_fb_idx;
@@ -754,10 +753,6 @@ void encode_mb_row(VP8_COMP *cpi,
         else
             xd->mode_info_context->mbmi.segment_id = 1;
 #endif
-        /* save the block info */
-        for (i = 0; i < 16; i++)
-            xd->mode_info_context->bmi[i] = xd->block[i].bmi;
-
         // adjust to the next column of macroblocks
         x->src.y_buffer += 16;
         x->src.u_buffer += 8;
@@ -918,6 +913,9 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
                                         + vp8_cost_one(cpi->prob_gf_coded);
     }
 
+    xd->fullpixel_mask = ~0;      /* all mv bits kept */
+    if(cm->full_pixel)
+        xd->fullpixel_mask = ~7;  /* low 3 (sub-pel) bits cleared */
 }
 
 void vp8_encode_frame(VP8_COMP *cpi)
@@ -1654,8 +1652,6 @@ int vp8cx_encode_inter_macroblock
           xd->mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_inter(x) << 1);
 #endif
 
-        vp8_build_uvmvs(xd, cpi->common.full_pixel);
-
         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
             ref_fb_idx = cpi->common.lst_fb_idx;
         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 408a5956e..8722cf8ae 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -1268,18 +1268,3 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     RECON_INVOKE(&rtcd->common->recon, recon_mby)
         (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 }
-
-void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
-{
-    vp8_build_inter_predictors_mbuv(&x->e_mbd);
-    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
-#if CONFIG_T8X8
-    if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2)
-       vp8_transform_mbuv_8x8(x);
-    else
-#endif
-    vp8_transform_mbuv(x);
-
-    vp8_quantize_mbuv(x);
-
-}
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 73f1ad223..498afe785 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -99,7 +99,7 @@ void vp8_build_dcblock(MACROBLOCK *b);
 void vp8_transform_mb(MACROBLOCK *mb);
 void vp8_transform_mbuv(MACROBLOCK *x);
 void vp8_transform_intra_mby(MACROBLOCK *x);
-void vp8_encode_inter16x16uvrd(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
+
 void vp8_optimize_mby(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
 void vp8_optimize_mbuv(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
 void vp8_encode_inter16x16y(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 4820729ea..11e0e408f 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -240,10 +240,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                     // Increment the activity mask pointers.
                     x->mb_activity_ptr++;
 
-                    /* save the block info */
-                    for (i = 0; i < 16; i++)
-                        xd->mode_info_context->bmi[i] = xd->block[i].bmi;
-
                     // adjust to the next column of macroblocks
                     x->src.y_buffer += 16;
                     x->src.u_buffer += 8;
@@ -474,6 +470,9 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
 
         setup_mbby_copy(&mbr_ei[i].mb, x);
 
+        mbd->fullpixel_mask = ~0;      /* all mv bits kept */
+        if(cm->full_pixel)
+            mbd->fullpixel_mask = ~7;  /* low 3 (sub-pel) bits cleared */
     }
 }
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 9cdaf7d53..bfac3fa8f 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -775,6 +775,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
             x->e_mbd.mode_info_context->mbmi.mv.as_int =
                                                     mode_mv[this_mode].as_int;
 
+            /* Exit early and don't compute the distortion if this macroblock is marked inactive. */
+            if (cpi->active_map_enabled && x->active_ptr[0] == 0)
+            {
+                sse = 0;
+                distortion2 = 0;
+                x->skip = 1;
+                break;
+            }
+
             if((this_mode != NEWMV) ||
                 !(have_subp_search) || cpi->common.full_pixel==1)
                 distortion2 = get_inter_mbpred_error(x,
@@ -783,11 +792,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
 
             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
 
-            if (cpi->active_map_enabled && x->active_ptr[0] == 0)
-            {
-                x->skip = 1;
-            }
-            else if (sse < x->encode_breakout)
+            if (sse < x->encode_breakout)
             {
                 // Check u and v to make sure skip is ok
                 int sse2 = 0;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 952977094..416235d2b 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -498,14 +498,23 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
 
     unsigned int sse1 = 0;
     unsigned int sse2 = 0;
-    int mv_row;
-    int mv_col;
+    int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
+    int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
     int offset;
     int pre_stride = x->e_mbd.block[16].pre_stride;
 
-    vp8_build_uvmvs(&x->e_mbd, 0);
-    mv_row = x->e_mbd.block[16].bmi.mv.as_mv.row;
-    mv_col = x->e_mbd.block[16].bmi.mv.as_mv.col;
+    if (mv_row < 0)
+        mv_row -= 1;
+    else
+        mv_row += 1;
+
+    if (mv_col < 0)
+        mv_col -= 1;
+    else
+        mv_col += 1;
+
+    mv_row /= 2;   /* uv mv = mbmi mv / 2 to nearest, ties away from zero */
+    mv_col /= 2;   /* note: no fullpixel_mask before the offset calc below */
 
     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
     uptr = x->e_mbd.pre.u_buffer + offset;
@@ -849,11 +858,36 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
 }
 
 
-static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
+static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
+                            int *distortion, int fullpixel /* unused */)
 {
-    vp8_build_uvmvs(&x->e_mbd, fullpixel);
-    vp8_encode_inter16x16uvrd(IF_RTCD(&cpi->rtcd), x);
+    vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
+    ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
+        x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
 
+#if CONFIG_T8X8
+    if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2)
+       vp8_transform_mbuv_8x8(x);
+    else
+#endif
+    vp8_transform_mbuv(x);
+    vp8_quantize_mbuv(x);
+
+    *rate       = rd_cost_mbuv(x);
+    *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
+
+    return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
+}
+
+static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
+                          int *distortion, int fullpixel)
+{
+    vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
+    ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
+        x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
+
+    vp8_transform_mbuv(x);
+    vp8_quantize_mbuv(x);
 
     *rate       = rd_cost_mbuv(x);
     *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
@@ -2019,7 +2053,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             if (tmp_rd < best_yrd)
             {
                 // Now work out UV cost and add it in
-                vp8_rd_inter_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
+                rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
                 rate2 += rate_uv;
                 distortion2 += distortion_uv;
             }
@@ -2270,7 +2304,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             distortion2 += distortion;
 
             // UV cost and distortion
-            vp8_rd_inter_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
+            rd_inter16x16_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
             rate2 += rate_uv;
             distortion2 += distortion_uv;
             break;
@@ -2448,13 +2482,13 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     if (best_mbmode.mode == B_PRED)
     {
         for (i = 0; i < 16; i++)
-          x->e_mbd.block[i].bmi.as_mode = best_bmodes[i].as_mode;
+            xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
     }
 
     if (best_mbmode.mode == SPLITMV)
     {
         for (i = 0; i < 16; i++)
-            x->e_mbd.block[i].bmi.mv.as_int = best_bmodes[i].mv.as_int;
+            xd->mode_info_context->bmi[i].mv.as_int = best_bmodes[i].mv.as_int;
 
         vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
@@ -2464,6 +2498,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
 
+
+
 }
 
 void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)