16 files changed, 522 insertions, 388 deletions
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 930e896ce..a723ec030 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -24,14 +24,6 @@ static const unsigned int bmode_cts[VP8_BINTRAMODES] =
     43891, 17694, 10036, 3920, 3363, 2546, 5119, 3221, 2471, 1723
 };
 
-typedef enum
-{
-    SUBMVREF_NORMAL,
-    SUBMVREF_LEFT_ZED,
-    SUBMVREF_ABOVE_ZED,
-    SUBMVREF_LEFT_ABOVE_SAME,
-    SUBMVREF_LEFT_ABOVE_ZED
-} sumvfref_t;
 
 int vp8_mv_cont(const int_mv *l, const int_mv *a)
 {
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index fdb170df3..f4b48ff43 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -15,6 +15,15 @@
 #include "onyxc_int.h"
 #include "treecoder.h"
 
+typedef enum
+{
+    SUBMVREF_NORMAL,
+    SUBMVREF_LEFT_ZED,
+    SUBMVREF_ABOVE_ZED,
+    SUBMVREF_LEFT_ABOVE_SAME,
+    SUBMVREF_LEFT_ABOVE_ZED
+} sumvfref_t;
+
 typedef const int vp8_mbsplit[16];
 
 #define VP8_NUMMBSPLITS 4
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index c142a0415..01909b937 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -18,18 +18,14 @@
 #include "treecoder.h"
 
 
-static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
+static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp,
+                    const int *ref_frame_sign_bias)
 {
-    MV xmv;
-    xmv = mvp->as_mv;
-
     if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
     {
-        xmv.row *= -1;
-        xmv.col *= -1;
+        mvp->as_mv.row *= -1;
+        mvp->as_mv.col *= -1;
     }
-
-    mvp->as_mv = xmv;
 }
 
 #define LEFT_TOP_MARGIN (16 << 3)
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index fe0644bdd..a38b49eb9 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -506,7 +506,8 @@ void vp8_loop_filter_partial_frame
     unsigned char *y_ptr;
     int mb_row;
     int mb_col;
-    int mb_cols = post->y_width  >> 4;
+    int mb_cols = post->y_width >> 4;
+    int mb_rows = post->y_height >> 4;
 
     int linestocopy, i;
 
@@ -521,15 +522,9 @@ void vp8_loop_filter_partial_frame
 
     int lvl_seg[MAX_MB_SEGMENTS];
 
-    mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
-
-    /* 3 is a magic number. 4 is probably magic too */
-    linestocopy = (post->y_height >> (4 + 3));
-
-    if (linestocopy < 1)
-        linestocopy = 1;
-
-    linestocopy <<= 4;
+    /* number of MB rows to use in partial filtering */
+    linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;
+    linestocopy = linestocopy ? linestocopy << 4 : 16;     /* 16 lines per MB */
 
     /* Note the baseline filter values for each segment */
     /* See vp8_loop_filter_frame_init. Rather than call that for each change
@@ -554,8 +549,9 @@ void vp8_loop_filter_partial_frame
         }
     }
 
-    /* Set up the buffer pointers */
-    y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
+    /* Set up the buffer pointers; partial image starts at ~middle of frame */
+    y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;
+    mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
 
     /* vp8_filter each macro block */
     for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index 9887cf55b..340339a91 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -15,7 +15,10 @@
 #include "vpx_ports/mem.h"
 #include "vpx_config.h"
 
-#define MAX_LOOP_FILTER 63
+#define MAX_LOOP_FILTER             63
+/* fraction of total macroblock rows to be used in fast filter level picking */
+/* has to be > 2 */
+#define PARTIAL_FRAME_FRACTION      8
 
 typedef enum
 {
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 54547d95c..5b913ae6b 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -18,92 +18,58 @@
 #if CONFIG_DEBUG
 #include <assert.h>
 #endif
-static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
+static B_PREDICTION_MODE read_bmode(vp8_reader *bc, const vp8_prob *p)
 {
     const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
 
-    return i;
+    return (B_PREDICTION_MODE)i;
 }
 
-
-static int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
+static MB_PREDICTION_MODE read_ymode(vp8_reader *bc, const vp8_prob *p)
 {
     const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
 
-    return i;
+    return (MB_PREDICTION_MODE)i;
 }
 
-static int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
+static MB_PREDICTION_MODE read_kf_ymode(vp8_reader *bc, const vp8_prob *p)
 {
     const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
 
-    return i;
+    return (MB_PREDICTION_MODE)i;
 }
 
-
-
-static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
+static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p)
 {
     const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
 
-    return i;
-}
-
-static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
-{
-    /* Is segmentation enabled */
-    if (x->segmentation_enabled && x->update_mb_segmentation_map)
-    {
-        /* If so then read the segment id. */
-        if (vp8_read(r, x->mb_segment_tree_probs[0]))
-            mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
-        else
-            mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
-    }
+    return (MB_PREDICTION_MODE)i;
 }
 
-static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col)
+static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi)
 {
     vp8_reader *const bc = & pbi->bc;
     const int mis = pbi->common.mode_info_stride;
 
-        {
-            MB_PREDICTION_MODE y_mode;
-
-            /* Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
-             * By default on a key frame reset all MBs to segment 0
-             */
-            m->mbmi.segment_id = 0;
-
-            if (pbi->mb.update_mb_segmentation_map)
-                vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
-
-            /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
-            if (pbi->common.mb_no_coeff_skip)
-                m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
-            else
-                m->mbmi.mb_skip_coeff = 0;
-
-            y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, pbi->common.kf_ymode_prob);
-
-            m->mbmi.ref_frame = INTRA_FRAME;
+    mi->mbmi.ref_frame = INTRA_FRAME;
+    mi->mbmi.mode = read_kf_ymode(bc, pbi->common.kf_ymode_prob);
 
-            if ((m->mbmi.mode = y_mode) == B_PRED)
-            {
-                int i = 0;
-
-                do
-                {
-                    const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
-                    const B_PREDICTION_MODE L = left_block_mode(m, i);
+    if (mi->mbmi.mode == B_PRED)
+    {
+        int i = 0;
 
-                    m->bmi[i].as_mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]);
-                }
-                while (++i < 16);
-            }
+        do
+        {
+            const B_PREDICTION_MODE A = above_block_mode(mi, i, mis);
+            const B_PREDICTION_MODE L = left_block_mode(mi, i);
 
-            m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
+            mi->bmi[i].as_mode =
+                read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]);
         }
+        while (++i < 16);
+    }
+
+    mi->mbmi.uv_mode = read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
 }
 
 static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
@@ -172,32 +138,30 @@ static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc)
     while (++i < 2);
 }
 
-
-static MB_PREDICTION_MODE read_mv_ref(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_mv_ref_tree, p);
-
-    return (MB_PREDICTION_MODE)i;
-}
-
-static B_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
+static int_mv sub_mv_ref(vp8_reader *bc, const vp8_prob *p, int_mv abovemv,
+                         int_mv leftmv, int_mv best_mv, const MV_CONTEXT * mvc)
 {
-    const int i = vp8_treed_read(bc, vp8_sub_mv_ref_tree, p);
-
-    return (B_PREDICTION_MODE)i;
+    int_mv blockmv;
+    blockmv.as_int = 0;
+    if( vp8_read(bc, p[0]) )
+    {
+        if( vp8_read(bc, p[1]) )
+        {
+            if( vp8_read(bc, p[2]) )
+            {
+                read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc);
+                blockmv.as_mv.row += best_mv.as_mv.row;
+                blockmv.as_mv.col += best_mv.as_mv.col;
+            }
+            return blockmv;
+        }
+        else
+            return abovemv;
+    }
+    else
+        return leftmv;
 }
 
-#ifdef VPX_MODE_COUNT
-unsigned int vp8_mv_cont_count[5][4] =
-{
-    { 0, 0, 0, 0 },
-    { 0, 0, 0, 0 },
-    { 0, 0, 0, 0 },
-    { 0, 0, 0, 0 },
-    { 0, 0, 0, 0 }
-};
-#endif
-
 static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1};
 static const unsigned char mbsplit_fill_offset[4][16] = {
     { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
@@ -236,7 +200,8 @@ static void mb_mode_mv_init(VP8D_COMP *pbi)
 
             do
             {
-                pbi->common.fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+                pbi->common.fc.ymode_prob[i] =
+                    (vp8_prob) vp8_read_literal(bc, 8);
             }
             while (++i < 4);
         }
@@ -247,7 +212,8 @@ static void mb_mode_mv_init(VP8D_COMP *pbi)
 
             do
             {
-                pbi->common.fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+                pbi->common.fc.uv_mode_prob[i] =
+                    (vp8_prob) vp8_read_literal(bc, 8);
             }
             while (++i < 3);
         }
@@ -256,207 +222,340 @@ static void mb_mode_mv_init(VP8D_COMP *pbi)
     }
 }
 
+const vp8_prob vp8_sub_mv_ref_prob3 [8][VP8_SUBMVREFS-1] =
+{
+    { 147, 136, 18 },   /* SUBMVREF_NORMAL          */
+    { 223, 1  , 34 },   /* SUBMVREF_LEFT_ABOVE_SAME */
+    { 106, 145, 1  },   /* SUBMVREF_LEFT_ZED        */
+    { 208, 1  , 1  },   /* SUBMVREF_LEFT_ABOVE_ZED  */
+    { 179, 121, 1  },   /* SUBMVREF_ABOVE_ZED       */
+    { 223, 1  , 34 },   /* SUBMVREF_LEFT_ABOVE_SAME */
+    { 179, 121, 1  },   /* SUBMVREF_ABOVE_ZED       */
+    { 208, 1  , 1  }    /* SUBMVREF_LEFT_ABOVE_ZED  */
+};
 
-static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
-                            int mb_row, int mb_col)
+static
+const vp8_prob * get_sub_mv_ref_prob(const int_mv *l, const int_mv *a)
 {
-    vp8_reader *const bc = & pbi->bc;
-    MV_CONTEXT *const mvc = pbi->common.fc.mvc;
-    const int mis = pbi->common.mode_info_stride;
+    int lez = (l->as_int == 0);
+    int aez = (a->as_int == 0);
+    int lea = (l->as_int == a->as_int);
+    const vp8_prob * prob;
 
-    int_mv *const mv = & mbmi->mv;
-    int mb_to_left_edge;
-    int mb_to_right_edge;
-    int mb_to_top_edge;
-    int mb_to_bottom_edge;
+    prob = vp8_sub_mv_ref_prob3[(aez << 2) |
+                                (lez << 1) |
+                                (lea)];
+
+    return prob;
+}
 
-    mb_to_top_edge = pbi->mb.mb_to_top_edge;
-    mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge;
-    mb_to_top_edge -= LEFT_TOP_MARGIN;
-    mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi,
+                        MB_MODE_INFO *mbmi, int mis, int_mv best_mv,
+                        MV_CONTEXT *const mvc, int mb_to_left_edge,
+                        int mb_to_right_edge, int mb_to_top_edge,
+                        int mb_to_bottom_edge)
+{
+    int s;      /* split configuration (16x8, 8x16, 8x8, 4x4) */
+    int num_p;  /* number of partitions in the split configuration
+                  (see vp8_mbsplit_count) */
+    int j = 0;
+
+    s = 3;
+    num_p = 16;
+    if( vp8_read(bc, 110) )
+    {
+        s = 2;
+        num_p = 4;
+        if( vp8_read(bc, 111) )
+        {
+            s = vp8_read(bc, 150);
+            num_p = 2;
+        }
+    }
 
     mbmi->need_to_clamp_mvs = 0;
-    /* Distance of Mb to the various image edges.
-     * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
-     */
-    pbi->mb.mb_to_left_edge =
-    mb_to_left_edge = -((mb_col * 16) << 3);
-    mb_to_left_edge -= LEFT_TOP_MARGIN;
+    do  /* for each subset j */
+    {
+        int_mv leftmv, abovemv;
+        int_mv blockmv;
+        int k;  /* first block in subset j */
 
-    pbi->mb.mb_to_right_edge =
-    mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
-    mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
+        const vp8_prob *prob;
+        k = vp8_mbsplit_offset[s][j];
 
-    /* If required read in new segmentation data for this MB */
-    if (pbi->mb.update_mb_segmentation_map)
-        vp8_read_mb_features(bc, mbmi, &pbi->mb);
+        leftmv.as_int = left_block_mv(mi, k);
+        abovemv.as_int = above_block_mv(mi, k, mis);
 
-    /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
-    if (pbi->common.mb_no_coeff_skip)
-        mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
-    else
-        mbmi->mb_skip_coeff = 0;
+        prob = get_sub_mv_ref_prob(&leftmv, &abovemv);
+
+        blockmv = sub_mv_ref(bc, prob, abovemv, leftmv, best_mv, mvc);
+
+        mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv,
+                                                  mb_to_left_edge,
+                                                  mb_to_right_edge,
+                                                  mb_to_top_edge,
+                                                  mb_to_bottom_edge);
+
+        {
+            /* Fill (uniform) mvs of jth subset.
+             Must do it here because ensuing subsets can
+             refer back to us via "left" or "above". */
+            const unsigned char *fill_offset;
+            unsigned int fill_count = mbsplit_fill_count[s];
+
+            fill_offset = &mbsplit_fill_offset[s]
+                             [(unsigned char)j * mbsplit_fill_count[s]];
+
+            do {
+                mi->bmi[ *fill_offset].mv.as_int = blockmv.as_int;
+                fill_offset++;
+            }while (--fill_count);
+        }
 
-    if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra)))    /* inter MB */
+    }
+    while (++j < num_p);
+
+    mbmi->partitioning = s;
+}
+
+static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
+                            int mb_row, int mb_col)
+{
+    vp8_reader *const bc = & pbi->bc;
+    mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra);
+    if (mbmi->ref_frame)    /* inter MB */
     {
-        int rct[4];
+        enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
         vp8_prob mv_ref_p [VP8_MVREFS-1];
-        int_mv nearest, nearby, best_mv;
+        int cnt[4];
+        int *cntx = cnt;
+        int_mv near_mvs[4];
+        int_mv *nmv = near_mvs;
+        const int mis = pbi->mb.mode_info_stride;
+        const MODE_INFO *above = mi - mis;
+        const MODE_INFO *left = mi - 1;
+        const MODE_INFO *aboveleft = above - 1;
+        MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+        int *ref_frame_sign_bias = pbi->common.ref_frame_sign_bias;
+        int propogate_mv_for_ec = 0;
+
+        mbmi->need_to_clamp_mvs = 0;
 
         if (vp8_read(bc, pbi->prob_last))
         {
-            mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, pbi->prob_gf)));
+            mbmi->ref_frame =
+                (MV_REFERENCE_FRAME)((int)(2 + vp8_read(bc, pbi->prob_gf)));
         }
 
-        vp8_find_near_mvs(&pbi->mb, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
+        /* Zero accumulators */
+        nmv[0].as_int = nmv[1].as_int = nmv[2].as_int = 0;
+        cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
 
-        vp8_mv_ref_probs(mv_ref_p, rct);
-
-        mbmi->uv_mode = DC_PRED;
-        switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
+        /* Process above */
+        if (above->mbmi.ref_frame != INTRA_FRAME)
         {
-        case SPLITMV:
-        {
-            const int s = mbmi->partitioning =
-                      vp8_treed_read(bc, vp8_mbsplit_tree, vp8_mbsplit_probs);
-            const int num_p = vp8_mbsplit_count [s];
-            int j = 0;
+            if (above->mbmi.mv.as_int)
+            {
+                (++nmv)->as_int = above->mbmi.mv.as_int;
+                mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame],
+                        mbmi->ref_frame, nmv, ref_frame_sign_bias);
+                ++cntx;
+            }
+
+            *cntx += 2;
+        }
 
-            do  /* for each subset j */
+        /* Process left */
+        if (left->mbmi.ref_frame != INTRA_FRAME)
+        {
+            if (left->mbmi.mv.as_int)
             {
-                int_mv leftmv, abovemv;
-                int_mv blockmv;
-                int k;  /* first block in subset j */
-                int mv_contz;
-                k = vp8_mbsplit_offset[s][j];
+                int_mv this_mv;
 
-                leftmv.as_int = left_block_mv(mi, k);
-                abovemv.as_int = above_block_mv(mi, k, mis);
-                mv_contz = vp8_mv_cont(&leftmv, &abovemv);
+                this_mv.as_int = left->mbmi.mv.as_int;
+                mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame],
+                        mbmi->ref_frame, &this_mv, ref_frame_sign_bias);
 
-                switch (sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
+                if (this_mv.as_int != nmv->as_int)
                 {
-                case NEW4X4:
-                    read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc);
-                    blockmv.as_mv.row += best_mv.as_mv.row;
-                    blockmv.as_mv.col += best_mv.as_mv.col;
-  #ifdef VPX_MODE_COUNT
-                    vp8_mv_cont_count[mv_contz][3]++;
-  #endif
-                    break;
-                case LEFT4X4:
-                    blockmv.as_int = leftmv.as_int;
-  #ifdef VPX_MODE_COUNT
-                    vp8_mv_cont_count[mv_contz][0]++;
-  #endif
-                    break;
-                case ABOVE4X4:
-                    blockmv.as_int = abovemv.as_int;
-  #ifdef VPX_MODE_COUNT
-                    vp8_mv_cont_count[mv_contz][1]++;
-  #endif
-                    break;
-                case ZERO4X4:
-                    blockmv.as_int = 0;
-  #ifdef VPX_MODE_COUNT
-                    vp8_mv_cont_count[mv_contz][2]++;
-  #endif
-                    break;
-                default:
-                    break;
+                    (++nmv)->as_int = this_mv.as_int;
+                    ++cntx;
                 }
 
-                mbmi->need_to_clamp_mvs = vp8_check_mv_bounds(&blockmv,
-                                                          mb_to_left_edge,
-                                                          mb_to_right_edge,
-                                                          mb_to_top_edge,
-                                                          mb_to_bottom_edge);
+                *cntx += 2;
+            }
+            else
+                cnt[CNT_INTRA] += 2;
+        }
+
+        /* Process above left */
+        if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
+        {
+            if (aboveleft->mbmi.mv.as_int)
+            {
+                int_mv this_mv;
 
+                this_mv.as_int = aboveleft->mbmi.mv.as_int;
+                mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame],
+                        mbmi->ref_frame, &this_mv, ref_frame_sign_bias);
+
+                if (this_mv.as_int != nmv->as_int)
                 {
-                    /* Fill (uniform) modes, mvs of jth subset.
-                     Must do it here because ensuing subsets can
-                     refer back to us via "left" or "above". */
-                    const unsigned char *fill_offset;
-                    unsigned int fill_count = mbsplit_fill_count[s];
-
-                    fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]];
-
-                    do {
-                        mi->bmi[ *fill_offset].mv.as_int = blockmv.as_int;
-                        fill_offset++;
-                    }while (--fill_count);
+                    (++nmv)->as_int = this_mv.as_int;
+                    ++cntx;
                 }
 
+                *cntx += 1;
             }
-            while (++j < num_p);
+            else
+                cnt[CNT_INTRA] += 1;
         }
 
-        mv->as_int = mi->bmi[15].mv.as_int;
-
-        break;  /* done with SPLITMV */
-
-        case NEARMV:
-            mv->as_int = nearby.as_int;
-            /* Clip "next_nearest" so that it does not extend to far out of image */
-            vp8_clamp_mv(mv, mb_to_left_edge, mb_to_right_edge,
-                         mb_to_top_edge, mb_to_bottom_edge);
-            goto propagate_mv;
-
-        case NEARESTMV:
-            mv->as_int = nearest.as_int;
-            /* Clip "next_nearest" so that it does not extend to far out of image */
-            vp8_clamp_mv(mv, mb_to_left_edge, mb_to_right_edge,
-                         mb_to_top_edge, mb_to_bottom_edge);
-            goto propagate_mv;
-
-        case ZEROMV:
-            mv->as_int = 0;
-            goto propagate_mv;
-
-        case NEWMV:
-            read_mv(bc, &mv->as_mv, (const MV_CONTEXT *) mvc);
-            mv->as_mv.row += best_mv.as_mv.row;
-            mv->as_mv.col += best_mv.as_mv.col;
-
-            /* Don't need to check this on NEARMV and NEARESTMV modes
-             * since those modes clamp the MV. The NEWMV mode does not,
-             * so signal to the prediction stage whether special
-             * handling may be required.
+        mv_ref_p[0] = vp8_mode_contexts [cnt[CNT_INTRA]] [0];
+
+        if( vp8_read(bc, mv_ref_p[0]) )
+        {
+            int mb_to_left_edge;
+            int mb_to_right_edge;
+
+            /* Distance of MB to the various image edges.
+             * These are specified in 1/8th pel as they are always compared to MV
+             * values that are in 1/8th pel units
              */
-            mbmi->need_to_clamp_mvs = vp8_check_mv_bounds(mv,
-                                                      mb_to_left_edge,
-                                                      mb_to_right_edge,
-                                                      mb_to_top_edge,
-                                                      mb_to_bottom_edge);
+            pbi->mb.mb_to_left_edge =
+            mb_to_left_edge = -((mb_col * 16) << 3);
+            mb_to_left_edge -= LEFT_TOP_MARGIN;
 
-        propagate_mv:  /* same MV throughout */
-#if CONFIG_ERROR_CONCEALMENT
-            if(pbi->ec_enabled)
+            pbi->mb.mb_to_right_edge =
+            mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
+            mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
+
+            /* If we have three distinct MV's ... */
+            if (cnt[CNT_SPLITMV])
             {
-                mi->bmi[ 0].mv.as_int =
-                mi->bmi[ 1].mv.as_int =
-                mi->bmi[ 2].mv.as_int =
-                mi->bmi[ 3].mv.as_int =
-                mi->bmi[ 4].mv.as_int =
-                mi->bmi[ 5].mv.as_int =
-                mi->bmi[ 6].mv.as_int =
-                mi->bmi[ 7].mv.as_int =
-                mi->bmi[ 8].mv.as_int =
-                mi->bmi[ 9].mv.as_int =
-                mi->bmi[10].mv.as_int =
-                mi->bmi[11].mv.as_int =
-                mi->bmi[12].mv.as_int =
-                mi->bmi[13].mv.as_int =
-                mi->bmi[14].mv.as_int =
-                mi->bmi[15].mv.as_int = mv->as_int;
+                /* See if above-left MV can be merged with NEAREST */
+                if (nmv->as_int == near_mvs[CNT_NEAREST].as_int)
+                    cnt[CNT_NEAREST] += 1;
             }
-#endif
-            break;
-        default:;
-  #if CONFIG_DEBUG
-            assert(0);
-  #endif
+
+            cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
+                                + (left->mbmi.mode == SPLITMV)) * 2
+                               + (aboveleft->mbmi.mode == SPLITMV);
+
+            /* Swap near and nearest if necessary */
+            if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
+            {
+                int tmp;
+                tmp = cnt[CNT_NEAREST];
+                cnt[CNT_NEAREST] = cnt[CNT_NEAR];
+                cnt[CNT_NEAR] = tmp;
+                tmp = near_mvs[CNT_NEAREST].as_int;
+                near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
+                near_mvs[CNT_NEAR].as_int = tmp;
+            }
+
+            mv_ref_p[1] = vp8_mode_contexts [cnt[CNT_NEAREST]] [1];
+
+            if( vp8_read(bc, mv_ref_p[1]) )
+            {
+                mv_ref_p[2] = vp8_mode_contexts [cnt[CNT_NEAR]] [2];
+
+                if( vp8_read(bc, mv_ref_p[2]) )
+                {
+                    int mb_to_top_edge;
+                    int mb_to_bottom_edge;
+
+                    mb_to_top_edge = pbi->mb.mb_to_top_edge;
+                    mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge;
+                    mb_to_top_edge -= LEFT_TOP_MARGIN;
+                    mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+
+                    /* Use near_mvs[0] to store the "best" MV */
+                    if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
+                        near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
+
+                    mv_ref_p[3] = vp8_mode_contexts [cnt[CNT_SPLITMV]] [3];
+
+                    vp8_clamp_mv2(&near_mvs[CNT_INTRA], &pbi->mb);
+
+                    if( vp8_read(bc, mv_ref_p[3]) )
+                    {
+                        decode_split_mv(bc, mi,
+                                                    mbmi, mis,
+                                                    near_mvs[CNT_INTRA],
+                                                    mvc, mb_to_left_edge,
+                                                    mb_to_right_edge,
+                                                    mb_to_top_edge,
+                                                    mb_to_bottom_edge);
+                        mbmi->mv.as_int = mi->bmi[15].mv.as_int;
+                        mbmi->mode =  SPLITMV;
+                    }
+                    else
+                    {
+                        int_mv *const mbmi_mv = & mbmi->mv;
+                        read_mv(bc, &mbmi_mv->as_mv, (const MV_CONTEXT *) mvc);
+                        mbmi_mv->as_mv.row += near_mvs[CNT_INTRA].as_mv.row;
+                        mbmi_mv->as_mv.col += near_mvs[CNT_INTRA].as_mv.col;
+
+                        /* Don't need to check this on NEARMV and NEARESTMV
+                         * modes since those modes clamp the MV. The NEWMV mode
+                         * does not, so signal to the prediction stage whether
+                         * special handling may be required.
+                         */
+                        mbmi->need_to_clamp_mvs =
+                            vp8_check_mv_bounds(mbmi_mv, mb_to_left_edge,
+                                                mb_to_right_edge,
+                                                mb_to_top_edge,
+                                                mb_to_bottom_edge);
+                        mbmi->mode =  NEWMV;
+                        propogate_mv_for_ec = 1;
+                    }
+                }
+                else
+                {
+                    mbmi->mode =  NEARMV;
+                    vp8_clamp_mv2(&near_mvs[CNT_NEAR], &pbi->mb);
+                    mbmi->mv.as_int = near_mvs[CNT_NEAR].as_int;
+                    propogate_mv_for_ec = 1;
+                }
+            }
+            else
+            {
+                mbmi->mode =  NEARESTMV;
+                vp8_clamp_mv2(&near_mvs[CNT_NEAREST], &pbi->mb);
+                mbmi->mv.as_int = near_mvs[CNT_NEAREST].as_int;
+                propogate_mv_for_ec = 1;
+            }
+        }
+        else {
+            mbmi->mode =  ZEROMV;
+            mbmi->mv.as_int = 0;
+            propogate_mv_for_ec = 1;
+        }
+
+        mbmi->uv_mode = DC_PRED;
+
+#if CONFIG_ERROR_CONCEALMENT
+        if(pbi->ec_enabled && propogate_mv_for_ec)
+        {
+            mi->bmi[ 0].mv.as_int =
+            mi->bmi[ 1].mv.as_int =
+            mi->bmi[ 2].mv.as_int =
+            mi->bmi[ 3].mv.as_int =
+            mi->bmi[ 4].mv.as_int =
+            mi->bmi[ 5].mv.as_int =
+            mi->bmi[ 6].mv.as_int =
+            mi->bmi[ 7].mv.as_int =
+            mi->bmi[ 8].mv.as_int =
+            mi->bmi[ 9].mv.as_int =
+            mi->bmi[10].mv.as_int =
+            mi->bmi[11].mv.as_int =
+            mi->bmi[12].mv.as_int =
+            mi->bmi[13].mv.as_int =
+            mi->bmi[14].mv.as_int =
+            mi->bmi[15].mv.as_int = mbmi->mv.as_int;
         }
+#endif
     }
     else
     {
@@ -464,21 +563,62 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
         mbmi->mv.as_int = 0;
 
         /* MB is intra coded */
-        if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED)
+        if ((mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED)
         {
             int j = 0;
             do
             {
-                mi->bmi[j].as_mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pbi->common.fc.bmode_prob);
+                mi->bmi[j].as_mode = read_bmode(bc, pbi->common.fc.bmode_prob);
             }
             while (++j < 16);
         }
 
-        mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.fc.uv_mode_prob);
+        mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob);
     }
 
 }
 
+static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
+{
+    /* Is segmentation enabled */
+    if (x->segmentation_enabled && x->update_mb_segmentation_map)
+    {
+        /* If so then read the segment id. */
+        if (vp8_read(r, x->mb_segment_tree_probs[0]))
+            mi->segment_id =
+                (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
+        else
+            mi->segment_id =
+                (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
+    }
+}
+
+static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi,
+                               MB_MODE_INFO *mbmi, int mb_row, int mb_col)
+{
+    /* Read the Macroblock segmentation map if it is being updated explicitly
+     * this frame.
+     * Otherwise, on a key frame, reset this MB's segment id to 0.
+     */
+    if (pbi->mb.update_mb_segmentation_map)
+        read_mb_features(&pbi->bc, &mi->mbmi, &pbi->mb);
+    else if(pbi->common.frame_type == KEY_FRAME)
+        mi->mbmi.segment_id = 0;
+
+    /* Read the macroblock coeff skip flag if this feature is in use,
+     * else default to 0 */
+    if (pbi->common.mb_no_coeff_skip)
+        mi->mbmi.mb_skip_coeff = vp8_read(&pbi->bc, pbi->prob_skip_false);
+    else
+        mi->mbmi.mb_skip_coeff = 0;
+
+    if(pbi->common.frame_type == KEY_FRAME)
+        read_kf_modes(pbi, mi);
+    else
+        read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
+
+}
+
 void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 {
     MODE_INFO *mi = pbi->common.mi;
@@ -489,27 +629,18 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
     while (++mb_row < pbi->common.mb_rows)
     {
         int mb_col = -1;
-        int mb_to_top_edge;
-        int mb_to_bottom_edge;
-
-        pbi->mb.mb_to_top_edge =
-        mb_to_top_edge = -((mb_row * 16)) << 3;
-        mb_to_top_edge -= LEFT_TOP_MARGIN;
 
+        pbi->mb.mb_to_top_edge =  -((mb_row * 16)) << 3;
         pbi->mb.mb_to_bottom_edge =
-        mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
-        mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+            ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
 
         while (++mb_col < pbi->common.mb_cols)
         {
 #if CONFIG_ERROR_CONCEALMENT
             int mb_num = mb_row * pbi->common.mb_cols + mb_col;
 #endif
-            /*read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
-            if(pbi->common.frame_type == KEY_FRAME)
-                vp8_kfread_modes(pbi, mi, mb_row, mb_col);
-            else
-                read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
+
+            decode_mb_mode_mvs(pbi, mi, &mi->mbmi, mb_row, mb_col);
 
 #if CONFIG_ERROR_CONCEALMENT
             /* look for corruption. set mvs_corrupt_from_mb to the current
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 81f28db89..c3263082a 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -667,21 +667,19 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     if (data_end - data < 3)
     {
-        if (pbi->ec_active)
-        {
-            /* Declare the missing frame as an inter frame since it will
-               be handled as an inter frame when we have estimated its
-               motion vectors. */
-            pc->frame_type = INTER_FRAME;
-            pc->version = 0;
-            pc->show_frame = 1;
-            first_partition_length_in_bytes = 0;
-        }
-        else
+        if (!pbi->ec_active)
         {
             vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                                "Truncated packet");
         }
+
+        /* Declare the missing frame as an inter frame since it will
+           be handled as an inter frame when we have estimated its
+           motion vectors. */
+        pc->frame_type = INTER_FRAME;
+        pc->version = 0;
+        pc->show_frame = 1;
+        first_partition_length_in_bytes = 0;
     }
     else
     {
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 1a71948cb..1d4568593 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -9,7 +9,6 @@
  */
 
 
-#include "vp8/common/type_aliases.h"
 #include "vp8/common/blockd.h"
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
@@ -205,33 +204,34 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
     VP8_BD_VALUE value;
     const int *scan;
     register unsigned int shift;
-    UINT32 split;
+    unsigned int split;
     VP8_BD_VALUE bigsplit;
-    INT16 *qcoeff_ptr;
+    short *qcoeff_ptr;
 
     const vp8_prob *coef_probs;
-    int type;
     int stop;
-    INT16 val, bits_count;
-    INT16 c;
-    INT16 v;
+    int val, bits_count;
+    int c;
+    int v;
     const vp8_prob *Prob;
+    int start_coeff;
+
 
-    type = 3;
     i = 0;
     stop = 16;
 
     scan = vp8_default_zig_zag1d;
     qcoeff_ptr = &x->qcoeff[0];
+    coef_probs = fc->coef_probs [3] [ 0 ] [0];
 
     if (x->mode_info_context->mbmi.mode != B_PRED &&
         x->mode_info_context->mbmi.mode != SPLITMV)
     {
         i = 24;
         stop = 24;
-        type = 1;
         qcoeff_ptr += 24*16;
         eobtotal -= 16;
+        coef_probs = fc->coef_probs [1] [ 0 ] [0];
     }
 
     bufend  = bc->user_buffer_end;
@@ -240,23 +240,24 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
     count   = bc->count;
     range   = bc->range;
 
-
-    coef_probs = fc->coef_probs [type] [ 0 ] [0];
+    start_coeff = 0;
 
 BLOCK_LOOP:
     a = A + vp8_block2above[i];
     l = L + vp8_block2left[i];
 
-    c = (INT16)(!type);
+    c = start_coeff;
 
-    /*Dest = ((A)!=0) + ((B)!=0);*/
     VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
+
     Prob = coef_probs;
     Prob += v * ENTROPY_NODES;
+    *a = *l = 0;
 
 DO_WHILE:
     Prob += coef_bands_x[c];
     DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
+    *a = *l = 1;
 
 CHECK_0_:
     DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
@@ -346,7 +347,7 @@ ONE_CONTEXT_NODE_0_:
 
     qcoeff_ptr [ 15 ] = (INT16) v;
 BLOCK_FINISHED:
-    *a = *l = ((eobs[i] = c) != !type);   /* any nonzero data? */
+    eobs[i] = c;
     eobtotal += c;
     qcoeff_ptr += 16;
 
@@ -357,18 +358,18 @@ BLOCK_FINISHED:
 
     if (i == 25)
     {
-        type = 0;
+        start_coeff = 1;
         i = 0;
         stop = 16;
-        coef_probs = fc->coef_probs [type] [ 0 ] [0];
+        coef_probs = fc->coef_probs [0] [ 0 ] [0];
         qcoeff_ptr -= (24*16 + 16);
         goto BLOCK_LOOP;
     }
 
     if (i == 16)
     {
-        type = 2;
-        coef_probs = fc->coef_probs [type] [ 0 ] [0];
+        start_coeff = 0;
+        coef_probs = fc->coef_probs [2] [ 0 ] [0];
         stop = 24;
         goto BLOCK_LOOP;
     }
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 210a5a5c5..918d7d9ae 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -14,9 +14,9 @@
 #include "vp8/encoder/variance.h"
 #include "vp8/encoder/onyx_int.h"
 
-extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+extern void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
 
 void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
 {
@@ -123,15 +123,15 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;
         cpi->rtcd.quantize.fastquantb_pair       = vp8_fast_quantize_b_pair_neon;
     }
-#endif
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
 
 #if HAVE_ARMV7
 #if CONFIG_RUNTIME_CPU_DETECT
     if (flags & HAS_NEON)
 #endif
     {
-        vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+        vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame_neon;
     }
 #endif
-#endif
 }
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index 138ed46de..30513f912 100644
--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -24,8 +24,9 @@
 
 ; r0 BOOL_CODER *br
 ; r1 unsigned char *source
-
+; r2 unsigned char *source_end
 |vp8_start_encode| PROC
+    str     r2,  [r0, #vp8_writer_buffer_end]
     mov     r12, #0
     mov     r3,  #255
     mvn     r2,  #23
diff --git a/vp8/encoder/arm/neon/picklpf_arm.c b/vp8/encoder/arm/neon/picklpf_arm.c
index 3fb370c3d..6610d2dc2 100644
--- a/vp8/encoder/arm/neon/picklpf_arm.c
+++ b/vp8/encoder/arm/neon/picklpf_arm.c
@@ -8,20 +8,16 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "vp8/common/loopfilter.h"
+#include "vpx_scale/yv12config.h"
 
-#include "vp8/common/onyxc_int.h"
-#include "vp8/encoder/onyx_int.h"
-#include "vp8/encoder/quantize.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_scale/yv12extend.h"
-#include "vpx_scale/vpxscale.h"
-#include "vp8/common/alloccommon.h"
+extern void vp8_memcpy_partial_neon(unsigned char *dst_ptr,
+                                    unsigned char *src_ptr,
+                                    int sz);
 
-extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
 
-
-void
-vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
+void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
+                                      YV12_BUFFER_CONFIG *dst_ybc)
 {
     unsigned char *src_y, *dst_y;
     int yheight;
@@ -34,17 +30,19 @@ vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG
     yheight  = src_ybc->y_height;
     ystride  = src_ybc->y_stride;
 
-    linestocopy = (yheight >> (Fraction + 4));
-
-    if (linestocopy < 1)
-        linestocopy = 1;
-
-    linestocopy <<= 4;
-
-    yoffset  = ystride * ((yheight >> 5) * 16 - 8);
+    /* number of MB rows to use in partial filtering */
+    linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
+    linestocopy = linestocopy ? linestocopy << 4 : 16;     /* 16 lines per MB */
+
+    /* Copy extra 4 lines so that full filter context is available if filtering
+     * is done on the copied partial frame and not the original. Partial filter
+     * does mb filtering for top row also, which can modify 3 pixels above.
+     */
+    linestocopy += 4;
+    /* partial image starts at ~middle of frame (macroblock border) */
+    yoffset  = ystride * (((yheight >> 5) * 16) - 4);
     src_y = src_ybc->y_buffer + yoffset;
     dst_y = dst_ybc->y_buffer + yoffset;
 
-    //vpx_memcpy (dst_y, src_y, ystride * (linestocopy +16));
-    vp8_memcpy_neon((unsigned char *)dst_y, (unsigned char *)src_y, (int)(ystride *(linestocopy + 16)));
+    vp8_memcpy_partial_neon(dst_y, src_y, ystride * linestocopy);
 }
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index b0450e523..5b9f11e59 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT |vp8_memcpy_neon|
+    EXPORT |vp8_memcpy_partial_neon|
 
     ARM
     REQUIRE8
@@ -17,8 +17,10 @@
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
 ;=========================================
-;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
-|vp8_memcpy_neon| PROC
+;this is not a full memcpy function!!!
+;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
+;                             int sz);
+|vp8_memcpy_partial_neon| PROC
     ;pld                [r1]                        ;preload pred data
     ;pld                [r1, #128]
     ;pld                [r1, #256]
diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c
index c79e915f8..d05dab47c 100644
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c
@@ -49,6 +49,7 @@ DEFINE(vp8_writer_value,                        offsetof(vp8_writer, value));
 DEFINE(vp8_writer_count,                        offsetof(vp8_writer, count));
 DEFINE(vp8_writer_pos,                          offsetof(vp8_writer, pos));
 DEFINE(vp8_writer_buffer,                       offsetof(vp8_writer, buffer));
+DEFINE(vp8_writer_buffer_end,                   offsetof(vp8_writer, buffer_end));
 
 DEFINE(tokenextra_token,                        offsetof(TOKENEXTRA, Token));
 DEFINE(tokenextra_extra,                        offsetof(TOKENEXTRA, Extra));
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index c4e12ff02..748942b23 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -84,7 +84,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
             for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
             {
 
-                int i;
                 int recon_yoffset, recon_uvoffset;
                 int mb_col;
                 int ref_fb_idx = cm->lst_fb_idx;
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 28526f322..1a6fce9e1 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -17,8 +17,10 @@
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
 void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
 
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
+                                        YV12_BUFFER_CONFIG *dst_ybc);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
+                                        YV12_BUFFER_CONFIG *dst_ybc);
 
 void vp8_cmachine_specific_config(VP8_COMP *cpi)
 {
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index e01c59e2e..c1e5f7797 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -29,12 +29,11 @@ extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
 #define IF_RTCD(x) NULL
 #endif
 
-extern void
-(*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
-                                   YV12_BUFFER_CONFIG *dst_ybc,
-                                   int Fraction);
-void
-vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
+                                               YV12_BUFFER_CONFIG *dst_ybc);
+
+void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
+                                 YV12_BUFFER_CONFIG *dst_ybc)
 {
     unsigned char *src_y, *dst_y;
     int yheight;
@@ -47,21 +46,26 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst
     yheight  = src_ybc->y_height;
     ystride  = src_ybc->y_stride;
 
-    linestocopy = (yheight >> (Fraction + 4));
-
-    if (linestocopy < 1)
-        linestocopy = 1;
-
-    linestocopy <<= 4;
-
-    yoffset  = ystride * ((yheight >> 5) * 16 - 8);
+    /* number of MB rows to use in partial filtering */
+    linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
+    linestocopy = linestocopy ? linestocopy << 4 : 16;     /* 16 lines per MB */
+
+    /* Copy extra 4 lines so that full filter context is available if filtering
+     * is done on the copied partial frame and not the original. Partial filter
+     * does mb filtering for top row also, which can modify 3 pixels above.
+     */
+    linestocopy += 4;
+    /* partial image starts at ~middle of frame (macroblock border)*/
+    yoffset  = ystride * (((yheight >> 5) * 16) - 4);
     src_y = src_ybc->y_buffer + yoffset;
     dst_y = dst_ybc->y_buffer + yoffset;
 
-    vpx_memcpy(dst_y, src_y, ystride *(linestocopy + 16));
+    vpx_memcpy(dst_y, src_y, ystride * linestocopy);
 }
 
-static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd)
+static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
+                                YV12_BUFFER_CONFIG *dest,
+                                const vp8_variance_rtcd_vtable_t *rtcd)
 {
     int i, j;
     int Total = 0;
@@ -69,17 +73,16 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
     unsigned char *src = source->y_buffer;
     unsigned char *dst = dest->y_buffer;
 
-    int linestocopy = (source->y_height >> (Fraction + 4));
-    (void)rtcd;
-
-    if (linestocopy < 1)
-        linestocopy = 1;
+    int linestocopy;
 
-    linestocopy <<= 4;
+    /* number of MB rows to use in partial filtering */
+    linestocopy = (source->y_height >> 4) / PARTIAL_FRAME_FRACTION;
+    linestocopy = linestocopy ? linestocopy << 4 : 16;     /* 16 lines per MB */
 
 
-    srcoffset = source->y_stride   * (dest->y_height >> 5) * 16;
-    dstoffset = dest->y_stride     * (dest->y_height >> 5) * 16;
+    /* partial image starts at ~middle of frame (macroblock border)*/
+    srcoffset = source->y_stride * ((dest->y_height >> 5) * 16);
+    dstoffset = dest->y_stride   * ((dest->y_height >> 5) * 16);
 
     src += srcoffset;
     dst += dstoffset;
@@ -90,7 +93,9 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
         for (j = 0; j < source->y_width; j += 16)
         {
             unsigned int sse;
-            Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+            Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride,
+                                                     dst + j, dest->y_stride,
+                                                     &sse);
         }
 
         src += 16 * source->y_stride;
@@ -105,7 +110,8 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex)
 {
     int min_filter_level;
 
-    if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame && !cpi->common.refresh_alt_ref_frame)
+    if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame &&
+        !cpi->common.refresh_alt_ref_frame)
         min_filter_level = 0;
     else
     {
@@ -148,7 +154,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
     int best_filt_val = cm->filter_level;
 
     //  Make a copy of the unfiltered / processed recon buffer
-    vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf, 3);
+    vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
 
     if (cm->frame_type == KEY_FRAME)
         cm->sharpness_level = 0;
@@ -173,10 +179,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
     // Get the err using the previous frame's filter value.
     vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
 
-    best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+    best_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
 
     //  Re-instate the unfiltered frame
-    vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
+    vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
 
     filt_val -= (1 + ((filt_val > 10) ? 1 : 0));
 
@@ -187,11 +193,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
         vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
 
         // Get the err for filtered frame
-        filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+        filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
 
         //  Re-instate the unfiltered frame
-        vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
-
+        vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
 
         // Update the best case record or exit loop.
         if (filt_err < best_err)
@@ -220,10 +225,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
             vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
 
             // Get the err for filtered frame
-            filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+            filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
 
             //  Re-instate the unfiltered frame
-            vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
+            vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
 
             // Update the best case record or exit loop.
             if (filt_err < best_err)