16 files changed, 638 insertions, 312 deletions
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 00b74387c..0e23116ce 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -593,8 +593,11 @@ sym(vp8_intra_pred_uv_ho_%1):
     push        rsi
     push        rdi
 %ifidn %1, ssse3
+%ifndef GET_GOT_SAVE_ARG
     push        rbx
 %endif
+    GET_GOT     rbx
+%endif
     ; end prolog
 
     ; read from left and write out
@@ -606,9 +609,9 @@ sym(vp8_intra_pred_uv_ho_%1):
     mov         rdi,        arg(0) ;dst;
     movsxd      rcx,        dword ptr arg(1) ;dst_stride
 %ifidn %1, ssse3
-    lea         rbx,        [rax*3]
     lea         rdx,        [rcx*3]
     movdqa      xmm2,       [GLOBAL(dc_00001111)]
+    lea         rbx,        [rax*3]
 %endif
     dec         rsi
 %ifidn %1, mmx2
@@ -656,8 +659,11 @@ vp8_intra_pred_uv_ho_%1_loop:
 
     ; begin epilog
 %ifidn %1, ssse3
+    RESTORE_GOT
+%ifndef GET_GOT_SAVE_ARG
     pop         rbx
 %endif
+%endif
     pop         rdi
     pop         rsi
     UNSHADOW_ARGS
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 82a93cdab..0d14b545c 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -65,7 +65,9 @@ typedef struct
 {
     DECLARE_ALIGNED(16, short, src_diff[400]);       // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
     DECLARE_ALIGNED(16, short, coeff[400]);     // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
+    DECLARE_ALIGNED(16, unsigned char, thismb[256]);
 
+    unsigned char *thismb_ptr;
     // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries
     BLOCK block[25];
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 132e50858..0efb9f646 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -292,6 +292,9 @@ static void build_activity_map( VP8_COMP *cpi )
             xd->left_available = (mb_col != 0);
             recon_yoffset += 16;
 #endif
+            //Copy current mb to a buffer
+            RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+
             // measure activity
             mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col );
 
@@ -432,6 +435,9 @@ void encode_mb_row(VP8_COMP *cpi,
         x->rddiv = cpi->RDDIV;
         x->rdmult = cpi->RDMULT;
 
+        //Copy current mb to a buffer
+        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+
 #if CONFIG_MULTITHREAD
         if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
         {
@@ -1015,14 +1021,18 @@ void vp8_build_block_offsets(MACROBLOCK *x)
     vp8_build_block_doffsets(&x->e_mbd);
 
     // y blocks
+    x->thismb_ptr = &x->thismb[0];
     for (br = 0; br < 4; br++)
     {
         for (bc = 0; bc < 4; bc++)
         {
             BLOCK *this_block = &x->block[block];
-            this_block->base_src = &x->src.y_buffer;
-            this_block->src_stride = x->src.y_stride;
-            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
+            //this_block->base_src = &x->src.y_buffer;
+            //this_block->src_stride = x->src.y_stride;
+            //this_block->src = 4 * br * this_block->src_stride + 4 * bc;
+            this_block->base_src = &x->thismb_ptr;
+            this_block->src_stride = 16;
+            this_block->src = 4 * br * 16 + 4 * bc;
             ++block;
         }
     }
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index f076bbbb3..59db0253b 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -91,11 +91,11 @@ void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
 
 void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
-    int b;
+    BLOCK *b = &x->block[0];
 
     RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
 
-    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
+    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
 
     vp8_transform_intra_mby(x);
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index fd42ee417..3ed16b681 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -101,7 +101,9 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, in
 
 static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
-    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
+    BLOCK *b = &x->block[0];
+
+    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
 }
 
@@ -598,9 +600,11 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 /* this funciton is used by first pass only */
 void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
+    BLOCK *b = &x->block[0];
+
     vp8_build_inter16x16_predictors_mby(&x->e_mbd);
 
-    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
+    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
 
     transform_mby(x);
 
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 420ed8eff..1a37f03b9 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -148,6 +148,9 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                     x->rddiv = cpi->RDDIV;
                     x->rdmult = cpi->RDMULT;
 
+                    //Copy current mb to a buffer
+                    RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+
                     if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
                         vp8_activity_masking(cpi, x);
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 71e285f48..4fc6a8a58 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -16,6 +16,7 @@
 #include "encodeintra.h"
 #include "vp8/common/setupintrarecon.h"
 #include "mcomp.h"
+#include "firstpass.h"
 #include "vpx_scale/vpxscale.h"
 #include "encodemb.h"
 #include "vp8/common/extend.h"
@@ -49,7 +50,7 @@ extern int vp8_kf_boost_qadjustment[QINDEX_RANGE];
 
 extern const int vp8_gf_boost_qadjustment[QINDEX_RANGE];
 
-#define IIFACTOR   1.4
+#define IIFACTOR   1.5
 #define IIKFACTOR1 1.40
 #define IIKFACTOR2 1.5
 #define RMAX       14.0
@@ -63,6 +64,8 @@ extern const int vp8_gf_boost_qadjustment[QINDEX_RANGE];
 #define POW1 (double)cpi->oxcf.two_pass_vbrbias/100.0
 #define POW2 (double)cpi->oxcf.two_pass_vbrbias/100.0
 
+#define NEW_BOOST 1
+
 static int vscale_lookup[7] = {0, 1, 1, 2, 2, 3, 3};
 static int hscale_lookup[7] = {0, 0, 1, 1, 2, 2, 3};
 
@@ -96,6 +99,146 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
     return 1;
 }
 
+// Read frame stats at an offset from the current position
+static int read_frame_stats( VP8_COMP *cpi,
+                             FIRSTPASS_STATS *frame_stats,
+                             int offset )
+{
+    FIRSTPASS_STATS * fps_ptr = cpi->twopass.stats_in;
+
+    // Check legality of offset
+    if ( offset >= 0 )
+    {
+        if ( &fps_ptr[offset] >= cpi->twopass.stats_in_end )
+             return EOF;
+    }
+    else if ( offset < 0 )
+    {
+        if ( &fps_ptr[offset] < cpi->twopass.stats_in_start )
+             return EOF;
+    }
+
+    *frame_stats = fps_ptr[offset];
+    return 1;
+}
+
+static int input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
+{
+    if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end)
+        return EOF;
+
+    *fps = *cpi->twopass.stats_in;
+    cpi->twopass.stats_in =
+         (void*)((char *)cpi->twopass.stats_in + sizeof(FIRSTPASS_STATS));
+    return 1;
+}
+
+static void output_stats(const VP8_COMP            *cpi,
+                         struct vpx_codec_pkt_list *pktlist,
+                         FIRSTPASS_STATS            *stats)
+{
+    struct vpx_codec_cx_pkt pkt;
+    pkt.kind = VPX_CODEC_STATS_PKT;
+    pkt.data.twopass_stats.buf = stats;
+    pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
+    vpx_codec_pkt_list_add(pktlist, &pkt);
+
+// TEMP debug code
+#if OUTPUT_FPF
+
+    {
+        FILE *fpfile;
+        fpfile = fopen("firstpass.stt", "a");
+
+        fprintf(fpfile, "%12.0f %12.0f %12.0f %12.4f %12.4f %12.4f %12.4f"
+                " %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f"
+                " %12.0f %12.4f\n",
+                stats->frame,
+                stats->intra_error,
+                stats->coded_error,
+                stats->ssim_weighted_pred_err,
+                stats->pcnt_inter,
+                stats->pcnt_motion,
+                stats->pcnt_second_ref,
+                stats->pcnt_neutral,
+                stats->MVr,
+                stats->mvr_abs,
+                stats->MVc,
+                stats->mvc_abs,
+                stats->MVrv,
+                stats->MVcv,
+                stats->mv_in_out_count,
+                stats->count,
+                stats->duration);
+        fclose(fpfile);
+    }
+#endif
+}
+
+static void zero_stats(FIRSTPASS_STATS *section)
+{
+    section->frame      = 0.0;
+    section->intra_error = 0.0;
+    section->coded_error = 0.0;
+    section->ssim_weighted_pred_err = 0.0;
+    section->pcnt_inter  = 0.0;
+    section->pcnt_motion  = 0.0;
+    section->pcnt_second_ref = 0.0;
+    section->pcnt_neutral = 0.0;
+    section->MVr        = 0.0;
+    section->mvr_abs     = 0.0;
+    section->MVc        = 0.0;
+    section->mvc_abs     = 0.0;
+    section->MVrv       = 0.0;
+    section->MVcv       = 0.0;
+    section->mv_in_out_count  = 0.0;
+    section->count      = 0.0;
+    section->duration   = 1.0;
+}
+
+static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
+{
+    section->frame += frame->frame;
+    section->intra_error += frame->intra_error;
+    section->coded_error += frame->coded_error;
+    section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err;
+    section->pcnt_inter  += frame->pcnt_inter;
+    section->pcnt_motion += frame->pcnt_motion;
+    section->pcnt_second_ref += frame->pcnt_second_ref;
+    section->pcnt_neutral += frame->pcnt_neutral;
+    section->MVr        += frame->MVr;
+    section->mvr_abs     += frame->mvr_abs;
+    section->MVc        += frame->MVc;
+    section->mvc_abs     += frame->mvc_abs;
+    section->MVrv       += frame->MVrv;
+    section->MVcv       += frame->MVcv;
+    section->mv_in_out_count  += frame->mv_in_out_count;
+    section->count      += frame->count;
+    section->duration   += frame->duration;
+}
+
+static void avg_stats(FIRSTPASS_STATS *section)
+{
+    if (section->count < 1.0)
+        return;
+
+    section->intra_error /= section->count;
+    section->coded_error /= section->count;
+    section->ssim_weighted_pred_err /= section->count;
+    section->pcnt_inter  /= section->count;
+    section->pcnt_second_ref /= section->count;
+    section->pcnt_neutral /= section->count;
+    section->pcnt_motion /= section->count;
+    section->MVr        /= section->count;
+    section->mvr_abs     /= section->count;
+    section->MVc        /= section->count;
+    section->mvc_abs     /= section->count;
+    section->MVrv       /= section->count;
+    section->MVcv       /= section->count;
+    section->mv_in_out_count   /= section->count;
+    section->duration   /= section->count;
+}
+
 // Calculate a modified Error used in distributing bits between easier and harder frames
 static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 {
@@ -248,122 +391,6 @@ static int frame_max_bits(VP8_COMP *cpi)
     return max_bits;
 }
 
-
-static void output_stats(const VP8_COMP            *cpi,
-                         struct vpx_codec_pkt_list *pktlist,
-                         FIRSTPASS_STATS            *stats)
-{
-    struct vpx_codec_cx_pkt pkt;
-    pkt.kind = VPX_CODEC_STATS_PKT;
-    pkt.data.twopass_stats.buf = stats;
-    pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
-    vpx_codec_pkt_list_add(pktlist, &pkt);
-
-// TEMP debug code
-#if OUTPUT_FPF
-
-    {
-        FILE *fpfile;
-        fpfile = fopen("firstpass.stt", "a");
-
-        fprintf(fpfile, "%12.0f %12.0f %12.0f %12.4f %12.4f %12.4f %12.4f"
-                " %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f"
-                " %12.0f %12.4f\n",
-                stats->frame,
-                stats->intra_error,
-                stats->coded_error,
-                stats->ssim_weighted_pred_err,
-                stats->pcnt_inter,
-                stats->pcnt_motion,
-                stats->pcnt_second_ref,
-                stats->pcnt_neutral,
-                stats->MVr,
-                stats->mvr_abs,
-                stats->MVc,
-                stats->mvc_abs,
-                stats->MVrv,
-                stats->MVcv,
-                stats->mv_in_out_count,
-                stats->count,
-                stats->duration);
-        fclose(fpfile);
-    }
-#endif
-}
-
-static int input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
-{
-    if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end)
-        return EOF;
-
-    *fps = *cpi->twopass.stats_in;
-    cpi->twopass.stats_in =
-         (void*)((char *)cpi->twopass.stats_in + sizeof(FIRSTPASS_STATS));
-    return 1;
-}
-
-static void zero_stats(FIRSTPASS_STATS *section)
-{
-    section->frame      = 0.0;
-    section->intra_error = 0.0;
-    section->coded_error = 0.0;
-    section->ssim_weighted_pred_err = 0.0;
-    section->pcnt_inter  = 0.0;
-    section->pcnt_motion  = 0.0;
-    section->pcnt_second_ref = 0.0;
-    section->pcnt_neutral = 0.0;
-    section->MVr        = 0.0;
-    section->mvr_abs     = 0.0;
-    section->MVc        = 0.0;
-    section->mvc_abs     = 0.0;
-    section->MVrv       = 0.0;
-    section->MVcv       = 0.0;
-    section->mv_in_out_count  = 0.0;
-    section->count      = 0.0;
-    section->duration   = 1.0;
-}
-static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
-{
-    section->frame += frame->frame;
-    section->intra_error += frame->intra_error;
-    section->coded_error += frame->coded_error;
-    section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err;
-    section->pcnt_inter  += frame->pcnt_inter;
-    section->pcnt_motion += frame->pcnt_motion;
-    section->pcnt_second_ref += frame->pcnt_second_ref;
-    section->pcnt_neutral += frame->pcnt_neutral;
-    section->MVr        += frame->MVr;
-    section->mvr_abs     += frame->mvr_abs;
-    section->MVc        += frame->MVc;
-    section->mvc_abs     += frame->mvc_abs;
-    section->MVrv       += frame->MVrv;
-    section->MVcv       += frame->MVcv;
-    section->mv_in_out_count  += frame->mv_in_out_count;
-    section->count      += frame->count;
-    section->duration   += frame->duration;
-}
-static void avg_stats(FIRSTPASS_STATS *section)
-{
-    if (section->count < 1.0)
-        return;
-
-    section->intra_error /= section->count;
-    section->coded_error /= section->count;
-    section->ssim_weighted_pred_err /= section->count;
-    section->pcnt_inter  /= section->count;
-    section->pcnt_second_ref /= section->count;
-    section->pcnt_neutral /= section->count;
-    section->pcnt_motion /= section->count;
-    section->MVr        /= section->count;
-    section->mvr_abs     /= section->count;
-    section->MVc        /= section->count;
-    section->mvc_abs     /= section->count;
-    section->MVrv       /= section->count;
-    section->MVcv       /= section->count;
-    section->mv_in_out_count   /= section->count;
-    section->duration   /= section->count;
-}
-
 void vp8_init_first_pass(VP8_COMP *cpi)
 {
     zero_stats(cpi->twopass.total_stats);
@@ -552,6 +579,9 @@ void vp8_first_pass(VP8_COMP *cpi)
             xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
             xd->left_available = (mb_col != 0);
 
+            //Copy current mb to a buffer
+            RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+
             // do intra 16x16 prediction
             this_error = vp8_encode_intra(cpi, x, use_dc_pred);
 
@@ -854,7 +884,11 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
 
         //cpi->twopass.est_max_qcorrection_factor /= adjustment_rate;
 
-        cpi->twopass.est_max_qcorrection_factor = (cpi->twopass.est_max_qcorrection_factor < 0.1) ? 0.1 : (cpi->twopass.est_max_qcorrection_factor > 10.0) ? 10.0 : cpi->twopass.est_max_qcorrection_factor;
+        cpi->twopass.est_max_qcorrection_factor =
+            (cpi->twopass.est_max_qcorrection_factor < 0.1)
+                ? 0.1
+                : (cpi->twopass.est_max_qcorrection_factor > 10.0)
+                    ? 10.0 : cpi->twopass.est_max_qcorrection_factor;
     }
 
     // Corrections for higher compression speed settings (reduced compression expected)
@@ -889,7 +923,6 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
             * speed_correction * cpi->twopass.est_max_qcorrection_factor
             * cpi->twopass.section_max_qfactor
             * (double)vp8_bits_per_mb[INTER_FRAME][Q] / 1.0);
-        //bits_per_mb_at_this_q = (int)(.5 + correction_factor * speed_correction * cpi->twopass.est_max_qcorrection_factor * (double)vp8_bits_per_mb[INTER_FRAME][Q] / 1.0);
 
         if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
             break;
@@ -1233,7 +1266,6 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra
     double motion_decay;
     double motion_pct = next_frame->pcnt_motion;
 
-
     // Initial basis is the % mbs inter coded
     prediction_decay_rate = next_frame->pcnt_inter;
 
@@ -1308,24 +1340,257 @@ static int detect_transition_to_still(
     return trans_to_still;
 }
 
+// This function detects a flash through the high relative pcnt_second_ref
+// score in the frame following a flash frame. The offset passed in should
+// reflect this
+static BOOL detect_flash( VP8_COMP *cpi, int offset )
+{
+    FIRSTPASS_STATS next_frame;
+
+    BOOL flash_detected = FALSE;
+
+    // Read the frame data.
+    // The return is FALSE (no flash detected) if not a valid frame
+    if ( read_frame_stats(cpi, &next_frame, offset) != EOF )
+    {
+        // What we are looking for here is a situation where there is a
+        // brief break in prediction (such as a flash) but subsequent frames
+        // are reasonably well predicted by an earlier (pre flash) frame.
+        // The recovery after a flash is indicated by a high pcnt_second_ref
+        // comapred to pcnt_inter.
+        if ( (next_frame.pcnt_second_ref > next_frame.pcnt_inter) &&
+             (next_frame.pcnt_second_ref >= 0.5 ) )
+        {
+            flash_detected = TRUE;
+
+            /*if (1)
+            {
+                FILE *f = fopen("flash.stt", "a");
+                fprintf(f, "%8.0f %6.2f %6.2f\n",
+                    next_frame.frame,
+                    next_frame.pcnt_inter,
+                    next_frame.pcnt_second_ref);
+                fclose(f);
+            }*/
+        }
+    }
+
+    return flash_detected;
+}
+
+// Update the motion related elements to the GF arf boost calculation
+static void accumulate_frame_motion_stats(
+    VP8_COMP *cpi,
+    FIRSTPASS_STATS * this_frame,
+    double * this_frame_mv_in_out,
+    double * mv_in_out_accumulator,
+    double * abs_mv_in_out_accumulator,
+    double * mv_ratio_accumulator )
+{
+    //double this_frame_mv_in_out;
+    double this_frame_mvr_ratio;
+    double this_frame_mvc_ratio;
+    double motion_pct;
+
+    // Accumulate motion stats.
+    motion_pct = this_frame->pcnt_motion;
+
+    // Accumulate Motion In/Out of frame stats
+    *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
+    *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
+    *abs_mv_in_out_accumulator +=
+        fabs(this_frame->mv_in_out_count * motion_pct);
+
+    // Accumulate a measure of how uniform (or conversely how random)
+    // the motion field is. (A ratio of absmv / mv)
+    if (motion_pct > 0.05)
+    {
+        this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
+                               DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr));
+
+        this_frame_mvc_ratio = fabs(this_frame->mvc_abs) /
+                               DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc));
+
+         *mv_ratio_accumulator +=
+            (this_frame_mvr_ratio < this_frame->mvr_abs)
+                ? (this_frame_mvr_ratio * motion_pct)
+                : this_frame->mvr_abs * motion_pct;
+
+        *mv_ratio_accumulator +=
+            (this_frame_mvc_ratio < this_frame->mvc_abs)
+                ? (this_frame_mvc_ratio * motion_pct)
+                : this_frame->mvc_abs * motion_pct;
+
+    }
+}
+
+// Calculate a baseline boost number for the current frame.
+static double calc_frame_boost(
+    VP8_COMP *cpi,
+    FIRSTPASS_STATS * this_frame,
+    double this_frame_mv_in_out )
+{
+    double frame_boost;
+
+    // Underlying boost factor is based on inter intra error ratio
+    if (this_frame->intra_error > cpi->twopass.gf_intra_err_min)
+        frame_boost = (IIFACTOR * this_frame->intra_error /
+                      DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
+    else
+        frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min /
+                      DOUBLE_DIVIDE_CHECK(this_frame->coded_error));
+
+    // Increase boost for frames where new data coming into frame
+    // (eg zoom out). Slightly reduce boost if there is a net balance
+    // of motion out of the frame (zoom in).
+    // The range for this_frame_mv_in_out is -1.0 to +1.0
+    if (this_frame_mv_in_out > 0.0)
+        frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
+    // In extreme case boost is halved
+    else
+        frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
+
+    // Clip to maximum
+    if (frame_boost > GF_RMAX)
+        frame_boost = GF_RMAX;
+
+    return frame_boost;
+}
+
+#if NEW_BOOST
+static int calc_arf_boost(
+    VP8_COMP *cpi,
+    int offset,
+    int f_frames,
+    int b_frames,
+    int *f_boost,
+    int *b_boost )
+{
+    FIRSTPASS_STATS this_frame;
+
+    int i;
+    double boost_score = 0.0;
+    double fwd_boost_score = 0.0;
+    double mv_ratio_accumulator = 0.0;
+    double decay_accumulator = 1.0;
+    double this_frame_mv_in_out = 0.0;
+    double mv_in_out_accumulator = 0.0;
+    double abs_mv_in_out_accumulator = 0.0;
+    double r;
+    BOOL flash_detected = FALSE;
+
+    // Search forward from the proposed arf/next gf position
+    for ( i = 0; i < f_frames; i++ )
+    {
+        if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF )
+            break;
+
+        // Update the motion related elements to the boost calculation
+        accumulate_frame_motion_stats( cpi, &this_frame,
+            &this_frame_mv_in_out, &mv_in_out_accumulator,
+            &abs_mv_in_out_accumulator, &mv_ratio_accumulator );
+
+        // Calculate the baseline boost number for this frame
+        r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out );
+
+        // We want to discount the the flash frame itself and the recovery
+        // frame that follows as both will have poor scores.
+        flash_detected = detect_flash(cpi, (i+offset)) ||
+                         detect_flash(cpi, (i+offset+1));
+
+        // Cumulative effect of prediction quality decay
+        if ( !flash_detected )
+        {
+            decay_accumulator =
+                decay_accumulator *
+                get_prediction_decay_rate(cpi, &this_frame);
+            decay_accumulator =
+                decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
+        }
+        boost_score += (decay_accumulator * r);
+
+        // Break out conditions.
+        if  ( (!flash_detected) &&
+              ((mv_ratio_accumulator > 100.0) ||
+               (abs_mv_in_out_accumulator > 3.0) ||
+               (mv_in_out_accumulator < -2.0) ) )
+        {
+            break;
+        }
+    }
+
+    *f_boost = (int)(boost_score * 100.0) >> 4;
+
+    // Reset for backward looking loop
+    boost_score = 0.0;
+    mv_ratio_accumulator = 0.0;
+    decay_accumulator = 1.0;
+    this_frame_mv_in_out = 0.0;
+    mv_in_out_accumulator = 0.0;
+    abs_mv_in_out_accumulator = 0.0;
+
+    // Search forward from the proposed arf/next gf position
+    for ( i = -1; i >= -b_frames; i-- )
+    {
+        if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF )
+            break;
+
+        // Update the motion related elements to the boost calculation
+        accumulate_frame_motion_stats( cpi, &this_frame,
+            &this_frame_mv_in_out, &mv_in_out_accumulator,
+            &abs_mv_in_out_accumulator, &mv_ratio_accumulator );
+
+        // Calculate the baseline boost number for this frame
+        r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out );
+
+        // We want to discount the the flash frame itself and the recovery
+        // frame that follows as both will have poor scores.
+        flash_detected = detect_flash(cpi, (i+offset)) ||
+                         detect_flash(cpi, (i+offset+1));
+
+        // Cumulative effect of prediction quality decay
+        if ( !flash_detected )
+        {
+            decay_accumulator =
+                decay_accumulator *
+                get_prediction_decay_rate(cpi, &this_frame);
+            decay_accumulator =
+                decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
+        }
+
+        boost_score += (decay_accumulator * r);
+
+        // Break out conditions.
+        if  ( (!flash_detected) &&
+              ((mv_ratio_accumulator > 100.0) ||
+               (abs_mv_in_out_accumulator > 3.0) ||
+               (mv_in_out_accumulator < -2.0) ) )
+        {
+            break;
+        }
+    }
+    *b_boost = (int)(boost_score * 100.0) >> 4;
+
+    return (*f_boost + *b_boost);
+}
+#endif
+
 // Analyse and define a gf/arf group .
 static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 {
     FIRSTPASS_STATS next_frame;
     FIRSTPASS_STATS *start_pos;
     int i;
+    double r;
     double boost_score = 0.0;
     double old_boost_score = 0.0;
     double gf_group_err = 0.0;
     double gf_first_frame_err = 0.0;
     double mod_frame_err = 0.0;
 
-    double mv_accumulator_rabs  = 0.0;
-    double mv_accumulator_cabs  = 0.0;
     double mv_ratio_accumulator = 0.0;
     double decay_accumulator = 1.0;
 
-    double boost_factor = IIFACTOR;
     double loop_decay_rate = 1.00;          // Starting decay rate
 
     double this_frame_mv_in_out = 0.0;
@@ -1338,6 +1603,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     unsigned int allow_alt_ref =
                     cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
 
+    int alt_boost = 0;
+    int f_boost = 0;
+    int b_boost = 0;
+    BOOL flash_detected;
+
     cpi->twopass.gf_group_bits = 0;
     cpi->twopass.gf_decay_rate = 0;
 
@@ -1347,7 +1617,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
 
-    // Preload the stats for the next frame.
+    // Load stats for the current frame.
     mod_frame_err = calculate_modified_err(cpi, this_frame);
 
     // Note the error of the frame at the start of the group (this will be
@@ -1369,12 +1639,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) &&
            (i < cpi->twopass.frames_to_key))
     {
-        double r;
-        double this_frame_mvr_ratio;
-        double this_frame_mvc_ratio;
-        //double motion_pct = next_frame.pcnt_motion;
-        double motion_pct;
-
         i++;    // Increment the loop counter
 
         // Accumulate error score of frames in this gf group
@@ -1388,82 +1652,33 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         if (EOF == input_stats(cpi, &next_frame))
             break;
 
-        // Accumulate motion stats.
-        motion_pct = next_frame.pcnt_motion;
-        mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_pct);
-        mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_pct);
-
-        //Accumulate Motion In/Out of frame stats
-        this_frame_mv_in_out =
-            next_frame.mv_in_out_count * motion_pct;
-        mv_in_out_accumulator +=
-            next_frame.mv_in_out_count * motion_pct;
-        abs_mv_in_out_accumulator +=
-            fabs(next_frame.mv_in_out_count * motion_pct);
-
-        // If there is a significant amount of motion
-        if (motion_pct > 0.05)
-        {
-            this_frame_mvr_ratio = fabs(next_frame.mvr_abs) /
-                                   DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVr));
+        // Test for the case where there is a brief flash but the prediction
+        // quality back to an earlier frame is then restored.
+        flash_detected = detect_flash(cpi, 0);
 
-            this_frame_mvc_ratio = fabs(next_frame.mvc_abs) /
-                                   DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVc));
+        // Update the motion related elements to the boost calculation
+        accumulate_frame_motion_stats( cpi, &next_frame,
+            &this_frame_mv_in_out, &mv_in_out_accumulator,
+            &abs_mv_in_out_accumulator, &mv_ratio_accumulator );
 
-            mv_ratio_accumulator +=
-                (this_frame_mvr_ratio < next_frame.mvr_abs)
-                    ? (this_frame_mvr_ratio * motion_pct)
-                    : next_frame.mvr_abs * motion_pct;
+        // Calculate a baseline boost number for this frame
+        r = calc_frame_boost( cpi, &next_frame, this_frame_mv_in_out );
 
-            mv_ratio_accumulator +=
-                (this_frame_mvc_ratio < next_frame.mvc_abs)
-                    ? (this_frame_mvc_ratio * motion_pct)
-                    : next_frame.mvc_abs * motion_pct;
-        }
-        else
+        // Cumulative effect of prediction quality decay
+        if ( !flash_detected )
         {
-            mv_ratio_accumulator += 0.0;
-            this_frame_mvr_ratio = 1.0;
-            this_frame_mvc_ratio = 1.0;
+            loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+            decay_accumulator = decay_accumulator * loop_decay_rate;
+            decay_accumulator =
+                decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
         }
-
-        // Underlying boost factor is based on inter intra error ratio
-        r = ( boost_factor *
-              ( next_frame.intra_error /
-                DOUBLE_DIVIDE_CHECK(next_frame.coded_error)));
-
-        if (next_frame.intra_error > cpi->twopass.gf_intra_err_min)
-            r = (IIKFACTOR2 * next_frame.intra_error /
-                     DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
-        else
-            r = (IIKFACTOR2 * cpi->twopass.gf_intra_err_min /
-                     DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
-
-        // Increase boost for frames where new data coming into frame
-        // (eg zoom out). Slightly reduce boost if there is a net balance
-        // of motion out of the frame (zoom in).
-        // The range for this_frame_mv_in_out is -1.0 to +1.0
-        if (this_frame_mv_in_out > 0.0)
-            r += r * (this_frame_mv_in_out * 2.0);
-        // In extreme case boost is halved
-        else
-            r += r * (this_frame_mv_in_out / 2.0);
-
-        if (r > GF_RMAX)
-            r = GF_RMAX;
-
-        loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
-
-        // Cumulative effect of decay
-        decay_accumulator = decay_accumulator * loop_decay_rate;
-        decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
-
         boost_score += (decay_accumulator * r);
 
         // Break clause to detect very still sections after motion
         // For example a staic image after a fade or other transition.
         if ( detect_transition_to_still( cpi, i, 5,
-                                         loop_decay_rate, decay_accumulator ) )
+                                         loop_decay_rate,
+                                         decay_accumulator ) )
         {
             allow_alt_ref = FALSE;
             boost_score = old_boost_score;
@@ -1471,7 +1686,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         }
 
         // Break out conditions.
-        if  (   /* i>4 || */
+        if  (
             // Break at cpi->max_gf_interval unless almost totally static
             (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) ||
             (
@@ -1480,6 +1695,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
                 // Dont break out very close to a key frame
                 ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
                 ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
+                (!flash_detected) &&
                 ((mv_ratio_accumulator > 100.0) ||
                  (abs_mv_in_out_accumulator > 3.0) ||
                  (mv_in_out_accumulator < -2.0) ||
@@ -1527,8 +1743,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             boost_score = max_boost;
     }
 
-    cpi->gfu_boost = (int)(boost_score * 100.0) >> 4;
-
     // Dont allow conventional gf too near the next kf
     if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)
     {
@@ -1547,39 +1761,70 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         }
     }
 
+    cpi->gfu_boost = (int)(boost_score * 100.0) >> 4;
+
+#if NEW_BOOST
+    // Alterrnative boost calculation for alt ref
+    alt_boost = calc_arf_boost( cpi, 0, (i-1), (i-1), &f_boost, &b_boost );
+#endif
+
     // Should we use the alternate refernce frame
     if (allow_alt_ref &&
         (i >= MIN_GF_INTERVAL) &&
         // dont use ARF very near next kf
         (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) &&
-        (((next_frame.pcnt_inter > 0.75) &&
-          ((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) &&
-          //(cpi->gfu_boost>150) &&
-          (cpi->gfu_boost > 100) &&
-          //(cpi->gfu_boost>AF_THRESH2) &&
-          //((cpi->gfu_boost/i)>AF_THRESH) &&
-          //(decay_accumulator > 0.5) &&
-          (cpi->twopass.gf_decay_rate <= (ARF_DECAY_THRESH + (cpi->gfu_boost / 200)))
-         )
-        )
-       )
+#if NEW_BOOST
+        ((next_frame.pcnt_inter > 0.75) ||
+         (next_frame.pcnt_second_ref > 0.5)) &&
+        ((mv_in_out_accumulator / (double)i > -0.2) ||
+         (mv_in_out_accumulator > -2.0)) &&
+        (b_boost > 100) &&
+        (f_boost > 100) )
+#else
+        (next_frame.pcnt_inter > 0.75) &&
+        ((mv_in_out_accumulator / (double)i > -0.2) ||
+         (mv_in_out_accumulator > -2.0)) &&
+        (cpi->gfu_boost > 100) &&
+        (cpi->twopass.gf_decay_rate <=
+            (ARF_DECAY_THRESH + (cpi->gfu_boost / 200))) )
+#endif
     {
         int Boost;
         int allocation_chunks;
-        int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
+        int Q = (cpi->oxcf.fixed_q < 0)
+                ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
         int tmp_q;
         int arf_frame_bits = 0;
         int group_bits;
 
+#if NEW_BOOST
+        cpi->gfu_boost = alt_boost;
+#endif
+
         // Estimate the bits to be allocated to the group as a whole
-        if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0))
-            group_bits = (int)((double)cpi->twopass.kf_group_bits * (gf_group_err / (double)cpi->twopass.kf_group_error_left));
+        if ((cpi->twopass.kf_group_bits > 0) &&
+            (cpi->twopass.kf_group_error_left > 0))
+        {
+            group_bits = (int)((double)cpi->twopass.kf_group_bits *
+                (gf_group_err / (double)cpi->twopass.kf_group_error_left));
+        }
         else
             group_bits = 0;
 
         // Boost for arf frame
+#if NEW_BOOST
+        Boost = (alt_boost * GFQ_ADJUSTMENT) / 100;
+#else
         Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
+#endif
         Boost += (i * 50);
+
+        // Set max and minimum boost and hence minimum allocation
+        if (Boost > ((cpi->baseline_gf_interval + 1) * 200))
+            Boost = ((cpi->baseline_gf_interval + 1) * 200);
+        else if (Boost < 125)
+            Boost = 125;
+
         allocation_chunks = (i * 100) + Boost;
 
         // Normalize Altboost and allocations chunck down to prevent overflow
@@ -1589,13 +1834,17 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             allocation_chunks /= 2;
         }
 
-        // Calculate the number of bits to be spent on the arf based on the boost number
-        arf_frame_bits = (int)((double)Boost * (group_bits / (double)allocation_chunks));
+        // Calculate the number of bits to be spent on the arf based on the
+        // boost number
+        arf_frame_bits = (int)((double)Boost * (group_bits /
+                               (double)allocation_chunks));
 
-        // Estimate if there are enough bits available to make worthwhile use of an arf.
+        // Estimate if there are enough bits available to make worthwhile use
+        // of an arf.
         tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits);
 
-        // Only use an arf if it is likely we will be able to code it at a lower Q than the surrounding frames.
+        // Only use an arf if it is likely we will be able to code
+        // it at a lower Q than the surrounding frames.
         if (tmp_q < cpi->worst_quality)
         {
             int half_gf_int;
@@ -1605,13 +1854,22 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
             cpi->source_alt_ref_pending = TRUE;
 
-            // For alt ref frames the error score for the end frame of the group (the alt ref frame) should not contribute to the group total and hence
-            // the number of bit allocated to the group. Rather it forms part of the next group (it is the GF at the start of the next group)
-            gf_group_err -= mod_frame_err;
+            // For alt ref frames the error score for the end frame of the
+            // group (the alt ref frame) should not contribute to the group
+            // total and hence the number of bit allocated to the group.
+            // Rather it forms part of the next group (it is the GF at the
+            // start of the next group)
+            // gf_group_err -= mod_frame_err;
 
-            // Set the interval till the next gf or arf. For ARFs this is the number of frames to be coded before the future frame that is coded as an ARF.
+            // For alt ref frames alt ref frame is technically part of the
+            // GF frame for the next group but we always base the error
+            // calculation and bit allocation on the current group of frames.
+
+            // Set the interval till the next gf or arf.
+            // For ARFs this is the number of frames to be coded before the
+            // future frame that is coded as an ARF.
             // The future frame itself is part of the next group
-            cpi->baseline_gf_interval = i - 1;
+            cpi->baseline_gf_interval = i;
 
             // Define the arnr filter width for this group of frames:
             // We only filter frames that lie within a distance of half
@@ -1620,7 +1878,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             // Note: this_frame->frame has been updated in the loop
             // so it now points at the ARF frame.
             half_gf_int = cpi->baseline_gf_interval >> 1;
-            frames_after_arf = cpi->twopass.total_stats->count - this_frame->frame - 1;
+            frames_after_arf = cpi->twopass.total_stats->count -
+                               this_frame->frame - 1;
 
             switch (cpi->oxcf.arnr_type)
             {
@@ -1669,23 +1928,38 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         cpi->baseline_gf_interval = i;
     }
 
-    // Now decide how many bits should be allocated to the GF group as  a proportion of those remaining in the kf group.
-    // The final key frame group in the clip is treated as a special case where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left.
-    // This is also important for short clips where there may only be one key frame.
-    if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame))
+    // Now decide how many bits should be allocated to the GF group as  a
+    // proportion of those remaining in the kf group.
+    // The final key frame group in the clip is treated as a special case
+    // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left.
+    // This is also important for short clips where there may only be one
+    // key frame.
+    if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count -
+                                            cpi->common.current_video_frame))
     {
-        cpi->twopass.kf_group_bits = (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;
+        cpi->twopass.kf_group_bits =
+            (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;
     }
 
     // Calculate the bits to be allocated to the group as a whole
-    if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0))
-        cpi->twopass.gf_group_bits = (int)((double)cpi->twopass.kf_group_bits * (gf_group_err / (double)cpi->twopass.kf_group_error_left));
+    if ((cpi->twopass.kf_group_bits > 0) &&
+        (cpi->twopass.kf_group_error_left > 0))
+    {
+        cpi->twopass.gf_group_bits =
+            (int)((double)cpi->twopass.kf_group_bits *
+                  (gf_group_err / (double)cpi->twopass.kf_group_error_left));
+    }
     else
         cpi->twopass.gf_group_bits = 0;
 
-    cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
+    cpi->twopass.gf_group_bits =
+        (cpi->twopass.gf_group_bits < 0)
+            ? 0
+            : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits)
+                ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
 
-    // Clip cpi->twopass.gf_group_bits based on user supplied data rate variability limit (cpi->oxcf.two_pass_vbrmax_section)
+    // Clip cpi->twopass.gf_group_bits based on user supplied data rate
+    // variability limit (cpi->oxcf.two_pass_vbrmax_section)
     if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
         cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
 
@@ -1698,7 +1972,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     // Assign  bits to the arf or gf.
     for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) {
         int Boost;
-        int frames_in_section;
         int allocation_chunks;
         int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
         int gf_bits;
@@ -1706,8 +1979,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         // For ARF frames
         if (cpi->source_alt_ref_pending && i == 0)
         {
+#if NEW_BOOST
+            Boost = (alt_boost * GFQ_ADJUSTMENT) / 100;
+#else
             Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
-            //Boost += (cpi->baseline_gf_interval * 25);
+#endif
             Boost += (cpi->baseline_gf_interval * 50);
 
             // Set max and minimum boost and hence minimum allocation
@@ -1716,8 +1992,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             else if (Boost < 125)
                 Boost = 125;
 
-            frames_in_section = cpi->baseline_gf_interval + 1;
-            allocation_chunks = (frames_in_section * 100) + Boost;
+            allocation_chunks =
+                ((cpi->baseline_gf_interval + 1) * 100) + Boost;
         }
         // Else for standard golden frames
         else
@@ -1731,8 +2007,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             else if (Boost < 125)
                 Boost = 125;
 
-            frames_in_section = cpi->baseline_gf_interval;
-            allocation_chunks = (frames_in_section * 100) + (Boost - 100);
+            allocation_chunks =
+                (cpi->baseline_gf_interval * 100) + (Boost - 100);
         }
 
         // Normalize Altboost and allocations chunck down to prevent overflow
@@ -1742,8 +2018,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             allocation_chunks /= 2;
         }
 
-        // Calculate the number of bits to be spent on the gf or arf based on the boost number
-        gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks));
+        // Calculate the number of bits to be spent on the gf or arf based on
+        // the boost number
+        gf_bits = (int)((double)Boost *
+                        (cpi->twopass.gf_group_bits /
+                         (double)allocation_chunks));
 
         // If the frame that is to be boosted is simpler than the average for
         // the gf/arf group then use an alternative calculation
@@ -1825,17 +2104,26 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         if (cpi->twopass.gf_group_bits < 0)
             cpi->twopass.gf_group_bits = 0;
 
-        // Set aside some bits for a mid gf sequence boost
-        if ((cpi->gfu_boost > 150) && (cpi->baseline_gf_interval > 5))
         {
-            int pct_extra = (cpi->gfu_boost - 100) / 50;
-            pct_extra = (pct_extra > 10) ? 10 : pct_extra;
+#if NEW_BOOST
+            int boost = (cpi->source_alt_ref_pending)
+                        ? b_boost : cpi->gfu_boost;
+#else
+            int boost = cpi->gfu_boost;
+#endif
+            // Set aside some bits for a mid gf sequence boost
+            if ((boost > 150) && (cpi->baseline_gf_interval > 5))
+            {
+                int pct_extra = (boost - 100) / 50;
+                pct_extra = (pct_extra > 10) ? 10 : pct_extra;
 
-            cpi->twopass.mid_gf_extra_bits = (cpi->twopass.gf_group_bits * pct_extra) / 100;
-            cpi->twopass.gf_group_bits -= cpi->twopass.mid_gf_extra_bits;
+                cpi->twopass.mid_gf_extra_bits =
+                    (cpi->twopass.gf_group_bits * pct_extra) / 100;
+                cpi->twopass.gf_group_bits -= cpi->twopass.mid_gf_extra_bits;
+            }
+            else
+                cpi->twopass.mid_gf_extra_bits = 0;
         }
-        else
-            cpi->twopass.mid_gf_extra_bits = 0;
     }
 
     // Adjustment to estimate_max_q based on a measure of complexity of the section
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 416948870..6f314a386 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -286,8 +286,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     bestmv->as_mv.row = br << 1;
     bestmv->as_mv.col = bc << 1;
 
-    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > MAX_FULL_PEL_VAL) ||
-        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > MAX_FULL_PEL_VAL))
+    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
+        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
         return INT_MAX;
 
     return besterr;
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 44ed055db..416c4d5eb 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -22,7 +22,7 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
 
 
 #define MAX_MVSEARCH_STEPS 8                                    // The maximum number of steps in a step search given the largest allowed initial step
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS+3)) - 8)    // Max full pel mv specified in 1/8 pel units
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)      // Max full pel mv specified in 1 pel units
 #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))            // Maximum size of the first step in full pel units
 
 extern void print_mode_context(void);
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index a84a7b4da..73b4c7dcd 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2012,7 +2012,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
         size_t packet_sz = sizeof(FIRSTPASS_STATS);
         int packets = oxcf->two_pass_stats_in.sz / packet_sz;
 
-        cpi->twopass.stats_in = oxcf->two_pass_stats_in.buf;
+        cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
+        cpi->twopass.stats_in = cpi->twopass.stats_in_start;
         cpi->twopass.stats_in_end = (void*)((char *)cpi->twopass.stats_in
                             + (packets - 1) * packet_sz);
         vp8_init_second_pass(cpi);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index ed2073aab..107a681be 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -525,7 +525,7 @@ typedef struct VP8_COMP
         unsigned int this_iiratio;
         FIRSTPASS_STATS *total_stats;
         FIRSTPASS_STATS *this_frame_stats;
-        FIRSTPASS_STATS *stats_in, *stats_in_end;
+        FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
         int first_pass_done;
         long long bits_left;
         long long clip_bits_total;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index b60d2419f..fac068f7a 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -588,10 +588,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
 
             /* adjust mvp to make sure it is within MV range */
             vp8_clamp_mv(&mvp,
-                         best_ref_mv.as_mv.col - MAX_FULL_PEL_VAL,
-                         best_ref_mv.as_mv.col + MAX_FULL_PEL_VAL,
-                         best_ref_mv.as_mv.row - MAX_FULL_PEL_VAL,
-                         best_ref_mv.as_mv.row + MAX_FULL_PEL_VAL);
+                         best_ref_mv.as_mv.col - (MAX_FULL_PEL_VAL<<3),
+                         best_ref_mv.as_mv.col + (MAX_FULL_PEL_VAL<<3),
+                         best_ref_mv.as_mv.row - (MAX_FULL_PEL_VAL<<3),
+                         best_ref_mv.as_mv.row + (MAX_FULL_PEL_VAL<<3));
         }
 
         switch (this_mode)
@@ -610,7 +610,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                 rate2 += rate;
                 distortion2 = VARIANCE_INVOKE
                                 (&cpi->rtcd.variance, var16x16)(
-                                    x->src.y_buffer, x->src.y_stride,
+                                    *(b->base_src), b->src_stride,
                                     x->e_mbd.predictor, 16, &sse);
                 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
 
@@ -635,7 +635,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
             RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
                 (&x->e_mbd);
             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
-                                          (x->src.y_buffer, x->src.y_stride,
+                                          (*(b->base_src), b->src_stride,
                                           x->e_mbd.predictor, 16, &sse);
             rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -681,10 +681,14 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                 mvp.as_int = best_ref_mv.as_int;
             }
 
-            col_min = (best_ref_mv.as_mv.col - MAX_FULL_PEL_VAL) >>3;
-            col_max = (best_ref_mv.as_mv.col + MAX_FULL_PEL_VAL) >>3;
-            row_min = (best_ref_mv.as_mv.row - MAX_FULL_PEL_VAL) >>3;
-            row_max = (best_ref_mv.as_mv.row + MAX_FULL_PEL_VAL) >>3;
+            col_min = (best_ref_mv.as_mv.col < 0)?(-((abs(best_ref_mv.as_mv.col))>>3) - MAX_FULL_PEL_VAL)
+                                                 :((best_ref_mv.as_mv.col>>3) - MAX_FULL_PEL_VAL);
+            col_max = (best_ref_mv.as_mv.col < 0)?(-((abs(best_ref_mv.as_mv.col))>>3) + MAX_FULL_PEL_VAL)
+                                                 :((best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL);
+            row_min = (best_ref_mv.as_mv.row < 0)?(-((abs(best_ref_mv.as_mv.row))>>3) - MAX_FULL_PEL_VAL)
+                                                 :((best_ref_mv.as_mv.row>>3) - MAX_FULL_PEL_VAL);
+            row_max = (best_ref_mv.as_mv.row < 0)?(-((abs(best_ref_mv.as_mv.row))>>3) + MAX_FULL_PEL_VAL)
+                                                 :((best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL);
 
             // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
             if (x->mv_col_min < col_min )
@@ -904,6 +908,7 @@ void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
     MB_PREDICTION_MODE mode, best_mode = DC_PRED;
     int this_rd;
     unsigned int sse;
+    BLOCK *b = &x->block[0];
 
     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
@@ -915,7 +920,7 @@ void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
         RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
             (&x->e_mbd);
         distortion = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
-            (x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, &sse);
+            (*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse);
         rate = x->mbmode_cost[x->e_mbd.frame_type][mode];
         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 4f9d4126e..3d47412f7 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -141,13 +141,11 @@ static int get_max_filter_level(VP8_COMP *cpi, int base_qindex)
     // jbb chg: 20100118 - not so any more with this overquant stuff allow high values
     // with lots of intra coming in.
     int max_filter_level = MAX_LOOP_FILTER ;//* 3 / 4;
+    (void)base_qindex;
 
     if (cpi->twopass.section_intra_rating > 8)
         max_filter_level = MAX_LOOP_FILTER * 3 / 4;
 
-    (void) cpi;
-    (void) base_qindex;
-
     return max_filter_level;
 }
 
@@ -157,8 +155,8 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
     int best_err = 0;
     int filt_err = 0;
-    int min_filter_level = 0;
-    int max_filter_level = MAX_LOOP_FILTER * 3 / 4;   // PGW August 2006: Highest filter values almost always a bad idea
+    int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
+    int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
     int filt_val;
     int best_filt_val = cm->filter_level;
 
@@ -171,10 +169,6 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
     else
         cm->sharpness_level = cpi->oxcf.Sharpness;
 
-    // Enforce a minimum filter level based upon Q
-    min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
-    max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
-
     // Start the search at the previous frame filter level unless it is now out of range.
     if (cm->filter_level < min_filter_level)
         cm->filter_level = min_filter_level;
@@ -294,8 +288,8 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
     int best_err = 0;
     int filt_err = 0;
-    int min_filter_level;
-    int max_filter_level;
+    int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
+    int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
 
     int filter_step;
     int filt_high = 0;
@@ -329,10 +323,6 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
     else
         cm->sharpness_level = cpi->oxcf.Sharpness;
 
-    // Enforce a minimum filter level based upon Q
-    min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
-    max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
-
     // Start the search at the previous frame filter level unless it is now out of range.
     filt_mid = cm->filter_level;
 
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index e0359057a..355542e47 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -543,8 +543,8 @@ static void macro_block_yrd( MACROBLOCK *mb,
     BLOCK *beptr;
     int d;
 
-    ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, mb->src.y_buffer,
-                                   mb->e_mbd.predictor, mb->src.y_stride );
+    ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, *(mb->block[0].base_src),
+                                   mb->e_mbd.predictor, mb->block[0].src_stride );
 
     // Fdct and building the 2nd order block
     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
@@ -1330,10 +1330,14 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
 
         if (bsi.segment_rd < best_rd)
         {
-            int col_min = (best_ref_mv->as_mv.col - MAX_FULL_PEL_VAL) >>3;
-            int col_max = (best_ref_mv->as_mv.col + MAX_FULL_PEL_VAL) >>3;
-            int row_min = (best_ref_mv->as_mv.row - MAX_FULL_PEL_VAL) >>3;
-            int row_max = (best_ref_mv->as_mv.row + MAX_FULL_PEL_VAL) >>3;
+            int col_min = (best_ref_mv->as_mv.col < 0)?(-((abs(best_ref_mv->as_mv.col))>>3) - MAX_FULL_PEL_VAL)
+                                                      :((best_ref_mv->as_mv.col>>3) - MAX_FULL_PEL_VAL);
+            int col_max = (best_ref_mv->as_mv.col < 0)?(-((abs(best_ref_mv->as_mv.col))>>3) + MAX_FULL_PEL_VAL)
+                                                      :((best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL);
+            int row_min = (best_ref_mv->as_mv.row < 0)?(-((abs(best_ref_mv->as_mv.row))>>3) - MAX_FULL_PEL_VAL)
+                                                      :((best_ref_mv->as_mv.row>>3) - MAX_FULL_PEL_VAL);
+            int row_max = (best_ref_mv->as_mv.row < 0)?(-((abs(best_ref_mv->as_mv.row))>>3) + MAX_FULL_PEL_VAL)
+                                                      :((best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL);
 
             int tmp_col_min = x->mv_col_min;
             int tmp_col_max = x->mv_col_max;
@@ -1633,6 +1637,8 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
 {
 
     int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
+    BLOCK *b = &x->block[0];
+    unsigned char *src_y_ptr = *(b->base_src);
 
     //calculate sad for current frame 3 nearby MBs.
     if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
@@ -1641,16 +1647,16 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
     }else if(xd->mb_to_top_edge==0)
     {   //only has left MB for sad calculation.
         near_sad[0] = near_sad[2] = INT_MAX;
-        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
+        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
     }else if(xd->mb_to_left_edge ==0)
     {   //only has left MB for sad calculation.
         near_sad[1] = near_sad[2] = INT_MAX;
-        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
+        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
     }else
     {
-        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
-        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
-        near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
+        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
+        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
+        near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
     }
 
     if(cpi->common.last_frame_type != KEY_FRAME)
@@ -1665,14 +1671,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
         if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
 
         if(near_sad[4] != INT_MAX)
-            near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
+            near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
         if(near_sad[5] != INT_MAX)
-            near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
-        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+            near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
+        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
         if(near_sad[6] != INT_MAX)
-            near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff);
+            near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff);
         if(near_sad[7] != INT_MAX)
-            near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff);
+            near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff);
     }
 
     if(cpi->common.last_frame_type != KEY_FRAME)
@@ -1876,10 +1882,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
             /* adjust mvp to make sure it is within MV range */
             vp8_clamp_mv(&mvp,
-                         best_ref_mv.as_mv.col - MAX_FULL_PEL_VAL,
-                         best_ref_mv.as_mv.col + MAX_FULL_PEL_VAL,
-                         best_ref_mv.as_mv.row - MAX_FULL_PEL_VAL,
-                         best_ref_mv.as_mv.row + MAX_FULL_PEL_VAL);
+                         best_ref_mv.as_mv.col - (MAX_FULL_PEL_VAL<<3),
+                         best_ref_mv.as_mv.col + (MAX_FULL_PEL_VAL<<3),
+                         best_ref_mv.as_mv.row - (MAX_FULL_PEL_VAL<<3),
+                         best_ref_mv.as_mv.row + (MAX_FULL_PEL_VAL<<3));
         }
 
         // Check to see if the testing frequency for this mode is at its max
@@ -2011,10 +2017,14 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
             int sadpb = x->sadperbit16;
 
-            int col_min = (best_ref_mv.as_mv.col - MAX_FULL_PEL_VAL) >>3;
-            int col_max = (best_ref_mv.as_mv.col + MAX_FULL_PEL_VAL) >>3;
-            int row_min = (best_ref_mv.as_mv.row - MAX_FULL_PEL_VAL) >>3;
-            int row_max = (best_ref_mv.as_mv.row + MAX_FULL_PEL_VAL) >>3;
+            int col_min = (best_ref_mv.as_mv.col < 0)?(-((abs(best_ref_mv.as_mv.col))>>3) - MAX_FULL_PEL_VAL)
+                                                     :((best_ref_mv.as_mv.col>>3) - MAX_FULL_PEL_VAL);
+            int col_max = (best_ref_mv.as_mv.col < 0)?(-((abs(best_ref_mv.as_mv.col))>>3) + MAX_FULL_PEL_VAL)
+                                                     :((best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL);
+            int row_min = (best_ref_mv.as_mv.row < 0)?(-((abs(best_ref_mv.as_mv.row))>>3) - MAX_FULL_PEL_VAL)
+                                                     :((best_ref_mv.as_mv.row>>3) - MAX_FULL_PEL_VAL);
+            int row_max = (best_ref_mv.as_mv.row < 0)?(-((abs(best_ref_mv.as_mv.row))>>3) + MAX_FULL_PEL_VAL)
+                                                     :((best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL);
 
             int tmp_col_min = x->mv_col_min;
             int tmp_col_max = x->mv_col_max;
@@ -2165,7 +2175,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                     threshold = x->encode_breakout;
 
                 var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
-                        (x->src.y_buffer, x->src.y_stride,
+                        (*(b->base_src), b->src_stride,
                         x->e_mbd.predictor, 16, &sse);
 
                 if (sse < threshold)
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 37a3205b2..7382a9134 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -147,6 +147,7 @@
 %if ABI_IS_32BIT
   %if CONFIG_PIC=1
   %ifidn __OUTPUT_FORMAT__,elf32
+    %define GET_GOT_SAVE_ARG 1
     %define WRT_PLT wrt ..plt
     %macro GET_GOT 1
       extern _GLOBAL_OFFSET_TABLE_
@@ -165,6 +166,7 @@
       %define RESTORE_GOT pop %1
     %endmacro
   %elifidn __OUTPUT_FORMAT__,macho32
+    %define GET_GOT_SAVE_ARG 1
     %macro GET_GOT 1
       push %1
       call %%get_got
diff --git a/vpxenc.c b/vpxenc.c
index 82f9071c4..042f07b81 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1347,6 +1347,8 @@ static void update_rate_histogram(struct rate_hist          *hist,
     if(now < cfg->rc_buf_initial_sz)
         return;
 
+    then = now;
+
     /* Sum the size over the past rc_buf_sz ms */
     for(i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--)
     {
@@ -1358,6 +1360,9 @@ static void update_rate_histogram(struct rate_hist          *hist,
         sum_sz += hist->sz[i_idx];
     }
 
+    if (now == then)
+        return;
+
     avg_bitrate = sum_sz * 8 * 1000 / (now - then);
     idx = avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000);
     if(idx < 0)