29 files changed, 750 insertions, 206 deletions
diff --git a/build/make/configure.sh b/build/make/configure.sh
index b65f3b73a..f8329aacd 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -78,6 +78,7 @@ Build options:
   --log=yes|no|FILE           file configure log is written to [config.err]
   --target=TARGET             target platform tuple [generic-gnu]
   --cpu=CPU                   optimize for a specific cpu rather than a family
+  --extra-cflags=ECFLAGS      add ECFLAGS to CFLAGS [$CFLAGS]
   ${toggle_extra_warnings}    emit harmless warnings (always non-fatal)
   ${toggle_werror}            treat warnings as errors, if possible
                               (not available with all compilers)
@@ -442,6 +443,9 @@ process_common_cmdline() {
         ;;
         --cpu=*) tune_cpu="$optval"
         ;;
+        --extra-cflags=*)
+        extra_cflags="${optval}"
+        ;;
         --enable-?*|--disable-?*)
         eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
         echo "${CMDLINE_SELECT} ${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null || die_unknown $opt
@@ -665,7 +669,7 @@ process_common_toolchain() {
                 check_add_cflags -march=${tgt_isa}
                 check_add_asflags -march=${tgt_isa}
             fi
-
+            enabled debug && add_asflags -g
             asm_conversion_cmd="${source_path}/build/make/ads2gas.pl"
             ;;
         rvct)
@@ -690,6 +694,7 @@ process_common_toolchain() {
             arch_int=${tgt_isa##armv}
             arch_int=${arch_int%%te}
             check_add_asflags --pd "\"ARCHITECTURE SETA ${arch_int}\""
+            enabled debug && add_asflags -g
         ;;
         esac
 
@@ -972,6 +977,12 @@ EOF
         add_cflags -D_LARGEFILE_SOURCE
         add_cflags -D_FILE_OFFSET_BITS=64
     fi
+
+    # append any user defined extra cflags
+    if [ -n "${extra_cflags}" ] ; then
+        check_add_cflags ${extra_cflags} || \
+        die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
+    fi
 }
 
 process_toolchain() {
diff --git a/configure b/configure
index 5a502ae33..cb568d7aa 100755
--- a/configure
+++ b/configure
@@ -300,7 +300,7 @@ process_cmdline() {
         optval="${opt#*=}"
         case "$opt" in
         --disable-codecs) for c in ${CODECS}; do disable $c; done ;;
-        *) process_common_cmdline $opt
+        *) process_common_cmdline "$opt"
         ;;
         esac
     done
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index a38f0b72b..5a8991e65 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -282,6 +282,8 @@ typedef struct
 
     void *current_bc;
 
+    int corrupted;
+
 #if CONFIG_RUNTIME_CPU_DETECT
     struct VP8_COMMON_RTCD  *rtcd;
 #endif
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index c851aa7e5..d14f4dceb 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -206,4 +206,29 @@ static int vp8_decode_value(BOOL_DECODER *br, int bits)
 
     return z;
 }
+
+static int vp8dx_bool_error(BOOL_DECODER *br)
+{
+  /* Check if we have reached the end of the buffer.
+   *
+   * Variable 'count' stores the number of bits in the 'value' buffer,
+   * minus 8. So if count == 8, there are 16 bits available to be read.
+   * Normally, count is filled with 8 and one byte is filled into the
+   * value buffer. When we reach the end of the buffer, count is instead
+   * filled with VP8_LOTS_OF_BITS, 8 of which represent the last 8 real
+   * bits from the bitstream. So the last bit in the bitstream will be
+   * represented by count == VP8_LOTS_OF_BITS - 16.
+   */
+    if ((br->count > VP8_BD_VALUE_SIZE)
+        && (br->count <= VP8_LOTS_OF_BITS - 16))
+    {
+       /* We have tried to decode bits after the end of
+        * stream was encountered.
+        */
+        return 1;
+    }
+
+    /* No error. */
+    return 0;
+}
 #endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 9305a0556..d3972b324 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -381,6 +381,12 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
         xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
+        if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
+        {
+            /* propagate errors from reference frames */
+            xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
+        }
+
         vp8_build_uvmvs(xd, pc->full_pixel);
 
         /*
@@ -391,6 +397,8 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
         */
         vp8_decode_macroblock(pbi, xd);
 
+        /* check if the boolean decoder has suffered an error */
+        xd->corrupted |= vp8dx_bool_error(xd->current_bc);
 
         recon_yoffset += 16;
         recon_uvoffset += 8;
@@ -555,6 +563,7 @@ static void init_frame(VP8D_COMP *pbi)
     xd->frame_type = pc->frame_type;
     xd->mode_info_context->mbmi.mode = DC_PRED;
     xd->mode_info_stride = pc->mode_info_stride;
+    xd->corrupted = 0; /* init without corruption */
 }
 
 int vp8_decode_frame(VP8D_COMP *pbi)
@@ -570,6 +579,10 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     int i, j, k, l;
     const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
 
+    /* start with no corruption of current frame */
+    xd->corrupted = 0;
+    pc->yv12_fb[pc->new_fb_idx].corrupted = 0;
+
     if (data_end - data < 3)
         vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                            "Truncated packet");
@@ -891,6 +904,14 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     stop_token_decoder(pbi);
 
+    /* Collect information about decoder corruption. */
+    /* 1. Check first boolean decoder for errors. */
+    pc->yv12_fb[pc->new_fb_idx].corrupted =
+        vp8dx_bool_error(bc);
+    /* 2. Check the macroblock information */
+    pc->yv12_fb[pc->new_fb_idx].corrupted |=
+        xd->corrupted;
+
     /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos); */
 
     /* If this was a kf or Gf note the Q used */
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index aa2709f5b..adbb1f336 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -254,12 +254,7 @@ static void ref_cnt_fb (int *buf, int *idx, int new_idx)
 /* If any buffer copy / swapping is signalled it should be done here. */
 static int swap_frame_buffers (VP8_COMMON *cm)
 {
-    int fb_to_update_with, err = 0;
-
-    if (cm->refresh_last_frame)
-        fb_to_update_with = cm->lst_fb_idx;
-    else
-        fb_to_update_with = cm->new_fb_idx;
+    int err = 0;
 
     /* The alternate reference frame or golden frame can be updated
      *  using the new, last, or golden/alt ref frame.  If it
@@ -271,7 +266,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
         int new_fb = 0;
 
         if (cm->copy_buffer_to_arf == 1)
-            new_fb = fb_to_update_with;
+            new_fb = cm->lst_fb_idx;
         else if (cm->copy_buffer_to_arf == 2)
             new_fb = cm->gld_fb_idx;
         else
@@ -285,7 +280,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
         int new_fb = 0;
 
         if (cm->copy_buffer_to_gf == 1)
-            new_fb = fb_to_update_with;
+            new_fb = cm->lst_fb_idx;
         else if (cm->copy_buffer_to_gf == 2)
             new_fb = cm->alt_fb_idx;
         else
@@ -334,6 +329,23 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->common.error.error_code = VPX_CODEC_OK;
 
+    if (size == 0)
+    {
+       /* This is used to signal that we are missing frames.
+        * We do not know if the missing frame(s) was supposed to update
+        * any of the reference buffers, but we act conservative and
+        * mark only the last buffer as corrupted.
+        */
+        cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
+
+        /* Signal that we have no frame to show. */
+        cm->show_frame = 0;
+
+        /* Nothing more to do. */
+        return 0;
+    }
+
+
 #if HAVE_ARMV7
 #if CONFIG_RUNTIME_CPU_DETECT
     if (cm->rtcd.flags & HAS_NEON)
@@ -356,6 +368,13 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         }
 #endif
         pbi->common.error.setjmp = 0;
+
+       /* We do not know if the missing frame(s) was supposed to update
+        * any of the reference buffers, but we act conservative and
+        * mark only the last buffer as corrupted.
+        */
+        cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
+
         if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
           cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
         return -1;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index dac990a26..fc8406eae 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -890,9 +890,18 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
+                if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
+                {
+                    /* propagate errors from reference frames */
+                    xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
+                }
+
                 vp8_build_uvmvs(xd, pc->full_pixel);
                 vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
 
+                /* check if the boolean decoder has suffered an error */
+                xd->corrupted |= vp8dx_bool_error(xd->current_bc);
+
                 if (pbi->common.filter_level)
                 {
                     /* Save decoded MB last row data for next-row decoding */
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 57cd318ee..42dae13de 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -65,6 +65,8 @@
 numparts_loop
     ldr     r10, [sp, #40]              ; ptr
     ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
+    sub     r5, r5, r11                 ; move start point with each partition
+                                        ; mb_rows starts at i
     str     r5,  [sp, #12]
 
     ; Reset all of the VP8 Writer data for each partition that
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
index 1b09cfe4c..1475f76df 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT  |vp8_sub_pixel_variance16x16_neon|
+    EXPORT  |vp8_sub_pixel_variance16x16_neon_func|
     ARM
     REQUIRE8
     PRESERVE8
@@ -24,7 +24,7 @@
 ; stack(r6) unsigned int *sse
 ;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon.
 
-|vp8_sub_pixel_variance16x16_neon| PROC
+|vp8_sub_pixel_variance16x16_neon_func| PROC
     push            {r4-r6, lr}
 
     ldr             r12, _BilinearTaps_coeff_
diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
new file mode 100644
index 000000000..4c7248543
--- /dev/null
+++ b/vp8/encoder/arm/variance_arm.c
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+
+#if HAVE_ARMV7
+
+unsigned int vp8_sub_pixel_variance16x16_neon
+(
+    const unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    const unsigned char *dst_ptr,
+    int dst_pixels_per_line,
+    unsigned int *sse
+)
+{
+  if (xoffset == 4 && yoffset == 0)
+    return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
+  else if (xoffset == 0 && yoffset == 4)
+    return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
+  else if (xoffset == 4 && yoffset == 4)
+    return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
+  else
+    return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
+}
+
+#endif
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 0e5f62fcf..3cbacfac3 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -30,6 +30,7 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon);
 //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);
 //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);
+extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon_func);
 extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);
 extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);
 extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index bf94e508b..3ad40efc0 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -112,6 +112,7 @@ typedef struct
 
     unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
     int optimize;
+    int q_index;
 
     void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
     void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 2a89c59f4..1689b43d1 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -365,6 +365,33 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
     x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
     x->block[24].zbin_extra = (short)zbin_extra;
+
+    /* save this macroblock QIndex for vp8_update_zbin_extra() */
+    x->q_index = QIndex;
+}
+void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
+{
+    int i;
+    int QIndex = x->q_index;
+    int zbin_extra;
+
+    // Y
+    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+    for (i = 0; i < 16; i++)
+    {
+        x->block[i].zbin_extra = (short)zbin_extra;
+    }
+
+    // UV
+    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+    for (i = 16; i < 24; i++)
+    {
+        x->block[i].zbin_extra = (short)zbin_extra;
+    }
+
+    // Y2
+    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+    x->block[24].zbin_extra = (short)zbin_extra;
 }
 
 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
@@ -710,9 +737,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
     }
 
     vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
-    //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) );
     vp8cx_initialize_me_consts(cpi, cm->base_qindex);
-    //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex);
 
     // Copy data over into macro block data sturctures.
 
@@ -1263,10 +1288,17 @@ int vp8cx_encode_inter_macroblock
 
     if (cpi->sf.RD)
     {
+        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
+
         /* Are we using the fast quantizer for the mode selection? */
         if(cpi->sf.use_fastquant_for_pick)
+        {
             cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
 
+            /* the fast quantizer does not use zbin_extra, so
+             * do not recalculate */
+            cpi->zbin_mode_boost_enabled = 0;
+        }
         inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
 
         /* switch back to the regular quantizer for the encode */
@@ -1275,6 +1307,9 @@ int vp8cx_encode_inter_macroblock
             cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
         }
 
+        /* restore cpi->zbin_mode_boost_enabled */
+        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
+
     }
     else
 #endif
@@ -1291,7 +1326,7 @@ int vp8cx_encode_inter_macroblock
 #endif
 
     // MB level adjutment to quantizer setup
-    if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
+    if (xd->segmentation_enabled)
     {
         // If cyclic update enabled
         if (cpi->cyclic_refresh_mode_enabled)
@@ -1301,9 +1336,14 @@ int vp8cx_encode_inter_macroblock
                 ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
             {
                 xd->mode_info_context->mbmi.segment_id = 0;
+
+                /* segment_id changed, so update */
+                vp8cx_mb_init_quantizer(cpi, x);
             }
         }
+    }
 
+    {
         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
         if (cpi->zbin_mode_boost_enabled)
         {
@@ -1327,7 +1367,7 @@ int vp8cx_encode_inter_macroblock
         else
             cpi->zbin_mode_boost = 0;
 
-        vp8cx_mb_init_quantizer(cpi,  x);
+        vp8_update_zbin_extra(cpi, x);
     }
 
     cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index c59613838..3e67bf53c 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1316,6 +1316,43 @@ void vp8_end_second_pass(VP8_COMP *cpi)
 {
 }
 
+// This function gives and estimate of how badly we believe
+// the predicition quality is decaying from frame to frame.
+double gf_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
+{
+    double prediction_decay_rate;
+    double motion_decay;
+    double motion_pct = next_frame->pcnt_motion;
+
+
+    // Initial basis is the % mbs inter coded
+    prediction_decay_rate = next_frame->pcnt_inter;
+
+    // High % motion -> somewhat higher decay rate
+    motion_decay = (1.0 - (motion_pct / 20.0));
+    if (motion_decay < prediction_decay_rate)
+        prediction_decay_rate = motion_decay;
+
+    // Adjustment to decay rate based on speed of motion
+    {
+        double this_mv_rabs;
+        double this_mv_cabs;
+        double distance_factor;
+
+        this_mv_rabs = fabs(next_frame->mvr_abs * motion_pct);
+        this_mv_cabs = fabs(next_frame->mvc_abs * motion_pct);
+
+        distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
+                               (this_mv_cabs * this_mv_cabs)) / 250.0;
+        distance_factor = ((distance_factor > 1.0)
+                                ? 0.0 : (1.0 - distance_factor));
+        if (distance_factor < prediction_decay_rate)
+            prediction_decay_rate = distance_factor;
+    }
+
+    return prediction_decay_rate;
+}
+
 // Analyse and define a gf/arf group .
 static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 {
@@ -1337,17 +1374,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     double decay_accumulator = 1.0;
 
     double boost_factor = IIFACTOR;
-    double loop_decay_rate = 1.00;        // Starting decay rate
+    double loop_decay_rate = 1.00;          // Starting decay rate
 
     double this_frame_mv_in_out = 0.0;
     double mv_in_out_accumulator = 0.0;
     double abs_mv_in_out_accumulator = 0.0;
     double mod_err_per_mb_accumulator = 0.0;
 
-    int max_bits = frame_max_bits(cpi);    // Max for a single frame
+    int max_bits = frame_max_bits(cpi);     // Max for a single frame
 
     unsigned char *fpmm_pos;
 
+    unsigned int allow_alt_ref =
+                    cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
+
     cpi->gf_group_bits = 0;
     cpi->gf_decay_rate = 0;
 
@@ -1362,47 +1402,57 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     // Preload the stats for the next frame.
     mod_frame_err = calculate_modified_err(cpi, this_frame);
 
-    // Note the error of the frame at the start of the group (this will be the GF frame error if we code a normal gf
+    // Note the error of the frame at the start of the group (this will be
+    // the GF frame error if we code a normal gf
     gf_first_frame_err = mod_frame_err;
 
-    // Special treatment if the current frame is a key frame (which is also a gf).
-    // If it is then its error score (and hence bit allocation) need to be subtracted out
-    // from the calculation for the GF group
+    // Special treatment if the current frame is a key frame (which is also
+    // a gf). If it is then its error score (and hence bit allocation) need
+    // to be subtracted out from the calculation for the GF group
     if (cpi->common.frame_type == KEY_FRAME)
         gf_group_err -= gf_first_frame_err;
 
-    // Scan forward to try and work out how many frames the next gf group should contain and
-    // what level of boost is appropriate for the GF or ARF that will be coded with the group
+    // Scan forward to try and work out how many frames the next gf group
+    // should contain and what level of boost is appropriate for the GF
+    // or ARF that will be coded with the group
     i = 0;
 
-    while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
+    while (((i < cpi->static_scene_max_gf_interval) ||
+            ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) &&
+           (i < cpi->frames_to_key))
     {
         double r;
         double this_frame_mvr_ratio;
         double this_frame_mvc_ratio;
         double motion_decay;
-        double motion_pct = next_frame.pcnt_motion;
+        //double motion_pct = next_frame.pcnt_motion;
+        double motion_pct;
 
-        i++;                                                    // Increment the loop counter
+        i++;    // Increment the loop counter
 
         // Accumulate error score of frames in this gf group
         mod_frame_err = calculate_modified_err(cpi, this_frame);
 
         gf_group_err += mod_frame_err;
 
-        mod_err_per_mb_accumulator += mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs);
+        mod_err_per_mb_accumulator +=
+            mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs);
 
         if (EOF == vp8_input_stats(cpi, &next_frame))
             break;
 
         // Accumulate motion stats.
+        motion_pct = next_frame.pcnt_motion;
         mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_pct);
         mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_pct);
 
         //Accumulate Motion In/Out of frame stats
-        this_frame_mv_in_out = next_frame.mv_in_out_count * next_frame.pcnt_motion;
-        mv_in_out_accumulator += next_frame.mv_in_out_count * next_frame.pcnt_motion;
-        abs_mv_in_out_accumulator += fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
+        this_frame_mv_in_out =
+            next_frame.mv_in_out_count * motion_pct;
+        mv_in_out_accumulator +=
+            next_frame.mv_in_out_count * motion_pct;
+        abs_mv_in_out_accumulator +=
+            fabs(next_frame.mv_in_out_count * motion_pct);
 
         // If there is a significant amount of motion
         if (motion_pct > 0.05)
@@ -1431,7 +1481,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         }
 
         // Underlying boost factor is based on inter intra error ratio
-        r = (boost_factor * (next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)));
+        r = ( boost_factor *
+              ( next_frame.intra_error /
+                DOUBLE_DIVIDE_CHECK(next_frame.coded_error)));
 
         if (next_frame.intra_error > cpi->gf_intra_err_min)
             r = (IIKFACTOR2 * next_frame.intra_error /
@@ -1440,63 +1492,87 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             r = (IIKFACTOR2 * cpi->gf_intra_err_min /
                      DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
 
-        // Increase boost for frames where new data coming into frame (eg zoom out)
-        // Slightly reduce boost if there is a net balance of motion out of the frame (zoom in)
+        // Increase boost for frames where new data coming into frame
+        // (eg zoom out). Slightly reduce boost if there is a net balance
+        // of motion out of the frame (zoom in).
         // The range for this_frame_mv_in_out is -1.0 to +1.0
         if (this_frame_mv_in_out > 0.0)
             r += r * (this_frame_mv_in_out * 2.0);
+        // In extreme case boost is halved
         else
-            r += r * (this_frame_mv_in_out / 2.0);  // In extreme case boost is halved
+            r += r * (this_frame_mv_in_out / 2.0);
 
         if (r > GF_RMAX)
             r = GF_RMAX;
 
-        // Adjust loop decay rate
-        //if ( next_frame.pcnt_inter < loop_decay_rate )
-        loop_decay_rate = next_frame.pcnt_inter;
+        loop_decay_rate = gf_prediction_decay_rate(cpi, &next_frame);
 
-        // High % motion -> somewhat higher decay rate
-        motion_decay = (1.0 - (motion_pct / 20.0));
-        if (motion_decay < loop_decay_rate)
-            loop_decay_rate = motion_decay;
+        // Cumulative effect of decay
+        decay_accumulator = decay_accumulator * loop_decay_rate;
+        decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
 
-        // Adjustment to decay rate based on speed of motion
+        boost_score += (decay_accumulator * r);
+
+        // Break clause to detect very still sections after motion
+        // For example a staic image after a fade or other transition
+        // instead of a clean key frame.
+        if ( (i > MIN_GF_INTERVAL) &&
+             (loop_decay_rate >= 0.999) &&
+             (decay_accumulator < 0.9) )
         {
-            double this_mv_rabs;
-            double this_mv_cabs;
-            double distance_factor;
+            int j;
+            FIRSTPASS_STATS * position = cpi->stats_in;
+            FIRSTPASS_STATS tmp_next_frame;
+            double decay_rate;
+
+            // Look ahead a few frames to see if static condition
+            // persists...
+            for ( j = 0; j < 4; j++ )
+            {
+                if (EOF == vp8_input_stats(cpi, &tmp_next_frame))
+                    break;
 
-            this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
-            this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
+                decay_rate = gf_prediction_decay_rate(cpi, &tmp_next_frame);
+                if ( decay_rate < 0.999 )
+                    break;
+            }
+            reset_fpf_position(cpi, position);            // Reset file position
 
-            distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
-                                   (this_mv_cabs * this_mv_cabs)) / 250.0;
-            distance_factor = ((distance_factor > 1.0)
-                                    ? 0.0 : (1.0 - distance_factor));
-            if (distance_factor < loop_decay_rate)
-                loop_decay_rate = distance_factor;
-        }
+            // Force GF not alt ref
+            if ( j == 4 )
+            {
+                if (0)
+                {
+                    FILE *f = fopen("fadegf.stt", "a");
+                    fprintf(f, " %8d %8d %10.4f %10.4f %10.4f\n",
+                         cpi->common.current_video_frame+i, i,
+                         loop_decay_rate, decay_accumulator,
+                         boost_score );
+                    fclose(f);
+                }
 
-        // Cumulative effect of decay
-        decay_accumulator = decay_accumulator * loop_decay_rate;
-        decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
-        //decay_accumulator = ( loop_decay_rate < decay_accumulator ) ? loop_decay_rate : decay_accumulator;
+                allow_alt_ref = FALSE;
 
-        boost_score += (decay_accumulator * r);
+                boost_score = old_boost_score;
+                break;
+            }
+        }
 
         // Break out conditions.
-        if (   /* i>4 || */
+        if  (   /* i>4 || */
+            // Break at cpi->max_gf_interval unless almost totally static
+            (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) ||
             (
-                (i > MIN_GF_INTERVAL) &&                            // Dont break out with a very short interval
-                ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) &&      // Dont break out very close to a key frame
+                // Dont break out with a very short interval
+                (i > MIN_GF_INTERVAL) &&
+                // Dont break out very close to a key frame
+                ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) &&
                 ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
                 ((mv_ratio_accumulator > 100.0) ||
                  (abs_mv_in_out_accumulator > 3.0) ||
                  (mv_in_out_accumulator < -2.0) ||
-                 ((boost_score - old_boost_score) < 2.0)
-                )
-            )
-        )
+                 ((boost_score - old_boost_score) < 2.0))
+            ) )
         {
             boost_score = old_boost_score;
             break;
@@ -1507,7 +1583,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         old_boost_score = boost_score;
     }
 
-    cpi->gf_decay_rate = (i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0;
+    cpi->gf_decay_rate =
+        (i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0;
 
     // When using CBR apply additional buffer related upper limits
     if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
@@ -1517,7 +1594,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         // For cbr apply buffer related limits
         if (cpi->drop_frames_allowed)
         {
-            int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100);
+            int df_buffer_level = cpi->oxcf.drop_frames_water_mark *
+                                  (cpi->oxcf.optimal_buffer_level / 100);
 
             if (cpi->buffer_level > df_buffer_level)
                 max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth);
@@ -1540,10 +1618,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     cpi->gfu_boost = (int)(boost_score * 100.0) >> 4;
 
     // Should we use the alternate refernce frame
-    if (cpi->oxcf.play_alternate &&
-        cpi->oxcf.lag_in_frames &&
+    if (allow_alt_ref &&
         (i >= MIN_GF_INTERVAL) &&
-        (i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) &&          // dont use ARF very near next kf
+        // dont use ARF very near next kf
+        (i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) &&
         (((next_frame.pcnt_inter > 0.75) &&
           ((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) &&
           //(cpi->gfu_boost>150) &&
@@ -2345,12 +2423,35 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     if (cpi->oxcf.auto_key
         && cpi->frames_to_key > (int)cpi->key_frame_frequency )
     {
+        int current_pos = cpi->stats_in;
+        FIRSTPASS_STATS tmp_frame;
+
         cpi->frames_to_key /= 2;
 
-        // Estimate corrected kf group error
-        kf_group_err /= 2.0;
-        kf_group_intra_err /= 2.0;
-        kf_group_coded_err /= 2.0;
+        // Copy first frame details
+        vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
+
+        // Reset to the start of the group
+        reset_fpf_position(cpi, start_position);
+
+        kf_group_err = 0;
+        kf_group_intra_err = 0;
+        kf_group_coded_err = 0;
+
+        // Rescan to get the correct error data for the forced kf group
+        for( i = 0; i < cpi->frames_to_key; i++ )
+        {
+            // Accumulate kf group errors
+            kf_group_err += calculate_modified_err(cpi, &tmp_frame);
+            kf_group_intra_err += tmp_frame.intra_error;
+            kf_group_coded_err += tmp_frame.coded_error;
+
+            // Load a the next frame's stats
+            vp8_input_stats(cpi, &tmp_frame);
+        }
+
+        // Reset to the start of the group
+        reset_fpf_position(cpi, current_pos);
 
         cpi->next_key_frame_forced = TRUE;
     }
@@ -2449,7 +2550,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     {
         double r;
         double motion_decay;
-        double motion_pct = next_frame.pcnt_motion;
+        double motion_pct;
 
         if (EOF == vp8_input_stats(cpi, &next_frame))
             break;
@@ -2469,6 +2570,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         loop_decay_rate = next_frame.pcnt_inter;
 
         // High % motion -> somewhat higher decay rate
+        motion_pct = next_frame.pcnt_motion;
         motion_decay = (1.0 - (motion_pct / 20.0));
         if (motion_decay < loop_decay_rate)
             loop_decay_rate = motion_decay;
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index be00d0218..4738a5b28 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -91,8 +91,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
 
     cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
     cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-
+#if !(CONFIG_REALTIME_ONLY)
     cpi->rtcd.search.full_search             = vp8_full_search_sad;
+#endif
     cpi->rtcd.search.diamond_search          = vp8_diamond_search_sad;
 
     cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_c;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 9b91739cc..d9923fbe9 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -408,6 +408,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
         break;
     case 3:
+    default:
         this_mv.col += 4;
         this_mv.row += 4;
         diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
@@ -1387,8 +1388,6 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     else
         return INT_MAX;
 }
-#endif
-
 
 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
 {
@@ -1541,6 +1540,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     else
         return INT_MAX;
 }
+#endif /* !(CONFIG_REALTIME_ONLY) */
 
 #ifdef ENTROPY_STATS
 void print_mode_context(void)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index e0d1d7043..900785364 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -314,7 +314,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
     vpx_free(cpi->tok);
     cpi->tok = 0;
 
-    // Structure used to minitor GF useage
+    // Structure used to monitor GF usage
     if (cpi->gf_active_flags != 0)
         vpx_free(cpi->gf_active_flags);
 
@@ -325,6 +325,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
 
     cpi->mb.pip = 0;
 
+#if !(CONFIG_REALTIME_ONLY)
     if(cpi->total_stats)
         vpx_free(cpi->total_stats);
 
@@ -334,6 +335,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
         vpx_free(cpi->this_frame_stats);
 
     cpi->this_frame_stats = 0;
+#endif
 }
 
 static void enable_segmentation(VP8_PTR ptr)
@@ -1448,6 +1450,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
 
     cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
 
+#if !(CONFIG_REALTIME_ONLY)
     if(cpi->total_stats)
         vpx_free(cpi->total_stats);
 
@@ -1461,6 +1464,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
     if(!cpi->total_stats || !cpi->this_frame_stats)
         vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                            "Failed to allocate firstpass stats");
+#endif
 }
 
 
@@ -1497,21 +1501,28 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
     cpi->per_frame_bandwidth          = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
     cpi->av_per_frame_bandwidth        = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
     cpi->min_frame_bandwidth          = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
-    cpi->max_gf_interval = (int)(cpi->output_frame_rate / 2) + 2;
 
-    //cpi->max_gf_interval = (int)(cpi->output_frame_rate * 2 / 3) + 1;
-    //cpi->max_gf_interval = 24;
+    // Set Maximum gf/arf interval
+    cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
 
-    if (cpi->max_gf_interval < 12)
+    if(cpi->max_gf_interval < 12)
         cpi->max_gf_interval = 12;
 
+    // Extended interval for genuinely static scenes
+    cpi->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
 
-    // Special conditions when altr ref frame enabled in lagged compress mode
+     // Special conditions when altr ref frame enabled in lagged compress mode
     if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames)
     {
         if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1)
             cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1;
+
+        if (cpi->static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1)
+            cpi->static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1;
     }
+
+    if ( cpi->max_gf_interval > cpi->static_scene_max_gf_interval )
+        cpi->max_gf_interval = cpi->static_scene_max_gf_interval;
 }
 
 
@@ -1551,6 +1562,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
         cpi->auto_worst_q              = 0;
         cpi->oxcf.best_allowed_q            = MINQ;
         cpi->oxcf.worst_allowed_q           = MAXQ;
+        cpi->oxcf.cq_level = MINQ;
 
         cpi->oxcf.end_usage                = USAGE_STREAM_FROM_SERVER;
         cpi->oxcf.starting_buffer_level     =   4000;
@@ -1651,6 +1663,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
 
     cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
     cpi->oxcf.best_allowed_q  = q_trans[oxcf->best_allowed_q];
+    cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
 
     if (oxcf->fixed_q >= 0)
     {
@@ -1740,6 +1753,8 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     cpi->avg_frame_qindex             = cpi->oxcf.worst_allowed_q;
     cpi->best_quality                = cpi->oxcf.best_allowed_q;
     cpi->active_best_quality          = cpi->oxcf.best_allowed_q;
+    cpi->cq_target_quality = cpi->oxcf.cq_level;
+
     cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
 
     cpi->rolling_target_bits          = cpi->av_per_frame_bandwidth;
@@ -1936,6 +1951,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
 
     cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
     cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
+    cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
 
     if (oxcf->fixed_q >= 0)
     {
@@ -2028,7 +2044,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     cpi->active_best_quality          = cpi->oxcf.best_allowed_q;
     cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
 
-    // Experimental cq target value
     cpi->cq_target_quality = cpi->oxcf.cq_level;
 
     cpi->rolling_target_bits          = cpi->av_per_frame_bandwidth;
@@ -3146,8 +3161,8 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi)
     // Update data structure that monitors level of reference to last GF
     vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
     cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
-    // this frame refreshes means next frames don't unless specified by user
 
+    // this frame refreshes means next frames don't unless specified by user
     cpi->common.frames_since_golden = 0;
 
     // Clear the alternate reference update pending flag.
@@ -3817,35 +3832,46 @@ static void encode_frame_to_data_rate
 
         if ( cm->frame_type == KEY_FRAME )
         {
-            // Special case for key frames forced because we have reached
-            // the maximum key frame interval. Here force the Q to a range
-            // close to but just below the ambient Q to minimize the risk
-            // of popping
-            if ( cpi->this_key_frame_forced )
-            {
-                cpi->active_worst_quality = cpi->avg_frame_qindex * 7/8;
-                cpi->active_best_quality = cpi->avg_frame_qindex * 2/3;
-            }
-            else
+            if ( cpi->pass == 2 )
             {
-               if ( cpi->pass == 2 )
-               {
-                   if (cpi->gfu_boost > 600)
-                       cpi->active_best_quality = kf_low_motion_minq[Q];
-                   else
-                       cpi->active_best_quality = kf_high_motion_minq[Q];
-               }
-               // One pass more conservative
-               else
+                if (cpi->gfu_boost > 600)
+                   cpi->active_best_quality = kf_low_motion_minq[Q];
+                else
                    cpi->active_best_quality = kf_high_motion_minq[Q];
+
+                // Special case for key frames forced because we have reached
+                // the maximum key frame interval. Here force the Q to a range
+                // based on the ambient Q to reduce the risk of popping
+                if ( cpi->this_key_frame_forced )
+                {
+                    if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8)
+                        cpi->active_best_quality = cpi->avg_frame_qindex * 7/8;
+                    else if ( cpi->active_best_quality < cpi->avg_frame_qindex >> 2 )
+                        cpi->active_best_quality = cpi->avg_frame_qindex >> 2;
+                }
             }
+            // One pass more conservative
+            else
+               cpi->active_best_quality = kf_high_motion_minq[Q];
         }
 
         else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
         {
-            if (cpi->avg_frame_qindex < cpi->active_worst_quality)
+            // Use the lower of cpi->active_worst_quality and recent
+            // average Q as basis for GF/ARF Q limit unless last frame was
+            // a key frame.
+            if ( (cpi->frames_since_key > 1) &&
+                 (cpi->avg_frame_qindex < cpi->active_worst_quality) )
+            {
                 Q = cpi->avg_frame_qindex;
 
+                if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
+                     (Q < cpi->oxcf.cq_level) )
+                {
+                    Q = cpi->oxcf.cq_level;
+                }
+            }
+
             if ( cpi->pass == 2 )
             {
                 if ( cpi->gfu_boost > 1000 )
@@ -4153,9 +4179,44 @@ static void encode_frame_to_data_rate
             active_worst_qchanged = FALSE;
 
 #if !(CONFIG_REALTIME_ONLY)
+        // Special case handling for forced key frames
+        if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced )
+        {
+            int last_q = Q;
+            int kf_err = vp8_calc_ss_err(cpi->Source,
+                                         &cm->yv12_fb[cm->new_fb_idx],
+                                         IF_RTCD(&cpi->rtcd.variance));
+
+            // The key frame is not good enough
+            if ( kf_err > ((cpi->ambient_err * 7) >> 3) )
+            {
+                // Lower q_high
+                q_high = (Q > q_low) ? (Q - 1) : q_low;
+
+                // Adjust Q
+                Q = (q_high + q_low) >> 1;
+            }
+            // The key frame is much better than the previous frame
+            else if ( kf_err < (cpi->ambient_err >> 1) )
+            {
+                // Raise q_low
+                q_low = (Q < q_high) ? (Q + 1) : q_high;
+
+                // Adjust Q
+                Q = (q_high + q_low + 1) >> 1;
+            }
+
+            // Clamp Q to upper and lower limits:
+            if (Q > q_high)
+                Q = q_high;
+            else if (Q < q_low)
+                Q = q_low;
+
+            Loop = ((Q != last_q)) ? TRUE : FALSE;
+        }
 
         // Is the projected frame size out of range and are we allowed to attempt to recode.
-        if ( recode_loop_test( cpi,
+        else if ( recode_loop_test( cpi,
                                frame_over_shoot_limit, frame_under_shoot_limit,
                                Q, top_index, bottom_index ) )
         {
@@ -4171,7 +4232,7 @@ static void encode_frame_to_data_rate
                 //if ( cpi->zbin_over_quant == 0 )
                 q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value
 
-                if (cpi->zbin_over_quant > 0)           // If we are using over quant do the same for zbin_oq_low
+                if (cpi->zbin_over_quant > 0)            // If we are using over quant do the same for zbin_oq_low
                     zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
 
                 //if ( undershoot_seen || (Q == MAXQ) )
@@ -4314,9 +4375,15 @@ static void encode_frame_to_data_rate
     }
 #endif
 
-    // Update the GF useage maps.
-    // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
-    vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
+    // Special case code to reduce pulsing when key frames are forced at a
+    // fixed interval. Note the reconstruction error if it is the frame before
+    // the force key frame
+    if ( cpi->next_key_frame_forced && (cpi->frames_to_key == 0) )
+    {
+        cpi->ambient_err = vp8_calc_ss_err(cpi->Source,
+                                           &cm->yv12_fb[cm->new_fb_idx],
+                                           IF_RTCD(&cpi->rtcd.variance));
+    }
 
     // This frame's MVs are saved and will be used in next frame's MV prediction.
     if(cm->show_frame)   //do not save for altref frame
@@ -4343,7 +4410,6 @@ static void encode_frame_to_data_rate
       }
     }
 
-
     // Update the GF useage maps.
     // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
     vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
@@ -4374,8 +4440,6 @@ static void encode_frame_to_data_rate
     else
         cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
 
-
-
     //#pragma omp parallel sections
     {
 
@@ -4483,9 +4547,7 @@ static void encode_frame_to_data_rate
     }
 
     // Keep a record of ambient average Q.
-    if (cm->frame_type == KEY_FRAME)
-        cpi->avg_frame_qindex = cm->base_qindex;
-    else
+    if (cm->frame_type != KEY_FRAME)
         cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2;
 
     // Keep a record from which we can calculate the average Q excluding GF updates and key frames
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 6b07e2f22..8a97e983b 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -320,6 +320,9 @@ typedef struct
     unsigned int this_key_frame_forced;
     unsigned int next_key_frame_forced;
 
+    // Ambient reconstruction err target for force key frames
+    int ambient_err;
+
     unsigned int mode_check_freq[MAX_MODES];
     unsigned int mode_test_hit_counts[MAX_MODES];
     unsigned int mode_chosen_counts[MAX_MODES];
@@ -399,6 +402,7 @@ typedef struct
     int kf_overspend_bits;            // Extra bits spent on key frames that need to be recovered on inter frames
     int kf_bitrate_adjustment;        // Current number of bit s to try and recover on each inter frame.
     int max_gf_interval;
+    int static_scene_max_gf_interval;
     int baseline_gf_interval;
     int gf_decay_rate;
     int active_arnr_frames;           // <= cpi->oxcf.arnr_max_frames
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index b2a3e117f..3b898f1b2 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -43,7 +43,9 @@
 #endif
 
 
-void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
+extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
+extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
+
 
 #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
 
@@ -1990,7 +1992,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
             }
 
-            vp8cx_mb_init_quantizer(cpi, x);
+            vp8_update_zbin_extra(cpi, x);
         }
 
         switch (this_mode)
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index cefa0a956..7178e7e31 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -493,8 +493,8 @@ sym(vp8_get8x8var_sse2):
 ;    unsigned char *src_ptr,
 ;    int src_pixels_per_line,
 ;    unsigned int Height,
-;    unsigned short *HFilter,
-;    unsigned short *VFilter,
+;    int  xoffset,
+;    int  yoffset,
 ;    int *sum,
 ;    unsigned int *sumsquared;;
 ;
@@ -504,68 +504,80 @@ sym(vp8_filter_block2d_bil_var_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 9
+    SAVE_XMM
     GET_GOT     rbx
     push rsi
     push rdi
-    sub         rsp, 16
+    push rbx
     ; end prolog
 
         pxor            xmm6,           xmm6                 ;
         pxor            xmm7,           xmm7                 ;
-        mov             rax,            arg(5) ;HFilter             ;
 
-        mov             rdx,            arg(6) ;VFilter             ;
-        mov             rsi,            arg(0) ;ref_ptr              ;
+        lea             rsi,            [GLOBAL(xmm_bi_rd)]  ; rounding
+        movdqa          xmm4,           XMMWORD PTR [rsi]
 
-        mov             rdi,            arg(2) ;src_ptr              ;
-        movsxd          rcx,            dword ptr arg(4) ;Height              ;
+        lea             rcx,            [GLOBAL(vp8_bilinear_filters_sse2)]
+        movsxd          rax,            dword ptr arg(5)     ; xoffset
+
+        cmp             rax,            0                    ; skip first_pass filter if xoffset=0
+        je              filter_block2d_bil_var_sse2_sp_only
+
+        shl             rax,            5                    ; point to filter coeff with xoffset
+        lea             rax,            [rax + rcx]          ; HFilter
+
+        movsxd          rdx,            dword ptr arg(6)     ; yoffset
+
+        cmp             rdx,            0                    ; skip second_pass filter if yoffset=0
+        je              filter_block2d_bil_var_sse2_fp_only
+
+        shl             rdx,            5
+        lea             rdx,            [rdx + rcx]          ; VFilter
+
+        mov             rsi,            arg(0)               ;ref_ptr
+        mov             rdi,            arg(2)               ;src_ptr
+        movsxd          rcx,            dword ptr arg(4)     ;Height
 
         pxor            xmm0,           xmm0                 ;
-        movq            xmm1,           QWORD PTR [rsi]               ;
+        movq            xmm1,           QWORD PTR [rsi]      ;
+        movq            xmm3,           QWORD PTR [rsi+1]    ;
 
-        movq            xmm3,           QWORD PTR [rsi+1]        ;
         punpcklbw       xmm1,           xmm0                 ;
-
-        pmullw          xmm1,           [rax]               ;
+        pmullw          xmm1,           [rax]                ;
         punpcklbw       xmm3,           xmm0
-            ;
         pmullw          xmm3,           [rax+16]             ;
-        paddw           xmm1,           xmm3                 ;
-
-        paddw           xmm1,           [GLOBAL(xmm_bi_rd)]  ;
-        psraw           xmm1,           xmm_filter_shift    ;
 
+        paddw           xmm1,           xmm3                 ;
+        paddw           xmm1,           xmm4                 ;
+        psraw           xmm1,           xmm_filter_shift     ;
         movdqa          xmm5,           xmm1
-%if ABI_IS_32BIT
-        add             rsi,            dword ptr arg(1) ;ref_pixels_per_line    ;
-%else
-        movsxd          r8,             dword ptr arg(1) ;ref_pixels_per_line    ;
-        add             rsi,            r8
+
+        movsxd          rbx,            dword ptr arg(1) ;ref_pixels_per_line
+        lea             rsi,            [rsi + rbx]
+%if ABI_IS_32BIT=0
+        movsxd          r9,             dword ptr arg(3) ;src_pixels_per_line
 %endif
-filter_block2d_bil_var_sse2_loop:
 
+filter_block2d_bil_var_sse2_loop:
         movq            xmm1,           QWORD PTR [rsi]               ;
         movq            xmm3,           QWORD PTR [rsi+1]             ;
 
         punpcklbw       xmm1,           xmm0                 ;
         pmullw          xmm1,           [rax]               ;
-
         punpcklbw       xmm3,           xmm0                 ;
         pmullw          xmm3,           [rax+16]             ;
 
         paddw           xmm1,           xmm3                 ;
-        paddw           xmm1,           [GLOBAL(xmm_bi_rd)]  ;
-
+        paddw           xmm1,           xmm4               ;
         psraw           xmm1,           xmm_filter_shift    ;
-        movdqa          xmm3,           xmm5                 ;
 
+        movdqa          xmm3,           xmm5                 ;
         movdqa          xmm5,           xmm1                 ;
-        pmullw          xmm3,           [rdx]               ;
 
+        pmullw          xmm3,           [rdx]               ;
         pmullw          xmm1,           [rdx+16]             ;
         paddw           xmm1,           xmm3                 ;
-
-        paddw           xmm1,           [GLOBAL(xmm_bi_rd)]  ;
+        paddw           xmm1,           xmm4                 ;
         psraw           xmm1,           xmm_filter_shift    ;
 
         movq            xmm3,           QWORD PTR [rdi]               ;
@@ -577,20 +589,103 @@ filter_block2d_bil_var_sse2_loop:
         pmaddwd         xmm1,           xmm1                 ;
         paddd           xmm7,           xmm1                 ;
 
+        lea             rsi,            [rsi + rbx]          ;ref_pixels_per_line
 %if ABI_IS_32BIT
-        add             rsi,            dword ptr arg(1) ;ref_pixels_per_line    ;
-        add             rdi,            dword ptr arg(3) ;src_pixels_per_line    ;
+        add             rdi,            dword ptr arg(3)     ;src_pixels_per_line
 %else
-        movsxd          r8,             dword ptr arg(1) ;ref_pixels_per_line    ;
-        movsxd          r9,             dword ptr arg(3) ;src_pixels_per_line    ;
-        add             rsi,            r8
-        add             rdi,            r9
+        lea             rdi,            [rdi + r9]
 %endif
 
         sub             rcx,            1                   ;
         jnz             filter_block2d_bil_var_sse2_loop       ;
 
+        jmp             filter_block2d_bil_variance
+
+filter_block2d_bil_var_sse2_sp_only:
+        movsxd          rdx,            dword ptr arg(6)     ; yoffset
+        shl             rdx,            5
+        lea             rdx,            [rdx + rcx]          ; VFilter
+
+        mov             rsi,            arg(0)               ;ref_ptr
+        mov             rdi,            arg(2)               ;src_ptr
+        movsxd          rcx,            dword ptr arg(4)     ;Height
+        movsxd          rax,            dword ptr arg(1)     ;ref_pixels_per_line
+
+        pxor            xmm0,           xmm0                 ;
+        movq            xmm1,           QWORD PTR [rsi]      ;
+        punpcklbw       xmm1,           xmm0                 ;
+
+        movsxd          rbx,            dword ptr arg(3)     ;src_pixels_per_line
+        lea             rsi,            [rsi + rax]
+
+filter_block2d_bil_sp_only_loop:
+        movq            xmm3,           QWORD PTR [rsi]             ;
+        punpcklbw       xmm3,           xmm0                 ;
+        movdqa          xmm5,           xmm3
+
+        pmullw          xmm1,           [rdx]               ;
+        pmullw          xmm3,           [rdx+16]             ;
+        paddw           xmm1,           xmm3                 ;
+        paddw           xmm1,           xmm4                 ;
+        psraw           xmm1,           xmm_filter_shift    ;
+
+        movq            xmm3,           QWORD PTR [rdi]               ;
+        punpcklbw       xmm3,           xmm0                 ;
+
+        psubw           xmm1,           xmm3                 ;
+        paddw           xmm6,           xmm1                 ;
+
+        pmaddwd         xmm1,           xmm1                 ;
+        paddd           xmm7,           xmm1                 ;
+
+        movdqa          xmm1,           xmm5                 ;
+        lea             rsi,            [rsi + rax]          ;ref_pixels_per_line
+        lea             rdi,            [rdi + rbx]          ;src_pixels_per_line
+
+        sub             rcx,            1                   ;
+        jnz             filter_block2d_bil_sp_only_loop       ;
+
+        jmp             filter_block2d_bil_variance
+
+filter_block2d_bil_var_sse2_fp_only:
+        mov             rsi,            arg(0)               ;ref_ptr
+        mov             rdi,            arg(2)               ;src_ptr
+        movsxd          rcx,            dword ptr arg(4)     ;Height
+        movsxd          rdx,            dword ptr arg(1)     ;ref_pixels_per_line
+
+        pxor            xmm0,           xmm0                 ;
+        movsxd          rbx,            dword ptr arg(3)     ;src_pixels_per_line
+
+filter_block2d_bil_fp_only_loop:
+        movq            xmm1,           QWORD PTR [rsi]       ;
+        movq            xmm3,           QWORD PTR [rsi+1]     ;
+
+        punpcklbw       xmm1,           xmm0                 ;
+        pmullw          xmm1,           [rax]               ;
+        punpcklbw       xmm3,           xmm0                 ;
+        pmullw          xmm3,           [rax+16]             ;
+
+        paddw           xmm1,           xmm3                 ;
+        paddw           xmm1,           xmm4  ;
+        psraw           xmm1,           xmm_filter_shift    ;
+
+        movq            xmm3,           QWORD PTR [rdi]     ;
+        punpcklbw       xmm3,           xmm0                 ;
+
+        psubw           xmm1,           xmm3                 ;
+        paddw           xmm6,           xmm1                 ;
+
+        pmaddwd         xmm1,           xmm1                 ;
+        paddd           xmm7,           xmm1                 ;
+        lea             rsi,            [rsi + rdx]
+        lea             rdi,            [rdi + rbx]          ;src_pixels_per_line
+
+        sub             rcx,            1                   ;
+        jnz             filter_block2d_bil_fp_only_loop       ;
+
+        jmp             filter_block2d_bil_variance
 
+filter_block2d_bil_variance:
         movdq2q         mm6,            xmm6                ;
         movdq2q         mm7,            xmm7                ;
 
@@ -627,12 +722,12 @@ filter_block2d_bil_var_sse2_loop:
         movd            [rsi],          mm2    ; xsum
         movd            [rdi],          mm4    ; xxsum
 
-
     ; begin epilog
-    add rsp, 16
+    pop rbx
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -974,3 +1069,13 @@ SECTION_RODATA
 align 16
 xmm_bi_rd:
     times 8 dw 64
+align 16
+vp8_bilinear_filters_sse2:
+    dw 128, 128, 128, 128, 128, 128, 128, 128,  0,  0,  0,  0,  0,  0,  0,  0
+    dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
+    dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
+    dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
+    dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+    dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
+    dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
+    dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 006e0a24a..6f79f0d23 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -76,8 +76,8 @@ void vp8_filter_block2d_bil_var_sse2
     const unsigned char *src_ptr,
     int src_pixels_per_line,
     unsigned int Height,
-    const short *HFilter,
-    const short *VFilter,
+    int  xoffset,
+    int  yoffset,
     int *sum,
     unsigned int *sumsquared
 );
@@ -222,21 +222,6 @@ unsigned int vp8_variance8x16_wmt
 
 }
 
-///////////////////////////////////////////////////////////////////////////
-// the mmx function that does the bilinear filtering and var calculation //
-// int one pass                                                          //
-///////////////////////////////////////////////////////////////////////////
-DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) =
-{
-    { 128, 128, 128, 128, 128, 128, 128, 128,  0,  0,  0,  0,  0,  0,  0,  0 },
-    { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
-    {  96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
-    {  80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
-    {  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
-    {  48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
-    {  32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
-    {  16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
-};
 unsigned int vp8_sub_pixel_variance4x4_wmt
 (
     const unsigned char  *src_ptr,
@@ -272,15 +257,38 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
     unsigned int *sse
 )
 {
-
     int xsum;
     unsigned int xxsum;
-    vp8_filter_block2d_bil_var_sse2(
-        src_ptr, src_pixels_per_line,
-        dst_ptr, dst_pixels_per_line, 8,
-        vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
-        &xsum, &xxsum
-    );
+
+    if (xoffset == 4 && yoffset == 0)
+    {
+        vp8_half_horiz_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum, &xxsum);
+    }
+    else if (xoffset == 0 && yoffset == 4)
+    {
+        vp8_half_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum, &xxsum);
+    }
+    else if (xoffset == 4 && yoffset == 4)
+    {
+        vp8_half_horiz_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum, &xxsum);
+    }
+    else
+    {
+        vp8_filter_block2d_bil_var_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            xoffset, yoffset,
+            &xsum, &xxsum);
+    }
 
     *sse = xxsum;
     return (xxsum - ((xsum * xsum) >> 6));
@@ -344,7 +352,7 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
         vp8_filter_block2d_bil_var_sse2(
             src_ptr, src_pixels_per_line,
             dst_ptr, dst_pixels_per_line, 16,
-            vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
+            xoffset, yoffset,
             &xsum0, &xxsum0
         );
 
@@ -352,7 +360,7 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
         vp8_filter_block2d_bil_var_sse2(
             src_ptr + 8, src_pixels_per_line,
             dst_ptr + 8, dst_pixels_per_line, 16,
-            vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
+            xoffset, yoffset,
             &xsum1, &xxsum1
         );
     }
@@ -392,21 +400,56 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
     int xsum0, xsum1;
     unsigned int xxsum0, xxsum1;
 
+    if (xoffset == 4 && yoffset == 0)
+    {
+        vp8_half_horiz_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
 
-    vp8_filter_block2d_bil_var_sse2(
-        src_ptr, src_pixels_per_line,
-        dst_ptr, dst_pixels_per_line, 8,
-        vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
-        &xsum0, &xxsum0
-    );
+        vp8_half_horiz_variance16x_h_sse2(
+            src_ptr + 8, src_pixels_per_line,
+            dst_ptr + 8, dst_pixels_per_line, 8,
+            &xsum1, &xxsum1);
+    }
+    else if (xoffset == 0 && yoffset == 4)
+    {
+        vp8_half_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+
+        vp8_half_vert_variance16x_h_sse2(
+            src_ptr + 8, src_pixels_per_line,
+            dst_ptr + 8, dst_pixels_per_line, 8,
+            &xsum1, &xxsum1);
+    }
+    else if (xoffset == 4 && yoffset == 4)
+    {
+        vp8_half_horiz_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
 
+        vp8_half_horiz_vert_variance16x_h_sse2(
+            src_ptr + 8, src_pixels_per_line,
+            dst_ptr + 8, dst_pixels_per_line, 8,
+            &xsum1, &xxsum1);
+    }
+    else
+    {
+        vp8_filter_block2d_bil_var_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            xoffset, yoffset,
+            &xsum0, &xxsum0);
 
-    vp8_filter_block2d_bil_var_sse2(
-        src_ptr + 8, src_pixels_per_line,
-        dst_ptr + 8, dst_pixels_per_line, 8,
-        vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
-        &xsum1, &xxsum1
-    );
+        vp8_filter_block2d_bil_var_sse2(
+            src_ptr + 8, src_pixels_per_line,
+            dst_ptr + 8, dst_pixels_per_line, 8,
+            xoffset, yoffset,
+            &xsum1, &xxsum1);
+    }
 
     xsum0 += xsum1;
     xxsum0 += xxsum1;
@@ -428,12 +471,36 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
 {
     int xsum;
     unsigned int xxsum;
-    vp8_filter_block2d_bil_var_sse2(
-        src_ptr, src_pixels_per_line,
-        dst_ptr, dst_pixels_per_line, 16,
-        vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
-        &xsum, &xxsum
-    );
+
+    if (xoffset == 4 && yoffset == 0)
+    {
+        vp8_half_horiz_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 16,
+            &xsum, &xxsum);
+    }
+    else if (xoffset == 0 && yoffset == 4)
+    {
+        vp8_half_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 16,
+            &xsum, &xxsum);
+    }
+    else if (xoffset == 4 && yoffset == 4)
+    {
+        vp8_half_horiz_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 16,
+            &xsum, &xxsum);
+    }
+    else
+    {
+        vp8_filter_block2d_bil_var_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 16,
+            xoffset, yoffset,
+            &xsum, &xxsum);
+    }
 
     *sse = xxsum;
     return (xxsum - ((xsum * xsum) >> 7));
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index c1ed080eb..31438f916 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -314,8 +314,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.sad8x16x3             = vp8_sad8x16x3_sse3;
         cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_sse3;
         cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_sse3;
+#if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.search.full_search             = vp8_full_search_sadx3;
-
+#endif
         cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_sse3;
         cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_sse3;
         cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_sse3;
@@ -344,7 +345,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_sse4;
         cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_sse4;
         cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_sse4;
+#if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.search.full_search             = vp8_full_search_sadx8;
+#endif
     }
 #endif
 
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 4c7aca520..903c56c88 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -194,6 +194,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
     RANGE_CHECK(vp8_cfg, arnr_type,       1, 3);
     RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
 
+#if !(CONFIG_REALTIME_ONLY)
     if (cfg->g_pass == VPX_RC_LAST_PASS)
     {
         int              mb_r = (cfg->g_h + 15) / 16;
@@ -217,6 +218,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
         if ((int)(stats->count + 0.5) != n_packets - 1)
             ERROR("rc_twopass_stats_in missing EOS stats packet");
     }
+#endif
 
     return VPX_CODEC_OK;
 }
@@ -310,6 +312,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
 
     oxcf->best_allowed_q          = cfg.rc_min_quantizer;
     oxcf->worst_allowed_q         = cfg.rc_max_quantizer;
+    oxcf->cq_level                = vp8_cfg.cq_level;
     oxcf->fixed_q = -1;
 
     oxcf->under_shoot_pct         = cfg.rc_undershoot_pct;
@@ -346,7 +349,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
     oxcf->arnr_type =      vp8_cfg.arnr_type;
 
     oxcf->tuning = vp8_cfg.tuning;
-    oxcf->cq_level = vp8_cfg.cq_level;
 
     /*
         printf("Current VP8 Settings: \n");
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index a2ad59662..1b1cf3b94 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -709,6 +709,25 @@ static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
 }
 
 
+static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
+                                               int ctrl_id,
+                                               va_list args)
+{
+
+    int *corrupted = va_arg(args, int *);
+
+    if (corrupted)
+    {
+        VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
+        *corrupted = pbi->common.frame_to_show->corrupted;
+
+        return VPX_CODEC_OK;
+    }
+    else
+        return VPX_CODEC_INVALID_PARAM;
+
+}
+
 vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
 {
     {VP8_SET_REFERENCE,             vp8_set_reference},
@@ -719,6 +738,7 @@ vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
     {VP8_SET_DBG_COLOR_B_MODES,     vp8_set_dbg_options},
     {VP8_SET_DBG_DISPLAY_MV,        vp8_set_dbg_options},
     {VP8D_GET_LAST_REF_UPDATES,     vp8_get_last_ref_updates},
+    {VP8D_GET_FRAME_CORRUPTED,      vp8_get_frame_corrupted},
     { -1, NULL},
 };
 
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index da27e0897..4113f2395 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -16,6 +16,7 @@
 VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/arm_csystemdependent.c
 
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/variance_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index b52470b51..27049a51e 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -118,7 +118,9 @@ vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t    *ctx,
 {
     vpx_codec_err_t res;
 
-    if (!ctx || !data || !data_sz)
+    /* Sanity checks */
+    /* NULL data ptr allowed if data_sz is 0 too */
+    if (!ctx || (!data && data_sz))
         res = VPX_CODEC_INVALID_PARAM;
     else if (!ctx->iface || !ctx->priv)
         res = VPX_CODEC_ERROR;
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 9feab7cc2..16bc07c60 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -45,6 +45,7 @@ enum vp8d_dec_control_id
     VP8_DECODER_CTRL_ID_START   = 256,
     VP8D_GET_LAST_REF_UPDATES,              /**< control function to get info on which reference frames were updated
                                             by the last decode */
+    VP8D_GET_FRAME_CORRUPTED,               /**< check if the indicated frame is corrupted */
     VP8_DECODER_CTRL_ID_MAX
 } ;
 
@@ -58,6 +59,7 @@ enum vp8d_dec_control_id
 
 
 VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES,   int *)
+VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED,    int *)
 
 
 /*! @} - end defgroup vp8_decoder */
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index d9d228551..e7c5b189c 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -81,6 +81,8 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
 
         ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2  * ybf->uv_stride) + border / 2;
         ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2  * ybf->uv_stride) + border / 2;
+
+        ybf->corrupted = 0; /* assume not currupted by errors */
     }
     else
     {
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 5dcee818a..0b08db3ef 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -57,6 +57,8 @@ extern "C"
         int border;
         int frame_size;
         YUV_TYPE clrtype;
+
+        int corrupted;
     } YV12_BUFFER_CONFIG;
 
     int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);