6 files changed, 103 insertions, 19 deletions
diff --git a/build/make/iosbuild.sh b/build/make/iosbuild.sh
index bdd6690df..89fa68186 100755
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -18,6 +18,10 @@ set -e
 devnull='> /dev/null 2>&1'
 
 BUILD_ROOT="_iosbuild"
+CONFIGURE_ARGS="--disable-docs
+                --disable-examples
+                --disable-libyuv
+                --disable-unit-tests"
 DIST_DIR="_dist"
 FRAMEWORK_DIR="VPX.framework"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
@@ -43,7 +47,7 @@ build_target() {
   mkdir "${target}"
   cd "${target}"
   eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
-      --disable-docs ${EXTRA_CONFIGURE_ARGS} ${devnull}
+    ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${devnull}
   export DIST_DIR
   eval make -j ${MAKE_JOBS} dist ${devnull}
   cd "${old_pwd}"
@@ -252,6 +256,7 @@ if [ "${VERBOSE}" = "yes" ]; then
 cat << EOF
   BUILD_ROOT=${BUILD_ROOT}
   DIST_DIR=${DIST_DIR}
+  CONFIGURE_ARGS=${CONFIGURE_ARGS}
   EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
   FRAMEWORK_DIR=${FRAMEWORK_DIR}
   HEADER_DIR=${HEADER_DIR}
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 947d8c7bb..12f076fed 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1166,7 +1166,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
 
   add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vp9_fdct8x8_quant sse2 ssse3/;
+  specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
 }
 
 #
diff --git a/vp9/encoder/arm/neon/vp9_dct_neon.c b/vp9/encoder/arm/neon/vp9_dct_neon.c
index 6c66f5d5b..a6d4797ad 100644
--- a/vp9/encoder/arm/neon/vp9_dct_neon.c
+++ b/vp9/encoder/arm/neon/vp9_dct_neon.c
@@ -32,6 +32,24 @@ void vp9_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) {
   }
 }
 
+void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
+                            int16_t* coeff_ptr, intptr_t n_coeffs,
+                            int skip_block, const int16_t* zbin_ptr,
+                            const int16_t* round_ptr, const int16_t* quant_ptr,
+                            const int16_t* quant_shift_ptr,
+                            int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr,
+                            const int16_t* dequant_ptr, uint16_t* eob_ptr,
+                            const int16_t* scan_ptr,
+                            const int16_t* iscan_ptr) {
+  int16_t temp_buffer[64];
+  (void)coeff_ptr;
+
+  vp9_fdct8x8_neon(input, temp_buffer, stride);
+  vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, zbin_ptr, round_ptr,
+                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+                       dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
+}
+
 void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
   int i;
   // stage 1
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ec7f2a98b..89714ac3a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -402,7 +402,7 @@ static int set_vt_partitioning(VP9_COMP *cpi,
                                BLOCK_SIZE bsize,
                                int mi_row,
                                int mi_col,
-                               int threshold,
+                               int64_t threshold,
                                BLOCK_SIZE bsize_min) {
   VP9_COMMON * const cm = &cpi->common;
   variance_node vt;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 3b0f2f012..9fc63e3f0 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -38,6 +38,8 @@
 #define OUTPUT_FPF          0
 #define ARF_STATS_OUTPUT    0
 
+#define GROUP_ADAPTIVE_MAXQ 0
+
 #define BOOST_BREAKOUT      12.5
 #define BOOST_FACTOR        12.5
 #define ERR_DIVISOR         128.0
@@ -54,6 +56,7 @@
 #define NEW_MV_MODE_PENALTY 32
 #define SVC_FACTOR_PT_LOW   0.45
 #define DARK_THRESH         64
+#define DEFAULT_GRP_WEIGHT  1.0
 
 #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
 
@@ -1082,8 +1085,9 @@ static double calc_correction_factor(double err_per_mb,
 #define EDIV_SIZE_FACTOR 800
 
 static int get_twopass_worst_quality(const VP9_COMP *cpi,
-                                     const FIRSTPASS_STATS *stats,
-                                     int section_target_bandwidth) {
+                                     const double section_err,
+                                     int section_target_bandwidth,
+                                     double group_weight_factor) {
   const RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 
@@ -1092,7 +1096,6 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
   } else {
     const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
                         ? cpi->initial_mbs : cpi->common.MBs;
-    const double section_err = stats->coded_error / stats->count;
     const double err_per_mb = section_err / num_mbs;
     const double speed_term = 1.0 + 0.04 * oxcf->speed;
     const double ediv_size_correction = num_mbs / EDIV_SIZE_FACTOR;
@@ -1101,9 +1104,11 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
 
     int q;
     int is_svc_upper_layer = 0;
+
     if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0)
       is_svc_upper_layer = 1;
 
+
     // Try and pick a max Q that will be high enough to encode the
     // content at the given rate.
     for (q = rc->best_quality; q < rc->worst_quality; ++q) {
@@ -1113,9 +1118,10 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
                                  is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
                                  FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
                                  cpi->common.bit_depth);
-      const int bits_per_mb = vp9_rc_bits_per_mb(INTER_FRAME, q,
-                                                 factor * speed_term,
-                                                 cpi->common.bit_depth);
+      const int bits_per_mb =
+        vp9_rc_bits_per_mb(INTER_FRAME, q,
+                           factor * speed_term * group_weight_factor,
+                           cpi->common.bit_depth);
       if (bits_per_mb <= target_norm_bits_per_mb)
         break;
     }
@@ -1699,6 +1705,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   double boost_score = 0.0;
   double old_boost_score = 0.0;
   double gf_group_err = 0.0;
+#if GROUP_ADAPTIVE_MAXQ
+  double gf_group_raw_error = 0.0;
+#endif
   double gf_first_frame_err = 0.0;
   double mod_frame_err = 0.0;
 
@@ -1742,8 +1751,12 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
   // If this is a key frame or the overlay from a previous arf then
   // the error score / cost of this frame has already been accounted for.
-  if (cpi->common.frame_type == KEY_FRAME || rc->source_alt_ref_active)
+  if (cpi->common.frame_type == KEY_FRAME || rc->source_alt_ref_active) {
     gf_group_err -= gf_first_frame_err;
+#if GROUP_ADAPTIVE_MAXQ
+    gf_group_raw_error -= this_frame->coded_error;
+#endif
+  }
 
   // Motion breakout threshold for loop below depends on image size.
   mv_ratio_accumulator_thresh =
@@ -1782,6 +1795,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     // Accumulate error score of frames in this gf group.
     mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
     gf_group_err += mod_frame_err;
+#if GROUP_ADAPTIVE_MAXQ
+    gf_group_raw_error += this_frame->coded_error;
+#endif
 
     if (EOF == input_stats(twopass, &next_frame))
       break;
@@ -1863,6 +1879,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
       if (EOF == input_stats(twopass, this_frame))
         break;
       gf_group_err += calculate_modified_err(twopass, oxcf, this_frame);
+#if GROUP_ADAPTIVE_MAXQ
+      gf_group_raw_error += this_frame->coded_error;
+#endif
     }
     rc->baseline_gf_interval = new_gf_interval;
   }
@@ -1893,6 +1912,29 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   // Calculate the bits to be allocated to the gf/arf group as a whole
   gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
 
+#if GROUP_ADAPTIVE_MAXQ
+  // Calculate an estimate of the maxq needed for the group.
+  // We are more agressive about correcting for sections
+  // where there could be significant overshoot than for easier
+  // sections where we do not wish to risk creating an overshoot
+  // of the allocated bit budget.
+  if ((cpi->oxcf.rc_mode != VPX_Q) && (rc->baseline_gf_interval > 1)) {
+    const int vbr_group_bits_per_frame =
+      (int)(gf_group_bits / rc->baseline_gf_interval);
+    const double group_av_err = gf_group_raw_error  / rc->baseline_gf_interval;
+    const int tmp_q =
+      get_twopass_worst_quality(cpi, group_av_err, vbr_group_bits_per_frame,
+                                twopass->kfgroup_inter_fraction);
+
+    if (tmp_q < twopass->baseline_worst_quality) {
+      twopass->active_worst_quality =
+        (tmp_q + twopass->baseline_worst_quality + 1) / 2;
+    } else {
+      twopass->active_worst_quality = tmp_q;
+    }
+  }
+#endif
+
   // Calculate the extra bits to be used for boosted frame(s)
   gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
                                      rc->gfu_boost, gf_group_bits);
@@ -2116,7 +2158,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     // Reset to the start of the group.
     reset_fpf_position(twopass, start_position);
 
-    kf_group_err = 0;
+    kf_group_err = 0.0;
 
     // Rescan to get the correct error data for the forced kf group.
     for (i = 0; i < rc->frames_to_key; ++i) {
@@ -2226,6 +2268,16 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
                                   rc->kf_boost, twopass->kf_group_bits);
 
+  // Work out the fraction of the kf group bits reserved for the inter frames
+  // within the group after discounting the bits for the kf itself.
+  if (twopass->kf_group_bits) {
+    twopass->kfgroup_inter_fraction =
+      (double)(twopass->kf_group_bits - kf_bits) /
+      (double)twopass->kf_group_bits;
+  } else {
+    twopass->kfgroup_inter_fraction = 1.0;
+  }
+
   twopass->kf_group_bits -= kf_bits;
 
   // Save the bits to spend on the key frame.
@@ -2316,7 +2368,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   GF_GROUP *const gf_group = &twopass->gf_group;
   int frames_left;
   FIRSTPASS_STATS this_frame;
-  FIRSTPASS_STATS this_frame_copy;
 
   int target_rate;
   LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?
@@ -2374,9 +2425,14 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     // Special case code for first frame.
     const int section_target_bandwidth = (int)(twopass->bits_left /
                                                frames_left);
-    const int tmp_q = get_twopass_worst_quality(cpi, &twopass->total_left_stats,
-                                                section_target_bandwidth);
+    const double section_error =
+      twopass->total_left_stats.coded_error / twopass->total_left_stats.count;
+    const int tmp_q =
+      get_twopass_worst_quality(cpi, section_error,
+                                section_target_bandwidth, DEFAULT_GRP_WEIGHT);
+
     twopass->active_worst_quality = tmp_q;
+    twopass->baseline_worst_quality = tmp_q;
     rc->ni_av_qi = tmp_q;
     rc->last_q[INTER_FRAME] = tmp_q;
     rc->avg_q = vp9_convert_qindex_to_q(tmp_q, cm->bit_depth);
@@ -2388,13 +2444,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   if (EOF == input_stats(twopass, &this_frame))
     return;
 
-  // Local copy of the current frame's first pass stats.
-  this_frame_copy = this_frame;
-
   // Keyframe and section processing.
   if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
+    FIRSTPASS_STATS this_frame_copy;
+    this_frame_copy = this_frame;
     // Define next KF group and assign bits to it.
-    find_next_key_frame(cpi, &this_frame_copy);
+    find_next_key_frame(cpi, &this_frame);
+    this_frame = this_frame_copy;
   } else {
     cm->frame_type = INTER_FRAME;
   }
@@ -2423,7 +2479,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 
   // Define a new GF/ARF group. (Should always enter here for key frames).
   if (rc->frames_till_gf_update_due == 0) {
-    define_gf_group(cpi, &this_frame_copy);
+    define_gf_group(cpi, &this_frame);
 
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
     if (lc != NULL)
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index a8e4987cc..34767706a 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -109,11 +109,16 @@ typedef struct {
 
   // Error score of frames still to be coded in kf group
   int64_t kf_group_error_left;
+
+  // The fraction for a kf groups total bits allocated to the inter frames
+  double kfgroup_inter_fraction;
+
   int sr_update_lag;
 
   int kf_zeromotion_pct;
   int last_kfgroup_zeromotion_pct;
   int gf_zeromotion_pct;
+  int baseline_worst_quality;
   int active_worst_quality;
   int extend_minq;
   int extend_maxq;