22 files changed, 540 insertions, 106 deletions
diff --git a/test/acm_random.h b/test/acm_random.h
index ff5c93ea1..a29ced2f7 100644
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -32,6 +32,12 @@ class ACMRandom {
     return (value >> 15) & 0xffff;
   }
 
+  int16_t Rand9Signed(void) {
+    // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
+    const uint32_t value = random_.Generate(512);
+    return static_cast<int16_t>(value - 256);
+  }
+
   uint8_t Rand8(void) {
     const uint32_t value =
         random_.Generate(testing::internal::Random::kMaxRange);
diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc
new file mode 100644
index 000000000..0bf6f4af7
--- /dev/null
+++ b/test/hadamard_test.cc
@@ -0,0 +1,126 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <algorithm>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+
+typedef void (*Hadamard8x8Func)(const int16_t *a, int a_stride,
+                                int16_t *b);
+
+class HadamardTest : public ::testing::TestWithParam<Hadamard8x8Func> {
+ public:
+  virtual void SetUp() {
+    h_func_ = GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  Hadamard8x8Func h_func_;
+  ACMRandom rnd_;
+};
+
+void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
+  int16_t b[8];
+  for (int i = 0; i < 8; i += 2) {
+    b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride];
+    b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride];
+  }
+  int16_t c[8];
+  for (int i = 0; i < 8; i += 4) {
+    c[i + 0] = b[i + 0] + b[i + 2];
+    c[i + 1] = b[i + 1] + b[i + 3];
+    c[i + 2] = b[i + 0] - b[i + 2];
+    c[i + 3] = b[i + 1] - b[i + 3];
+  }
+  out[0] = c[0] + c[4];
+  out[7] = c[1] + c[5];
+  out[3] = c[2] + c[6];
+  out[4] = c[3] + c[7];
+  out[2] = c[0] - c[4];
+  out[6] = c[1] - c[5];
+  out[1] = c[2] - c[6];
+  out[5] = c[3] - c[7];
+}
+
+void reference_hadamard(const int16_t *a, int a_stride, int16_t *b) {
+  int16_t buf[64];
+  for (int i = 0; i < 8; i++) {
+    hadamard_loop(a + i, a_stride, buf + i * 8);
+  }
+
+  for (int i = 0; i < 8; i++) {
+    hadamard_loop(buf + i, 8, b + i * 8);
+  }
+}
+
+TEST_P(HadamardTest, CompareReferenceRandom) {
+  DECLARE_ALIGNED(16, int16_t, a[64]);
+  DECLARE_ALIGNED(16, int16_t, b[64]);
+  int16_t b_ref[64];
+  for (int i = 0; i < 64; i++) {
+    a[i] = rnd_.Rand9Signed();
+  }
+  memset(b, 0, sizeof(b));
+  memset(b_ref, 0, sizeof(b_ref));
+
+  reference_hadamard(a, 8, b_ref);
+  ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
+
+  // The order of the output is not important. Sort before checking.
+  std::sort(b, b + 64);
+  std::sort(b_ref, b_ref + 64);
+  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+}
+
+TEST_P(HadamardTest, VaryStride) {
+  DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
+  DECLARE_ALIGNED(16, int16_t, b[64]);
+  int16_t b_ref[64];
+  for (int i = 0; i < 64 * 8; i++) {
+    a[i] = rnd_.Rand9Signed();
+  }
+
+  for (int i = 8; i < 64; i += 8) {
+    memset(b, 0, sizeof(b));
+    memset(b_ref, 0, sizeof(b_ref));
+
+    reference_hadamard(a, i, b_ref);
+    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+    // The order of the output is not important. Sort before checking.
+    std::sort(b, b + 64);
+    std::sort(b_ref, b_ref + 64);
+    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(C, HadamardTest,
+                        ::testing::Values(&vpx_hadamard_8x8_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, HadamardTest,
+                        ::testing::Values(&vpx_hadamard_8x8_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSSE3, HadamardTest,
+                        ::testing::Values(&vpx_hadamard_8x8_ssse3));
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
+}  // namespace
diff --git a/test/minmax_test.cc b/test/minmax_test.cc
new file mode 100644
index 000000000..dbe4342dc
--- /dev/null
+++ b/test/minmax_test.cc
@@ -0,0 +1,132 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+
+typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
+                           const uint8_t *b, int b_stride,
+                           int *min, int *max);
+
+class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
+ public:
+  virtual void SetUp() {
+    mm_func_ = GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  MinMaxFunc mm_func_;
+  ACMRandom rnd_;
+};
+
+void reference_minmax(const uint8_t *a, int a_stride,
+                      const uint8_t *b, int b_stride,
+                      int *min_ret, int *max_ret) {
+  int min = 255;
+  int max = 0;
+  for (int i = 0; i < 8; i++) {
+    for (int j = 0; j < 8; j++) {
+      const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
+      if (min > diff) min = diff;
+      if (max < diff) max = diff;
+    }
+  }
+
+  *min_ret = min;
+  *max_ret = max;
+}
+
+TEST_P(MinMaxTest, MinValue) {
+  for (int i = 0; i < 64; i++) {
+    uint8_t a[64], b[64];
+    memset(a, 0, sizeof(a));
+    memset(b, 255, sizeof(b));
+    b[i] = i;  // Set a minimum difference of i.
+
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(255, max);
+    EXPECT_EQ(i, min);
+  }
+}
+
+TEST_P(MinMaxTest, MaxValue) {
+  for (int i = 0; i < 64; i++) {
+    uint8_t a[64], b[64];
+    memset(a, 0, sizeof(a));
+    memset(b, 0, sizeof(b));
+    b[i] = i;  // Set a maximum difference of i.
+
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(i, max);
+    EXPECT_EQ(0, min);
+  }
+}
+
+TEST_P(MinMaxTest, CompareReference) {
+  uint8_t a[64], b[64];
+  for (int j = 0; j < 64; j++) {
+    a[j] = rnd_.Rand8();
+    b[j] = rnd_.Rand8();
+  }
+
+  int min_ref, max_ref, min, max;
+  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
+  ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+  EXPECT_EQ(max_ref, max);
+  EXPECT_EQ(min_ref, min);
+}
+
+TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
+  uint8_t a[8 * 64], b[8 * 64];
+  for (int i = 0; i < 8 * 64; i++) {
+    a[i] = rnd_.Rand8();
+    b[i] = rnd_.Rand8();
+  }
+  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
+    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
+      int min_ref, max_ref, min, max;
+      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;;
+      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;;
+    }
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
+                        ::testing::Values(&vpx_minmax_8x8_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
+                        ::testing::Values(&vpx_minmax_8x8_neon));
+#endif
+
+}  // namespace
diff --git a/test/test.mk b/test/test.mk
index 693185362..7c22ca501 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -143,6 +143,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 2d98e77aa..461462552 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -1072,8 +1072,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
       }
       break;
   }
-
-  vp9_adjust_mask(cm, mi_row, mi_col, lfm);
 }
 
 static void filter_selectively_vert(uint8_t *s, int pitch,
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
index c8ef618b7..7af61629a 100644
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = {
 void vp9_clearall_segfeatures(struct segmentation *seg) {
   vp9_zero(seg->feature_data);
   vp9_zero(seg->feature_mask);
+  seg->aq_av_offset = 0;
 }
 
 void vp9_enable_segfeature(struct segmentation *seg, int segment_id,
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index 5b75d8d4e..7ea7c3dd7 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -47,6 +47,7 @@ struct segmentation {
 
   int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
   unsigned int feature_mask[MAX_SEGMENTS];
+  int aq_av_offset;
 };
 
 static INLINE int segfeature_active(const struct segmentation *seg,
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 1c8d2b45d..6b651039c 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -23,7 +23,6 @@
 
 CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
   size_t last_coded_q_map_size;
-  size_t consec_zero_mv_size;
   CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
   if (cr == NULL)
     return NULL;
@@ -41,21 +40,12 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
   }
   assert(MAXQ <= 255);
   memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
-
-  consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
-  cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
-  if (cr->consec_zero_mv == NULL) {
-    vp9_cyclic_refresh_free(cr);
-    return NULL;
-  }
-  memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
   return cr;
 }
 
 void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
   vpx_free(cr->map);
   vpx_free(cr->last_coded_q_map);
-  vpx_free(cr->consec_zero_mv);
   vpx_free(cr);
 }
 
@@ -245,7 +235,6 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
                                              BLOCK_SIZE bsize) {
   const VP9_COMMON *const cm = &cpi->common;
   CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  MV mv = mi->mv[0].as_mv;
   const int bw = num_8x8_blocks_wide_lookup[bsize];
   const int bh = num_8x8_blocks_high_lookup[bsize];
   const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
@@ -269,15 +258,8 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
             clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id],
                   0, MAXQ),
             cr->last_coded_q_map[map_offset]);
-      // Update the consecutive zero/low_mv count.
-      if (is_inter_block(mi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) {
-        if (cr->consec_zero_mv[map_offset] < 255)
-          cr->consec_zero_mv[map_offset]++;
-      } else {
-        cr->consec_zero_mv[map_offset] = 0;
       }
     }
-  }
 }
 
 // Update the actual number of blocks that were applied the segment delta q.
@@ -442,7 +424,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
         if (cr->map[bl_index2] == 0) {
           count_tot++;
           if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
-              cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
+              cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
             sum_map++;
             count_sel++;
           }
@@ -545,8 +527,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
     if (cm->frame_type == KEY_FRAME) {
       memset(cr->last_coded_q_map, MAXQ,
              cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
-      memset(cr->consec_zero_mv, 0,
-             cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv));
       cr->sb_index = 0;
     }
     return;
@@ -621,7 +601,6 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
   CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
   memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
-  memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
   cr->sb_index = 0;
   cpi->refresh_golden_frame = 1;
   cpi->refresh_alt_ref_frame = 1;
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 095b9283f..35eea182f 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -53,8 +53,6 @@ struct CYCLIC_REFRESH {
   signed char *map;
   // Map of the last q a block was coded at.
   uint8_t *last_coded_q_map;
-  // Count on how many consecutive times a block uses ZER0MV for encoding.
-  uint8_t *consec_zero_mv;
   // Thresholds applied to the projected rate/distortion of the coding block,
   // when deciding whether block should be refreshed.
   int64_t thresh_rate_sb;
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index dbca633c7..42d456e89 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -21,12 +21,6 @@
 #include "vp9/encoder/vp9_denoiser.h"
 #include "vp9/encoder/vp9_encoder.h"
 
-/* The VP9 denoiser is similar to that of the VP8 denoiser. While
- * choosing the motion vectors / reference frames, the denoiser is run, and if
- * it did not modify the signal to much, the denoised block is copied to the
- * signal.
- */
-
 #ifdef OUTPUT_YUV_DENOISED
 static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
 #endif
@@ -49,16 +43,19 @@ static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
 }
 
 static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
-  return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 60 : 40);
+  return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 80 : 40);
 }
 
 static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
                            int motion_magnitude) {
   if (motion_magnitude >
       noise_motion_thresh(bs, increase_denoising)) {
-    return 0;
+    if (increase_denoising)
+      return (1 << num_pels_log2_lookup[bs]) << 2;
+    else
+      return 0;
   } else {
-    return (1 << num_pels_log2_lookup[bs]) * 20;
+    return (1 << num_pels_log2_lookup[bs]) << 4;
   }
 }
 
@@ -215,12 +212,14 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
   // Avoid denoising for small block (unless motion is small).
   // Small blocks are selected in variance partition (before encoding) and
   // will typically lie on moving areas.
-  if (motion_magnitude > 16 && bs <= BLOCK_8X8)
+  if (denoiser->denoising_level < kDenHigh &&
+      motion_magnitude > 16 && bs <= BLOCK_8X8)
     return COPY_BLOCK;
 
   // If the best reference frame uses inter-prediction and there is enough of a
   // difference in sum-squared-error, use it.
   if (frame != INTRA_FRAME &&
+      ctx->newmv_sse != UINT_MAX &&
       sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
     mi->ref_frame[0] = ctx->best_reference_frame;
     mi->mode = ctx->best_sse_inter_mode;
@@ -242,6 +241,9 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
     ctx->best_sse_inter_mode = ZEROMV;
     ctx->best_sse_mv.as_int = 0;
     *zeromv_filter = 1;
+    if (denoiser->denoising_level > kDenMedium) {
+      motion_magnitude = 0;
+    }
   }
 
   if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
@@ -334,14 +336,13 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
 
   if (cpi->use_skin_detection &&
       bs <= BLOCK_32X32 &&
-      denoiser->denoising_level >= kDenLow) {
+      denoiser->denoising_level < kDenHigh) {
     int motion_level = (motion_magnitude < 16) ? 0 : 1;
     // If motion for current block is small/zero, compute consec_zeromv for
     // skin detection (early exit in skin detection is done for large
     // consec_zeromv when current block has small/zero motion).
     consec_zeromv = 0;
     if (motion_level == 0) {
-      CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
       VP9_COMMON * const cm = &cpi->common;
       int j, i;
       // Loop through the 8x8 sub-blocks.
@@ -354,7 +355,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
       for (i = 0; i < ymis; i++) {
         for (j = 0; j < xmis; j++) {
           int bl_index = block_index + i * cm->mi_cols + j;
-          consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], consec_zeromv);
+          consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], consec_zeromv);
           // No need to keep checking 8x8 blocks if any of the sub-blocks
           // has small consec_zeromv (since threshold for no_skin based on
           // zero/small motion in skin detection is high, i.e, > 4).
@@ -376,8 +377,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
                                      motion_level);
   }
   if (!is_skin &&
-      denoiser->denoising_level == kDenHigh &&
-      motion_magnitude < 16) {
+      denoiser->denoising_level == kDenHigh) {
     denoiser->increase_denoising = 1;
   } else {
     denoiser->increase_denoising = 0;
@@ -494,12 +494,12 @@ void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
   ctx->zeromv_sse = UINT_MAX;
   ctx->newmv_sse = UINT_MAX;
   ctx->zeromv_lastref_sse = UINT_MAX;
+  ctx->best_sse_mv.as_int = 0;
 }
 
 void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
                                      PREDICTION_MODE mode,
                                      PICK_MODE_CONTEXT *ctx) {
-  // TODO(tkopp): Use both MVs if possible
   if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
     ctx->zeromv_sse = sse;
     ctx->best_zeromv_reference_frame = mi->ref_frame[0];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 634e5cfe8..73adf2a8d 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -780,7 +780,6 @@ static int choose_partitioning(VP9_COMP *cpi,
 #if !CONFIG_VP9_HIGHBITDEPTH
     if (cpi->use_skin_detection && !low_res && (mi_col >= 8 &&
         mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows)) {
-      CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
       int bl_index1, bl_index2, bl_index3;
       int num_16x16_skin = 0;
       int num_16x16_nonskin = 0;
@@ -803,10 +802,10 @@ static int choose_partitioning(VP9_COMP *cpi,
           bl_index1 = bl_index + 1;
           bl_index2 = bl_index + cm->mi_cols;
           bl_index3 = bl_index2 + 1;
-          consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index],
-                                 VPXMIN(cr->consec_zero_mv[bl_index1],
-                                 VPXMIN(cr->consec_zero_mv[bl_index2],
-                                 cr->consec_zero_mv[bl_index3])));
+          consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+                                 VPXMIN(cpi->consec_zero_mv[bl_index1],
+                                 VPXMIN(cpi->consec_zero_mv[bl_index2],
+                                 cpi->consec_zero_mv[bl_index3])));
           is_skin = vp9_compute_skin_block(ysignal,
                                            usignal,
                                            vsignal,
@@ -4138,6 +4137,31 @@ static INTERP_FILTER get_interp_filter(
   }
 }
 
+static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
+  struct segmentation *const seg = &cm->seg;
+
+  int mi_row, mi_col;
+  int sum_delta = 0;
+  int map_index = 0;
+  int qdelta_index;
+  int segment_id;
+
+  for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+    MODE_INFO **mi_8x8 = mi_8x8_ptr;
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
+      segment_id = mi_8x8[0]->segment_id;
+      qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
+      sum_delta += qdelta_index;
+      map_index++;
+    }
+    mi_8x8_ptr += cm->mi_stride;
+  }
+
+  return sum_delta / (cm->mi_rows * cm->mi_cols);
+}
+
 void vp9_encode_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
 
@@ -4260,8 +4284,13 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     cm->reference_mode = SINGLE_REFERENCE;
     encode_frame_internal(cpi);
   }
-}
 
+  // If segmentated AQ is enabled compute the average AQ weighting.
+  if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
+      (cm->seg.update_map || cm->seg.update_data)) {
+    cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
+  }
+}
 static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
   const PREDICTION_MODE y_mode = mi->mode;
   const PREDICTION_MODE uv_mode = mi->uv_mode;
@@ -4281,6 +4310,33 @@ static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
   ++counts->uv_mode[y_mode][uv_mode];
 }
 
+static void update_zeromv_cnt(VP9_COMP *const cpi,
+                              const MODE_INFO *const mi,
+                              int mi_row, int mi_col,
+                              BLOCK_SIZE bsize) {
+  const VP9_COMMON *const cm = &cpi->common;
+  MV mv = mi->mv[0].as_mv;
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+  const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+  const int block_index = mi_row * cm->mi_cols + mi_col;
+  int x, y;
+  for (y = 0; y < ymis; y++)
+    for (x = 0; x < xmis; x++) {
+      int map_offset = block_index + y * cm->mi_cols + x;
+      if (is_inter_block(mi) && mi->skip &&
+          mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+        if (abs(mv.row) < 8 && abs(mv.col) < 8) {
+          if (cpi->consec_zero_mv[map_offset] < 255)
+           cpi->consec_zero_mv[map_offset]++;
+        } else {
+          cpi->consec_zero_mv[map_offset] = 0;
+        }
+      }
+    }
+}
+
 static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
                               TOKENEXTRA **t, int output_enabled,
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -4361,5 +4417,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
     ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
     if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
       vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
+    if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
+      update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
   }
 }
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 4e5377889..68537e97f 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -375,6 +375,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
   vpx_free(cpi->active_map.map);
   cpi->active_map.map = NULL;
 
+  vpx_free(cpi->consec_zero_mv);
+  cpi->consec_zero_mv = NULL;
+
   vp9_free_ref_frame_buffers(cm->buffer_pool);
 #if CONFIG_VP9_POSTPROC
   vp9_free_postproc_buffers(cm);
@@ -1549,9 +1552,12 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
 
   update_frame_size(cpi);
 
-  if ((last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) &&
-      cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
-    vp9_cyclic_refresh_reset_resize(cpi);
+  if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
+    memset(cpi->consec_zero_mv, 0,
+               cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+      vp9_cyclic_refresh_reset_resize(cpi);
+  }
 
   if ((cpi->svc.number_temporal_layers > 1 &&
       cpi->oxcf.rc_mode == VPX_CBR) ||
@@ -1698,6 +1704,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
 
   realloc_segmentation_maps(cpi);
 
+  CHECK_MEM_ERROR(cm, cpi->consec_zero_mv,
+                  vpx_calloc(cm->mi_rows * cm->mi_cols,
+                             sizeof(*cpi->consec_zero_mv)));
+
   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
@@ -3127,7 +3137,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
        "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
         "%6d %6d %5d %5d %5d "
         "%10"PRId64" %10.3lf"
-        "%10lf %8u %10"PRId64" %10d %10d %10d %10d\n",
+        "%10lf %8u %10"PRId64" %10d %10d %10d %10d %10d\n",
         cpi->common.current_video_frame,
         cm->width, cm->height,
         cpi->td.rd_counts.m_search_count,
@@ -3161,7 +3171,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
         cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
         cpi->twopass.kf_zeromotion_pct,
         cpi->twopass.fr_content_type,
-        cm->lf.filter_level);
+        cm->lf.filter_level,
+        cm->seg.aq_av_offset);
   }
   fclose(f);
 
@@ -3430,6 +3441,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
                                              cpi->unscaled_last_source,
                                              &cpi->scaled_last_source,
                                              (cpi->oxcf.pass == 0));
+
+  if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) {
+    memset(cpi->consec_zero_mv, 0,
+           cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
+  }
+
   vp9_update_noise_estimate(cpi);
 
   if (cpi->oxcf.pass == 0 &&
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 2def941ef..8a5c4f40b 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -499,6 +499,9 @@ typedef struct VP9_COMP {
 
   NOISE_ESTIMATE noise_estimate;
 
+  // Count on how many consecutive times a block uses small/zeromv for encoding.
+  uint8_t *consec_zero_mv;
+
   // VAR_BASED_PARTITION thresholds
   // 0 - threshold_64x64; 1 - threshold_32x32;
   // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 3e34c012c..e25b64202 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1124,7 +1124,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
     fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
     fps.intra_smooth_pct = (double)intra_smooth_count / num_mbs;
     fps.inactive_zone_rows = (double)image_data_start_row;
-    fps.inactive_zone_cols = (double)0;  // TODO(paulwilkins): fix
+    // Currently set to 0 as most issues relate to letter boxing.
+    fps.inactive_zone_cols = (double)0;
 
     if (mvcount > 0) {
       fps.MVr = (double)sum_mvr / mvcount;
@@ -1150,10 +1151,9 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
       fps.pcnt_motion = 0.0;
     }
 
-    // TODO(paulwilkins):  Handle the case when duration is set to 0, or
-    // something less than the full time between subsequent values of
-    // cpi->source_time_stamp.
-    fps.duration = (double)(source->ts_end - source->ts_start);
+    // Dont allow a value of 0 for duration.
+    // (Section duration is also defaulted to minimum of 1.0).
+    fps.duration = VPXMAX(1.0, (double)(source->ts_end - source->ts_start));
 
     // Don't want to do output stats with a stack variable!
     twopass->this_frame_stats = fps;
@@ -2849,6 +2849,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 void vp9_twopass_postencode_update(VP9_COMP *cpi) {
   TWO_PASS *const twopass = &cpi->twopass;
   RATE_CONTROL *const rc = &cpi->rc;
+  VP9_COMMON *const cm = &cpi->common;
   const int bits_used = rc->base_frame_target;
 
   // VBR correction is done through rc->vbr_bits_off_target. Based on the
@@ -2886,6 +2887,22 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
       rc->worst_quality - twopass->active_worst_quality;
     const int minq_adj_limit =
         (cpi->oxcf.rc_mode == VPX_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT);
+    int aq_extend_min = 0;
+    int aq_extend_max = 0;
+
+    // Extend min or Max Q range to account for imbalance from the base
+    // value when using AQ.
+    if (cpi->oxcf.aq_mode != NO_AQ) {
+      if (cm->seg.aq_av_offset < 0) {
+        // The balance of the AQ map tends towarda lowering the average Q.
+        aq_extend_min = 0;
+        aq_extend_max = VPXMIN(maxq_adj_limit, -cm->seg.aq_av_offset);
+      } else {
+        // The balance of the AQ map tends towards raising the average Q.
+        aq_extend_min = VPXMIN(minq_adj_limit, cm->seg.aq_av_offset);
+        aq_extend_max = 0;
+      }
+    }
 
     // Undershoot.
     if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) {
@@ -2910,8 +2927,10 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
         --twopass->extend_maxq;
     }
 
-    twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit);
-    twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit);
+    twopass->extend_minq =
+        clamp(twopass->extend_minq, aq_extend_min, minq_adj_limit);
+    twopass->extend_maxq =
+        clamp(twopass->extend_maxq, aq_extend_max, maxq_adj_limit);
 
     // If there is a big and undexpected undershoot then feed the extra
     // bits back in quickly. One situation where this may happen is if a
diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c
index 10ea01012..d4fee6f9b 100644
--- a/vp9/encoder/vp9_noise_estimate.c
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -40,11 +40,9 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
 }
 
 int enable_noise_estimation(VP9_COMP *const cpi) {
-  // Enable noise estimation if denoising is on (and cyclic refresh, since
-  // noise estimate is currently using a struct defined in cyclic refresh).
+  // Enable noise estimation if denoising is on.
 #if CONFIG_VP9_TEMPORAL_DENOISING
-  if (cpi->oxcf.noise_sensitivity > 0 &&
-      cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+  if (cpi->oxcf.noise_sensitivity > 0)
     return 1;
 #endif
   // Only allow noise estimate under certain encoding mode.
@@ -101,7 +99,6 @@ NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) {
 
 void vp9_update_noise_estimate(VP9_COMP *const cpi) {
   const VP9_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   NOISE_ESTIMATE *const ne = &cpi->noise_estimate;
   // Estimate of noise level every frame_period frames.
   int frame_period = 10;
@@ -131,6 +128,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
       ne->last_h = cm->height;
     }
     return;
+  } else if (cpi->rc.avg_frame_low_motion < 50) {
+    // Force noise estimation to 0 and denoiser off if content has high motion.
+    ne->level = kLowLow;
+#if CONFIG_VP9_TEMPORAL_DENOISING
+    if (cpi->oxcf.noise_sensitivity > 0)
+      vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+#endif
+    return;
   } else {
     int num_samples = 0;
     uint64_t avg_est = 0;
@@ -153,7 +158,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
     for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
       for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
         int bl_index = mi_row * cm->mi_cols + mi_col;
-        if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv)
+        if (cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv)
           num_low_motion++;
       }
     }
@@ -173,23 +178,26 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
           // been encoded as zero/low motion x (= thresh_consec_zeromv) frames
           // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all
           // 4 sub-blocks for 16x16 block. Also, avoid skin blocks.
-          int consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index],
-                                     VPXMIN(cr->consec_zero_mv[bl_index1],
-                                     VPXMIN(cr->consec_zero_mv[bl_index2],
-                                     cr->consec_zero_mv[bl_index3])));
-          int is_skin = vp9_compute_skin_block(src_y,
-                                               src_u,
-                                               src_v,
-                                               src_ystride,
-                                               src_uvstride,
-                                               bsize,
-                                               consec_zeromv,
-                                               0);
+          int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+                                     VPXMIN(cpi->consec_zero_mv[bl_index1],
+                                     VPXMIN(cpi->consec_zero_mv[bl_index2],
+                                     cpi->consec_zero_mv[bl_index3])));
+          int is_skin = 0;
+          if (cpi->use_skin_detection) {
+            is_skin = vp9_compute_skin_block(src_y,
+                                             src_u,
+                                             src_v,
+                                             src_ystride,
+                                             src_uvstride,
+                                             bsize,
+                                             consec_zeromv,
+                                             0);
+          }
           if (frame_low_motion &&
-              cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
-              cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
-              cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv &&
-              cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv &&
+              cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
+              cpi->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
+              cpi->consec_zero_mv[bl_index2] > thresh_consec_zeromv &&
+              cpi->consec_zero_mv[bl_index3] > thresh_consec_zeromv &&
               !is_skin) {
             // Compute variance.
             unsigned int sse;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 3bd175392..d53e60aea 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -338,6 +338,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
   rc->total_target_bits = 0;
   rc->total_target_vs_actual = 0;
   rc->avg_intersize_gfint = 0;
+  rc->avg_frame_low_motion = 0;
 
   rc->frames_since_key = 8;  // Sensible default for first frame.
   rc->this_key_frame_forced = 0;
@@ -1334,6 +1335,26 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
   }
 }
 
+static void compute_frame_low_motion(VP9_COMP *const cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  int mi_row, mi_col;
+  MODE_INFO **mi = cm->mi_grid_visible;
+  RATE_CONTROL *const rc = &cpi->rc;
+  const int rows = cm->mi_rows, cols = cm->mi_cols;
+  int cnt_zeromv = 0;
+  for (mi_row = 0; mi_row < rows; mi_row++) {
+    for (mi_col = 0; mi_col < cols; mi_col++) {
+      if (abs(mi[0]->mv[0].as_mv.row) < 16 &&
+          abs(mi[0]->mv[0].as_mv.col) < 16)
+        cnt_zeromv++;
+      mi++;
+    }
+    mi += 8;
+  }
+  cnt_zeromv = 100 * cnt_zeromv / (rows * cols);
+  rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) >> 2;
+}
+
 void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   const VP9_COMMON *const cm = &cpi->common;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1420,10 +1441,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
 
   rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
 
-  if (!cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
-    rc->avg_intersize_gfint += rc->projected_frame_size;
-  }
-
   if (!cpi->use_svc || is_two_pass_svc(cpi)) {
     if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
         (cm->frame_type != KEY_FRAME))
@@ -1447,6 +1464,13 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
         rc->next_frame_size_selector != rc->frame_size_selector;
     rc->frame_size_selector = rc->next_frame_size_selector;
   }
+
+  if (oxcf->pass == 0) {
+    if (cm->frame_type != KEY_FRAME)
+      compute_frame_low_motion(cpi);
+    if (!cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame)
+      rc->avg_intersize_gfint += rc->projected_frame_size;
+  }
 }
 
 void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
@@ -1507,6 +1531,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
   if (rc->frames_till_gf_update_due == 0) {
     rc->avg_intersize_gfint =
         rc->avg_intersize_gfint / (rc->baseline_gf_interval + 1);
+    rc->gfu_boost = DEFAULT_GF_BOOST;
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) {
       vp9_cyclic_refresh_set_golden_update(cpi);
     } else {
@@ -1523,6 +1548,11 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
           rc->avg_frame_qindex[INTER_FRAME] > (7 * rc->worst_quality) >> 3 &&
           rc->avg_intersize_gfint > (5 * rc->avg_frame_bandwidth) >> 1) {
           rc->baseline_gf_interval = (3 * rc->baseline_gf_interval) >> 1;
+      } else if (cm->current_video_frame > 30 &&
+                 rc->avg_frame_low_motion < 20) {
+        // Decrease boost and gf interval for high motion case.
+        rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
+        rc->baseline_gf_interval = VPXMIN(6, rc->baseline_gf_interval >> 1);
       }
     }
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -1535,7 +1565,6 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
     }
     cpi->refresh_golden_frame = 1;
     rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
-    rc->gfu_boost = DEFAULT_GF_BOOST;
     rc->avg_intersize_gfint = 0;
   }
   if (cm->frame_type == KEY_FRAME)
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 7b6f16548..eef19401e 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -163,6 +163,7 @@ typedef struct {
   int high_source_sad;
   int count_last_scene_change;
   int avg_intersize_gfint;
+  int avg_frame_low_motion;
 } RATE_CONTROL;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c
index ff0dfce67..268aa919b 100644
--- a/vp9/encoder/vp9_skin_detection.c
+++ b/vp9/encoder/vp9_skin_detection.c
@@ -112,7 +112,6 @@ int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
 void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
   int i, j, mi_row, mi_col, num_bl;
   VP9_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   uint8_t *y;
   const uint8_t *src_y = cpi->Source->y_buffer;
   const uint8_t *src_u = cpi->Source->u_buffer;
@@ -166,19 +165,17 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
       } else {
         int block_size = BLOCK_8X8;
         int consec_zeromv = 0;
-        if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
-          int bl_index = mi_row * cm->mi_cols + mi_col;
-          int bl_index1 = bl_index + 1;
-          int bl_index2 = bl_index + cm->mi_cols;
-          int bl_index3 = bl_index2 + 1;
-          if (y_bsize == 8)
-            consec_zeromv = cr->consec_zero_mv[bl_index];
-          else
-            consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index],
-                                     VPXMIN(cr->consec_zero_mv[bl_index1],
-                                     VPXMIN(cr->consec_zero_mv[bl_index2],
-                                     cr->consec_zero_mv[bl_index3])));
-        }
+        int bl_index = mi_row * cm->mi_cols + mi_col;
+        int bl_index1 = bl_index + 1;
+        int bl_index2 = bl_index + cm->mi_cols;
+        int bl_index3 = bl_index2 + 1;
+        if (y_bsize == 8)
+          consec_zeromv = cpi->consec_zero_mv[bl_index];
+        else
+          consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+                                 VPXMIN(cpi->consec_zero_mv[bl_index1],
+                                 VPXMIN(cpi->consec_zero_mv[bl_index2],
+                                 cpi->consec_zero_mv[bl_index3])));
         if (y_bsize == 16)
           block_size = BLOCK_16X16;
         is_skin  = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride,
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 73048f842..1814a32c9 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -327,12 +327,12 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
     CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
     signed char *temp = cr->map;
     uint8_t *temp2 = cr->last_coded_q_map;
-    uint8_t *temp3 = cr->consec_zero_mv;
+    uint8_t *temp3 = cpi->consec_zero_mv;
     cr->map = lc->map;
     lc->map = temp;
     cr->last_coded_q_map = lc->last_coded_q_map;
     lc->last_coded_q_map = temp2;
-    cr->consec_zero_mv = lc->consec_zero_mv;
+    cpi->consec_zero_mv = lc->consec_zero_mv;
     lc->consec_zero_mv = temp3;
     cr->sb_index = lc->sb_index;
   }
@@ -360,8 +360,8 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
     cr->map = temp;
     lc->last_coded_q_map = cr->last_coded_q_map;
     cr->last_coded_q_map = temp2;
-    lc->consec_zero_mv = cr->consec_zero_mv;
-    cr->consec_zero_mv = temp3;
+    lc->consec_zero_mv = cpi->consec_zero_mv;
+    cpi->consec_zero_mv = temp3;
     lc->sb_index = cr->sb_index;
   }
 }
diff --git a/vpx_dsp/arm/avg_neon.c b/vpx_dsp/arm/avg_neon.c
index d054c4185..e52958c54 100644
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -197,3 +197,60 @@ int vpx_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) {
     return s - ((t * t) >> shift_factor);
   }
 }
+
+void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride,
+                         const uint8_t *b, int b_stride,
+                         int *min, int *max) {
+  // Load and concatenate.
+  const uint8x16_t a01 = vcombine_u8(vld1_u8(a),
+                                     vld1_u8(a + a_stride));
+  const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride),
+                                     vld1_u8(a + 3 * a_stride));
+  const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride),
+                                     vld1_u8(a + 5 * a_stride));
+  const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride),
+                                     vld1_u8(a + 7 * a_stride));
+
+  const uint8x16_t b01 = vcombine_u8(vld1_u8(b),
+                                     vld1_u8(b + b_stride));
+  const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride),
+                                     vld1_u8(b + 3 * b_stride));
+  const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride),
+                                     vld1_u8(b + 5 * b_stride));
+  const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride),
+                                     vld1_u8(b + 7 * b_stride));
+
+  // Absolute difference.
+  const uint8x16_t ab01_diff = vabdq_u8(a01, b01);
+  const uint8x16_t ab23_diff = vabdq_u8(a23, b23);
+  const uint8x16_t ab45_diff = vabdq_u8(a45, b45);
+  const uint8x16_t ab67_diff = vabdq_u8(a67, b67);
+
+  // Max values between the Q vectors.
+  const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff);
+  const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff);
+  const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff);
+  const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff);
+
+  const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
+  const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
+
+  // Split to D and start doing pairwise.
+  uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max));
+  uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min));
+
+  // Enough runs of vpmax/min propogate the max/min values to every position.
+  ab_max = vpmax_u8(ab_max, ab_max);
+  ab_min = vpmin_u8(ab_min, ab_min);
+
+  ab_max = vpmax_u8(ab_max, ab_max);
+  ab_min = vpmin_u8(ab_min, ab_min);
+
+  ab_max = vpmax_u8(ab_max, ab_max);
+  ab_min = vpmin_u8(ab_min, ab_min);
+
+  *min = *max = 0;  // Clear high bits
+  // Store directly to avoid costly neon->gpr transfer.
+  vst1_lane_u8((uint8_t *)max, ab_max, 0);
+  vst1_lane_u8((uint8_t *)min, ab_min, 0);
+}
diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c
index a44c52e8d..a8c996663 100644
--- a/vpx_dsp/avg.c
+++ b/vpx_dsp/avg.c
@@ -62,6 +62,8 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride,
   coeff[5] = c3 - c7;
 }
 
+// The order of the output coeff of the hadamard is not important. For
+// optimization purposes the final transpose may be skipped.
 void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride,
                         int16_t *coeff) {
   int idx;
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 2b131929e..9ea80a098 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1014,7 +1014,7 @@ if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCO
   specialize qw/vpx_avg_4x4 sse2 neon msa/;
 
   add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-  specialize qw/vpx_minmax_8x8 sse2/;
+  specialize qw/vpx_minmax_8x8 sse2 neon/;
 
   add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
   specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";