diff options
-rw-r--r-- | test/acm_random.h | 6 | ||||
-rw-r--r-- | test/hadamard_test.cc | 126 | ||||
-rw-r--r-- | test/minmax_test.cc | 132 | ||||
-rw-r--r-- | test/test.mk | 2 | ||||
-rw-r--r-- | vp9/common/vp9_loopfilter.c | 2 | ||||
-rw-r--r-- | vp9/common/vp9_seg_common.c | 1 | ||||
-rw-r--r-- | vp9/common/vp9_seg_common.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_aq_cyclicrefresh.c | 23 | ||||
-rw-r--r-- | vp9/encoder/vp9_aq_cyclicrefresh.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_denoiser.c | 32 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 70 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 27 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 33 | ||||
-rw-r--r-- | vp9/encoder/vp9_noise_estimate.c | 52 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 39 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_skin_detection.c | 25 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 8 | ||||
-rw-r--r-- | vpx_dsp/arm/avg_neon.c | 57 | ||||
-rw-r--r-- | vpx_dsp/avg.c | 2 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 |
22 files changed, 540 insertions, 106 deletions
diff --git a/test/acm_random.h b/test/acm_random.h index ff5c93ea1..a29ced2f7 100644 --- a/test/acm_random.h +++ b/test/acm_random.h @@ -32,6 +32,12 @@ class ACMRandom { return (value >> 15) & 0xffff; } + int16_t Rand9Signed(void) { + // Use 9 bits: values between 255 (0x0FF) and -256 (0x100). + const uint32_t value = random_.Generate(512); + return static_cast<int16_t>(value - 256); + } + uint8_t Rand8(void) { const uint32_t value = random_.Generate(testing::internal::Random::kMaxRange); diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc new file mode 100644 index 000000000..0bf6f4af7 --- /dev/null +++ b/test/hadamard_test.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <algorithm> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "./vpx_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" + +namespace { + +using ::libvpx_test::ACMRandom; + +typedef void (*Hadamard8x8Func)(const int16_t *a, int a_stride, + int16_t *b); + +class HadamardTest : public ::testing::TestWithParam<Hadamard8x8Func> { + public: + virtual void SetUp() { + h_func_ = GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + protected: + Hadamard8x8Func h_func_; + ACMRandom rnd_; +}; + +void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) { + int16_t b[8]; + for (int i = 0; i < 8; i += 2) { + b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride]; + b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride]; + } + int16_t c[8]; + for (int i = 0; i < 8; i += 4) { + c[i + 0] = b[i + 0] + b[i + 2]; + c[i + 1] = b[i + 1] + b[i + 3]; + c[i + 2] = b[i + 0] - b[i + 2]; + c[i + 3] = b[i + 1] - b[i + 3]; + } + out[0] = c[0] + c[4]; + out[7] = c[1] + c[5]; + out[3] = c[2] + c[6]; + out[4] = c[3] + c[7]; + out[2] = c[0] - c[4]; + out[6] = c[1] - c[5]; + out[1] = c[2] - c[6]; + out[5] = c[3] - c[7]; +} + +void reference_hadamard(const int16_t *a, int a_stride, int16_t *b) { + int16_t buf[64]; + for (int i = 0; i < 8; i++) { + hadamard_loop(a + i, a_stride, buf + i * 8); + } + + for (int i = 0; i < 8; i++) { + hadamard_loop(buf + i, 8, b + i * 8); + } +} + +TEST_P(HadamardTest, CompareReferenceRandom) { + DECLARE_ALIGNED(16, int16_t, a[64]); + DECLARE_ALIGNED(16, int16_t, b[64]); + int16_t b_ref[64]; + for (int i = 0; i < 64; i++) { + a[i] = rnd_.Rand9Signed(); + } + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard(a, 8, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b)); + + // The order of the output is not important. Sort before checking. 
+ std::sort(b, b + 64); + std::sort(b_ref, b_ref + 64); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); +} + +TEST_P(HadamardTest, VaryStride) { + DECLARE_ALIGNED(16, int16_t, a[64 * 8]); + DECLARE_ALIGNED(16, int16_t, b[64]); + int16_t b_ref[64]; + for (int i = 0; i < 64 * 8; i++) { + a[i] = rnd_.Rand9Signed(); + } + + for (int i = 8; i < 64; i += 8) { + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard(a, i, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + 64); + std::sort(b_ref, b_ref + 64); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } +} + +INSTANTIATE_TEST_CASE_P(C, HadamardTest, + ::testing::Values(&vpx_hadamard_8x8_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, HadamardTest, + ::testing::Values(&vpx_hadamard_8x8_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 +INSTANTIATE_TEST_CASE_P(SSSE3, HadamardTest, + ::testing::Values(&vpx_hadamard_8x8_ssse3)); +#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 +} // namespace diff --git a/test/minmax_test.cc b/test/minmax_test.cc new file mode 100644 index 000000000..dbe4342dc --- /dev/null +++ b/test/minmax_test.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" + +namespace { + +using ::libvpx_test::ACMRandom; + +typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int *min, int *max); + +class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> { + public: + virtual void SetUp() { + mm_func_ = GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + protected: + MinMaxFunc mm_func_; + ACMRandom rnd_; +}; + +void reference_minmax(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int *min_ret, int *max_ret) { + int min = 255; + int max = 0; + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]); + if (min > diff) min = diff; + if (max < diff) max = diff; + } + } + + *min_ret = min; + *max_ret = max; +} + +TEST_P(MinMaxTest, MinValue) { + for (int i = 0; i < 64; i++) { + uint8_t a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 255, sizeof(b)); + b[i] = i; // Set a minimum difference of i. + + int min, max; + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(255, max); + EXPECT_EQ(i, min); + } +} + +TEST_P(MinMaxTest, MaxValue) { + for (int i = 0; i < 64; i++) { + uint8_t a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 0, sizeof(b)); + b[i] = i; // Set a maximum difference of i. 
+ + int min, max; + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(i, max); + EXPECT_EQ(0, min); + } +} + +TEST_P(MinMaxTest, CompareReference) { + uint8_t a[64], b[64]; + for (int j = 0; j < 64; j++) { + a[j] = rnd_.Rand8(); + b[j] = rnd_.Rand8(); + } + + int min_ref, max_ref, min, max; + reference_minmax(a, 8, b, 8, &min_ref, &max_ref); + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(max_ref, max); + EXPECT_EQ(min_ref, min); +} + +TEST_P(MinMaxTest, CompareReferenceAndVaryStride) { + uint8_t a[8 * 64], b[8 * 64]; + for (int i = 0; i < 8 * 64; i++) { + a[i] = rnd_.Rand8(); + b[i] = rnd_.Rand8(); + } + for (int a_stride = 8; a_stride <= 64; a_stride += 8) { + for (int b_stride = 8; b_stride <= 64; b_stride += 8) { + int min_ref, max_ref, min, max; + reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref); + ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max)); + EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride + << " and b_stride = " << b_stride;; + EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride + << " and b_stride = " << b_stride;; + } + } +} + +INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest, + ::testing::Values(&vpx_minmax_8x8_sse2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest, + ::testing::Values(&vpx_minmax_8x8_neon)); +#endif + +} // namespace diff --git a/test/test.mk b/test/test.mk index 693185362..7c22ca501 100644 --- a/test/test.mk +++ b/test/test.mk @@ -143,6 +143,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc 
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 2d98e77aa..461462552 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -1072,8 +1072,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, } break; } - - vp9_adjust_mask(cm, mi_row, mi_col, lfm); } static void filter_selectively_vert(uint8_t *s, int pitch, diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c index c8ef618b7..7af61629a 100644 --- a/vp9/common/vp9_seg_common.c +++ b/vp9/common/vp9_seg_common.c @@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = { void vp9_clearall_segfeatures(struct segmentation *seg) { vp9_zero(seg->feature_data); vp9_zero(seg->feature_mask); + seg->aq_av_offset = 0; } void vp9_enable_segfeature(struct segmentation *seg, int segment_id, diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h index 5b75d8d4e..7ea7c3dd7 100644 --- a/vp9/common/vp9_seg_common.h +++ b/vp9/common/vp9_seg_common.h @@ -47,6 +47,7 @@ struct segmentation { int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; unsigned int feature_mask[MAX_SEGMENTS]; + int aq_av_offset; }; static INLINE int segfeature_active(const struct segmentation *seg, diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 1c8d2b45d..6b651039c 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -23,7 +23,6 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { size_t last_coded_q_map_size; - size_t consec_zero_mv_size; CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); if (cr == NULL) return NULL; @@ -41,21 +40,12 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { } assert(MAXQ <= 255); 
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); - - consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv); - cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size); - if (cr->consec_zero_mv == NULL) { - vp9_cyclic_refresh_free(cr); - return NULL; - } - memset(cr->consec_zero_mv, 0, consec_zero_mv_size); return cr; } void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { vpx_free(cr->map); vpx_free(cr->last_coded_q_map); - vpx_free(cr->consec_zero_mv); vpx_free(cr); } @@ -245,7 +235,6 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - MV mv = mi->mv[0].as_mv; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); @@ -269,15 +258,8 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ), cr->last_coded_q_map[map_offset]); - // Update the consecutive zero/low_mv count. - if (is_inter_block(mi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) { - if (cr->consec_zero_mv[map_offset] < 255) - cr->consec_zero_mv[map_offset]++; - } else { - cr->consec_zero_mv[map_offset] = 0; } } - } } // Update the actual number of blocks that were applied the segment delta q. 
@@ -442,7 +424,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { if (cr->map[bl_index2] == 0) { count_tot++; if (cr->last_coded_q_map[bl_index2] > qindex_thresh || - cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) { + cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) { sum_map++; count_sel++; } @@ -545,8 +527,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { if (cm->frame_type == KEY_FRAME) { memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); - memset(cr->consec_zero_mv, 0, - cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv)); cr->sb_index = 0; } return; @@ -621,7 +601,6 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; memset(cr->map, 0, cm->mi_rows * cm->mi_cols); memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols); - memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols); cr->sb_index = 0; cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h index 095b9283f..35eea182f 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -53,8 +53,6 @@ struct CYCLIC_REFRESH { signed char *map; // Map of the last q a block was coded at. uint8_t *last_coded_q_map; - // Count on how many consecutive times a block uses ZER0MV for encoding. - uint8_t *consec_zero_mv; // Thresholds applied to the projected rate/distortion of the coding block, // when deciding whether block should be refreshed. int64_t thresh_rate_sb; diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index dbca633c7..42d456e89 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -21,12 +21,6 @@ #include "vp9/encoder/vp9_denoiser.h" #include "vp9/encoder/vp9_encoder.h" -/* The VP9 denoiser is similar to that of the VP8 denoiser. 
While - * choosing the motion vectors / reference frames, the denoiser is run, and if - * it did not modify the signal to much, the denoised block is copied to the - * signal. - */ - #ifdef OUTPUT_YUV_DENOISED static void make_grayscale(YV12_BUFFER_CONFIG *yuv); #endif @@ -49,16 +43,19 @@ static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) { } static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) { - return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 60 : 40); + return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 80 : 40); } static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising, int motion_magnitude) { if (motion_magnitude > noise_motion_thresh(bs, increase_denoising)) { - return 0; + if (increase_denoising) + return (1 << num_pels_log2_lookup[bs]) << 2; + else + return 0; } else { - return (1 << num_pels_log2_lookup[bs]) * 20; + return (1 << num_pels_log2_lookup[bs]) << 4; } } @@ -215,12 +212,14 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, // Avoid denoising for small block (unless motion is small). // Small blocks are selected in variance partition (before encoding) and // will typically lie on moving areas. - if (motion_magnitude > 16 && bs <= BLOCK_8X8) + if (denoiser->denoising_level < kDenHigh && + motion_magnitude > 16 && bs <= BLOCK_8X8) return COPY_BLOCK; // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. 
if (frame != INTRA_FRAME && + ctx->newmv_sse != UINT_MAX && sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) { mi->ref_frame[0] = ctx->best_reference_frame; mi->mode = ctx->best_sse_inter_mode; @@ -242,6 +241,9 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, ctx->best_sse_inter_mode = ZEROMV; ctx->best_sse_mv.as_int = 0; *zeromv_filter = 1; + if (denoiser->denoising_level > kDenMedium) { + motion_magnitude = 0; + } } if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { @@ -334,14 +336,13 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, if (cpi->use_skin_detection && bs <= BLOCK_32X32 && - denoiser->denoising_level >= kDenLow) { + denoiser->denoising_level < kDenHigh) { int motion_level = (motion_magnitude < 16) ? 0 : 1; // If motion for current block is small/zero, compute consec_zeromv for // skin detection (early exit in skin detection is done for large // consec_zeromv when current block has small/zero motion). consec_zeromv = 0; if (motion_level == 0) { - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; VP9_COMMON * const cm = &cpi->common; int j, i; // Loop through the 8x8 sub-blocks. @@ -354,7 +355,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, for (i = 0; i < ymis; i++) { for (j = 0; j < xmis; j++) { int bl_index = block_index + i * cm->mi_cols + j; - consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], consec_zeromv); + consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], consec_zeromv); // No need to keep checking 8x8 blocks if any of the sub-blocks // has small consec_zeromv (since threshold for no_skin based on // zero/small motion in skin detection is high, i.e, > 4). 
@@ -376,8 +377,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, motion_level); } if (!is_skin && - denoiser->denoising_level == kDenHigh && - motion_magnitude < 16) { + denoiser->denoising_level == kDenHigh) { denoiser->increase_denoising = 1; } else { denoiser->increase_denoising = 0; @@ -494,12 +494,12 @@ void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) { ctx->zeromv_sse = UINT_MAX; ctx->newmv_sse = UINT_MAX; ctx->zeromv_lastref_sse = UINT_MAX; + ctx->best_sse_mv.as_int = 0; } void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse, PREDICTION_MODE mode, PICK_MODE_CONTEXT *ctx) { - // TODO(tkopp): Use both MVs if possible if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) { ctx->zeromv_sse = sse; ctx->best_zeromv_reference_frame = mi->ref_frame[0]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 634e5cfe8..73adf2a8d 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -780,7 +780,6 @@ static int choose_partitioning(VP9_COMP *cpi, #if !CONFIG_VP9_HIGHBITDEPTH if (cpi->use_skin_detection && !low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows)) { - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; int bl_index1, bl_index2, bl_index3; int num_16x16_skin = 0; int num_16x16_nonskin = 0; @@ -803,10 +802,10 @@ static int choose_partitioning(VP9_COMP *cpi, bl_index1 = bl_index + 1; bl_index2 = bl_index + cm->mi_cols; bl_index3 = bl_index2 + 1; - consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], - VPXMIN(cr->consec_zero_mv[bl_index1], - VPXMIN(cr->consec_zero_mv[bl_index2], - cr->consec_zero_mv[bl_index3]))); + consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], + VPXMIN(cpi->consec_zero_mv[bl_index1], + VPXMIN(cpi->consec_zero_mv[bl_index2], + cpi->consec_zero_mv[bl_index3]))); is_skin = vp9_compute_skin_block(ysignal, usignal, vsignal, @@ -4138,6 +4137,31 @@ static INTERP_FILTER get_interp_filter( } } +static int 
compute_frame_aq_offset(struct VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; + struct segmentation *const seg = &cm->seg; + + int mi_row, mi_col; + int sum_delta = 0; + int map_index = 0; + int qdelta_index; + int segment_id; + + for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { + MODE_INFO **mi_8x8 = mi_8x8_ptr; + for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) { + segment_id = mi_8x8[0]->segment_id; + qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); + sum_delta += qdelta_index; + map_index++; + } + mi_8x8_ptr += cm->mi_stride; + } + + return sum_delta / (cm->mi_rows * cm->mi_cols); +} + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -4260,8 +4284,13 @@ void vp9_encode_frame(VP9_COMP *cpi) { cm->reference_mode = SINGLE_REFERENCE; encode_frame_internal(cpi); } -} + // If segmented AQ is enabled compute the average AQ weighting. + if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) && + (cm->seg.update_map || cm->seg.update_data)) { + cm->seg.aq_av_offset = compute_frame_aq_offset(cpi); + } +} static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { const PREDICTION_MODE y_mode = mi->mode; const PREDICTION_MODE uv_mode = mi->uv_mode; @@ -4281,6 +4310,33 @@ static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { ++counts->uv_mode[y_mode][uv_mode]; } +static void update_zeromv_cnt(VP9_COMP *const cpi, + const MODE_INFO *const mi, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const VP9_COMMON *const cm = &cpi->common; + MV mv = mi->mv[0].as_mv; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + int x, y; + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) { + int map_offset = block_index + y * 
cm->mi_cols + x; + if (is_inter_block(mi) && mi->skip && + mi->segment_id <= CR_SEGMENT_ID_BOOST2) { + if (abs(mv.row) < 8 && abs(mv.col) < 8) { + if (cpi->consec_zero_mv[map_offset] < 255) + cpi->consec_zero_mv[map_offset]++; + } else { + cpi->consec_zero_mv[map_offset] = 0; + } + } + } +} + static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -4361,5 +4417,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); + if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0) + update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); } } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 4e5377889..68537e97f 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -375,6 +375,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->active_map.map); cpi->active_map.map = NULL; + vpx_free(cpi->consec_zero_mv); + cpi->consec_zero_mv = NULL; + vp9_free_ref_frame_buffers(cm->buffer_pool); #if CONFIG_VP9_POSTPROC vp9_free_postproc_buffers(cm); @@ -1549,9 +1552,12 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { update_frame_size(cpi); - if ((last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) && - cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) - vp9_cyclic_refresh_reset_resize(cpi); + if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { + memset(cpi->consec_zero_mv, 0, + cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_reset_resize(cpi); + } if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || @@ -1698,6 +1704,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, 
realloc_segmentation_maps(cpi); + CHECK_MEM_ERROR(cm, cpi->consec_zero_mv, + vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cpi->consec_zero_mv))); + CHECK_MEM_ERROR(cm, cpi->nmvcosts[0], vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0]))); CHECK_MEM_ERROR(cm, cpi->nmvcosts[1], @@ -3127,7 +3137,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" "%6d %6d %5d %5d %5d " "%10"PRId64" %10.3lf" - "%10lf %8u %10"PRId64" %10d %10d %10d %10d\n", + "%10lf %8u %10"PRId64" %10d %10d %10d %10d %10d\n", cpi->common.current_video_frame, cm->width, cm->height, cpi->td.rd_counts.m_search_count, @@ -3161,7 +3171,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost, cpi->twopass.kf_zeromotion_pct, cpi->twopass.fr_content_type, - cm->lf.filter_level); + cm->lf.filter_level, + cm->seg.aq_av_offset); } fclose(f); @@ -3430,6 +3441,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, cpi->unscaled_last_source, &cpi->scaled_last_source, (cpi->oxcf.pass == 0)); + + if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) { + memset(cpi->consec_zero_mv, 0, + cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); + } + vp9_update_noise_estimate(cpi); if (cpi->oxcf.pass == 0 && diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 2def941ef..8a5c4f40b 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -499,6 +499,9 @@ typedef struct VP9_COMP { NOISE_ESTIMATE noise_estimate; + // Count on how many consecutive times a block uses small/zeromv for encoding. 
+ uint8_t *consec_zero_mv; + // VAR_BASED_PARTITION thresholds // 0 - threshold_64x64; 1 - threshold_32x32; // 2 - threshold_16x16; 3 - vbp_threshold_8x8; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 3e34c012c..e25b64202 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -1124,7 +1124,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { fps.intra_skip_pct = (double)intra_skip_count / num_mbs; fps.intra_smooth_pct = (double)intra_smooth_count / num_mbs; fps.inactive_zone_rows = (double)image_data_start_row; - fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix + // Currently set to 0 as most issues relate to letter boxing. + fps.inactive_zone_cols = (double)0; if (mvcount > 0) { fps.MVr = (double)sum_mvr / mvcount; @@ -1150,10 +1151,9 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { fps.pcnt_motion = 0.0; } - // TODO(paulwilkins): Handle the case when duration is set to 0, or - // something less than the full time between subsequent values of - // cpi->source_time_stamp. - fps.duration = (double)(source->ts_end - source->ts_start); + // Don't allow a value of 0 for duration. + // (Section duration is also defaulted to minimum of 1.0). + fps.duration = VPXMAX(1.0, (double)(source->ts_end - source->ts_start)); // Don't want to do output stats with a stack variable! twopass->this_frame_stats = fps; @@ -2849,6 +2849,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { void vp9_twopass_postencode_update(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; RATE_CONTROL *const rc = &cpi->rc; + VP9_COMMON *const cm = &cpi->common; const int bits_used = rc->base_frame_target; // VBR correction is done through rc->vbr_bits_off_target. Based on the @@ -2886,6 +2887,22 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { rc->worst_quality - twopass->active_worst_quality; const int minq_adj_limit = (cpi->oxcf.rc_mode == VPX_CQ ? 
MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT); + int aq_extend_min = 0; + int aq_extend_max = 0; + + // Extend min or Max Q range to account for imbalance from the base + // value when using AQ. + if (cpi->oxcf.aq_mode != NO_AQ) { + if (cm->seg.aq_av_offset < 0) { + // The balance of the AQ map tends towards lowering the average Q. + aq_extend_min = 0; + aq_extend_max = VPXMIN(maxq_adj_limit, -cm->seg.aq_av_offset); + } else { + // The balance of the AQ map tends towards raising the average Q. + aq_extend_min = VPXMIN(minq_adj_limit, cm->seg.aq_av_offset); + aq_extend_max = 0; + } + } // Undershoot. if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) { @@ -2910,8 +2927,10 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { --twopass->extend_maxq; } - twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit); - twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit); + twopass->extend_minq = + clamp(twopass->extend_minq, aq_extend_min, minq_adj_limit); + twopass->extend_maxq = + clamp(twopass->extend_maxq, aq_extend_max, maxq_adj_limit); // If there is a big and undexpected undershoot then feed the extra // bits back in quickly. One situation where this may happen is if a diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c index 10ea01012..d4fee6f9b 100644 --- a/vp9/encoder/vp9_noise_estimate.c +++ b/vp9/encoder/vp9_noise_estimate.c @@ -40,11 +40,9 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, } int enable_noise_estimation(VP9_COMP *const cpi) { - // Enable noise estimation if denoising is on (and cyclic refresh, since - // noise estimate is currently using a struct defined in cyclic refresh). + // Enable noise estimation if denoising is on. #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && - cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + if (cpi->oxcf.noise_sensitivity > 0) return 1; #endif // Only allow noise estimate under certain encoding mode. 
@@ -101,7 +99,6 @@ NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) { void vp9_update_noise_estimate(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; NOISE_ESTIMATE *const ne = &cpi->noise_estimate; // Estimate of noise level every frame_period frames. int frame_period = 10; @@ -131,6 +128,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { ne->last_h = cm->height; } return; + } else if (cpi->rc.avg_frame_low_motion < 50) { + // Force noise estimation to 0 and denoiser off if content has high motion. + ne->level = kLowLow; +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0) + vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); +#endif + return; } else { int num_samples = 0; uint64_t avg_est = 0; @@ -153,7 +158,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { int bl_index = mi_row * cm->mi_cols + mi_col; - if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv) + if (cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv) num_low_motion++; } } @@ -173,23 +178,26 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { // been encoded as zero/low motion x (= thresh_consec_zeromv) frames // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all // 4 sub-blocks for 16x16 block. Also, avoid skin blocks. 
- int consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], - VPXMIN(cr->consec_zero_mv[bl_index1], - VPXMIN(cr->consec_zero_mv[bl_index2], - cr->consec_zero_mv[bl_index3]))); - int is_skin = vp9_compute_skin_block(src_y, - src_u, - src_v, - src_ystride, - src_uvstride, - bsize, - consec_zeromv, - 0); + int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], + VPXMIN(cpi->consec_zero_mv[bl_index1], + VPXMIN(cpi->consec_zero_mv[bl_index2], + cpi->consec_zero_mv[bl_index3]))); + int is_skin = 0; + if (cpi->use_skin_detection) { + is_skin = vp9_compute_skin_block(src_y, + src_u, + src_v, + src_ystride, + src_uvstride, + bsize, + consec_zeromv, + 0); + } if (frame_low_motion && - cr->consec_zero_mv[bl_index] > thresh_consec_zeromv && - cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv && - cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv && - cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv && + cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv && + cpi->consec_zero_mv[bl_index1] > thresh_consec_zeromv && + cpi->consec_zero_mv[bl_index2] > thresh_consec_zeromv && + cpi->consec_zero_mv[bl_index3] > thresh_consec_zeromv && !is_skin) { // Compute variance. unsigned int sse; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 3bd175392..d53e60aea 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -338,6 +338,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->total_target_bits = 0; rc->total_target_vs_actual = 0; rc->avg_intersize_gfint = 0; + rc->avg_frame_low_motion = 0; rc->frames_since_key = 8; // Sensible default for first frame. 
rc->this_key_frame_forced = 0; @@ -1334,6 +1335,26 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { } } +static void compute_frame_low_motion(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + int mi_row, mi_col; + MODE_INFO **mi = cm->mi_grid_visible; + RATE_CONTROL *const rc = &cpi->rc; + const int rows = cm->mi_rows, cols = cm->mi_cols; + int cnt_zeromv = 0; + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + if (abs(mi[0]->mv[0].as_mv.row) < 16 && + abs(mi[0]->mv[0].as_mv.col) < 16) + cnt_zeromv++; + mi++; + } + mi += 8; + } + cnt_zeromv = 100 * cnt_zeromv / (rows * cols); + rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) >> 2; +} + void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -1420,10 +1441,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (!cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { - rc->avg_intersize_gfint += rc->projected_frame_size; - } - if (!cpi->use_svc || is_two_pass_svc(cpi)) { if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) @@ -1447,6 +1464,13 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->next_frame_size_selector != rc->frame_size_selector; rc->frame_size_selector = rc->next_frame_size_selector; } + + if (oxcf->pass == 0) { + if (cm->frame_type != KEY_FRAME) + compute_frame_low_motion(cpi); + if (!cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) + rc->avg_intersize_gfint += rc->projected_frame_size; + } } void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { @@ -1507,6 +1531,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { if (rc->frames_till_gf_update_due == 0) { rc->avg_intersize_gfint = rc->avg_intersize_gfint / 
(rc->baseline_gf_interval + 1); + rc->gfu_boost = DEFAULT_GF_BOOST; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) { vp9_cyclic_refresh_set_golden_update(cpi); } else { @@ -1523,6 +1548,11 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { rc->avg_frame_qindex[INTER_FRAME] > (7 * rc->worst_quality) >> 3 && rc->avg_intersize_gfint > (5 * rc->avg_frame_bandwidth) >> 1) { rc->baseline_gf_interval = (3 * rc->baseline_gf_interval) >> 1; + } else if (cm->current_video_frame > 30 && + rc->avg_frame_low_motion < 20) { + // Decrease boost and gf interval for high motion case. + rc->gfu_boost = DEFAULT_GF_BOOST >> 1; + rc->baseline_gf_interval = VPXMIN(6, rc->baseline_gf_interval >> 1); } } rc->frames_till_gf_update_due = rc->baseline_gf_interval; @@ -1535,7 +1565,6 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { } cpi->refresh_golden_frame = 1; rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; - rc->gfu_boost = DEFAULT_GF_BOOST; rc->avg_intersize_gfint = 0; } if (cm->frame_type == KEY_FRAME) diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 7b6f16548..eef19401e 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -163,6 +163,7 @@ typedef struct { int high_source_sad; int count_last_scene_change; int avg_intersize_gfint; + int avg_frame_low_motion; } RATE_CONTROL; struct VP9_COMP; diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c index ff0dfce67..268aa919b 100644 --- a/vp9/encoder/vp9_skin_detection.c +++ b/vp9/encoder/vp9_skin_detection.c @@ -112,7 +112,6 @@ int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { int i, j, mi_row, mi_col, num_bl; VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; uint8_t *y; const uint8_t *src_y = cpi->Source->y_buffer; const uint8_t *src_u = cpi->Source->u_buffer; @@ -166,19 +165,17 @@ void 
vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { } else { int block_size = BLOCK_8X8; int consec_zeromv = 0; - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - int bl_index = mi_row * cm->mi_cols + mi_col; - int bl_index1 = bl_index + 1; - int bl_index2 = bl_index + cm->mi_cols; - int bl_index3 = bl_index2 + 1; - if (y_bsize == 8) - consec_zeromv = cr->consec_zero_mv[bl_index]; - else - consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], - VPXMIN(cr->consec_zero_mv[bl_index1], - VPXMIN(cr->consec_zero_mv[bl_index2], - cr->consec_zero_mv[bl_index3]))); - } + int bl_index = mi_row * cm->mi_cols + mi_col; + int bl_index1 = bl_index + 1; + int bl_index2 = bl_index + cm->mi_cols; + int bl_index3 = bl_index2 + 1; + if (y_bsize == 8) + consec_zeromv = cpi->consec_zero_mv[bl_index]; + else + consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], + VPXMIN(cpi->consec_zero_mv[bl_index1], + VPXMIN(cpi->consec_zero_mv[bl_index2], + cpi->consec_zero_mv[bl_index3]))); if (y_bsize == 16) block_size = BLOCK_16X16; is_skin = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride, diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 73048f842..1814a32c9 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -327,12 +327,12 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; signed char *temp = cr->map; uint8_t *temp2 = cr->last_coded_q_map; - uint8_t *temp3 = cr->consec_zero_mv; + uint8_t *temp3 = cpi->consec_zero_mv; cr->map = lc->map; lc->map = temp; cr->last_coded_q_map = lc->last_coded_q_map; lc->last_coded_q_map = temp2; - cr->consec_zero_mv = lc->consec_zero_mv; + cpi->consec_zero_mv = lc->consec_zero_mv; lc->consec_zero_mv = temp3; cr->sb_index = lc->sb_index; } @@ -360,8 +360,8 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { cr->map = temp; lc->last_coded_q_map = cr->last_coded_q_map; cr->last_coded_q_map 
= temp2; - lc->consec_zero_mv = cr->consec_zero_mv; - cr->consec_zero_mv = temp3; + lc->consec_zero_mv = cpi->consec_zero_mv; + cpi->consec_zero_mv = temp3; lc->sb_index = cr->sb_index; } } diff --git a/vpx_dsp/arm/avg_neon.c b/vpx_dsp/arm/avg_neon.c index d054c4185..e52958c54 100644 --- a/vpx_dsp/arm/avg_neon.c +++ b/vpx_dsp/arm/avg_neon.c @@ -197,3 +197,60 @@ int vpx_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) { return s - ((t * t) >> shift_factor); } } + +void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int *min, int *max) { + // Load and concatenate. + const uint8x16_t a01 = vcombine_u8(vld1_u8(a), + vld1_u8(a + a_stride)); + const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride), + vld1_u8(a + 3 * a_stride)); + const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride), + vld1_u8(a + 5 * a_stride)); + const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride), + vld1_u8(a + 7 * a_stride)); + + const uint8x16_t b01 = vcombine_u8(vld1_u8(b), + vld1_u8(b + b_stride)); + const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride), + vld1_u8(b + 3 * b_stride)); + const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride), + vld1_u8(b + 5 * b_stride)); + const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride), + vld1_u8(b + 7 * b_stride)); + + // Absolute difference. + const uint8x16_t ab01_diff = vabdq_u8(a01, b01); + const uint8x16_t ab23_diff = vabdq_u8(a23, b23); + const uint8x16_t ab45_diff = vabdq_u8(a45, b45); + const uint8x16_t ab67_diff = vabdq_u8(a67, b67); + + // Max values between the Q vectors. 
+ const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff); + const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff); + const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff); + const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff); + + const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max); + const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min); + + // Split to D and start doing pairwise. + uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max)); + uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min)); + + // Enough runs of vpmax/min propagate the max/min values to every position. + ab_max = vpmax_u8(ab_max, ab_max); + ab_min = vpmin_u8(ab_min, ab_min); + + ab_max = vpmax_u8(ab_max, ab_max); + ab_min = vpmin_u8(ab_min, ab_min); + + ab_max = vpmax_u8(ab_max, ab_max); + ab_min = vpmin_u8(ab_min, ab_min); + + *min = *max = 0; // Clear high bits + // Store directly to avoid costly neon->gpr transfer. + vst1_lane_u8((uint8_t *)max, ab_max, 0); + vst1_lane_u8((uint8_t *)min, ab_min, 0); +} diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c index a44c52e8d..a8c996663 100644 --- a/vpx_dsp/avg.c +++ b/vpx_dsp/avg.c @@ -62,6 +62,8 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride, coeff[5] = c3 - c7; } +// The order of the output coeff of the hadamard is not important. For +// optimization purposes the final transpose may be skipped. 
void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, int16_t *coeff) { int idx; diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 2b131929e..9ea80a098 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1014,7 +1014,7 @@ if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCO specialize qw/vpx_avg_4x4 sse2 neon msa/; add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; - specialize qw/vpx_minmax_8x8 sse2/; + specialize qw/vpx_minmax_8x8 sse2 neon/; add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc"; |