diff options
Diffstat (limited to 'vp9')
36 files changed, 4123 insertions, 547 deletions
diff --git a/vp9/common/ppc/vp9_idct_vsx.c b/vp9/common/ppc/vp9_idct_vsx.c new file mode 100644 index 000000000..1b2a93edb --- /dev/null +++ b/vp9/common/ppc/vp9_idct_vsx.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/ppc/inv_txfm_vsx.h" +#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h" + +#include "vp9/common/vp9_enums.h" + +void vp9_iht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int16x8_t in[2], out[2]; + + in[0] = load_tran_low(0, input); + in[1] = load_tran_low(8 * sizeof(*input), input); + + switch (tx_type) { + case DCT_DCT: + vpx_idct4_vsx(in, out); + vpx_idct4_vsx(out, in); + break; + case ADST_DCT: + vpx_idct4_vsx(in, out); + vp9_iadst4_vsx(out, in); + break; + case DCT_ADST: + vp9_iadst4_vsx(in, out); + vpx_idct4_vsx(out, in); + break; + default: + assert(tx_type == ADST_ADST); + vp9_iadst4_vsx(in, out); + vp9_iadst4_vsx(out, in); + break; + } + + vpx_round_store4x4_vsx(in, out, dest, stride); +} + +void vp9_iht8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int16x8_t in[8], out[8]; + + // load input data + in[0] = load_tran_low(0, input); + in[1] = load_tran_low(8 * sizeof(*input), input); + in[2] = load_tran_low(2 * 8 * sizeof(*input), input); + in[3] = load_tran_low(3 * 8 * sizeof(*input), input); + in[4] = load_tran_low(4 * 8 * sizeof(*input), input); + in[5] = load_tran_low(5 * 8 * sizeof(*input), input); + in[6] = load_tran_low(6 * 8 * sizeof(*input), input); + in[7] = load_tran_low(7 * 8 * sizeof(*input), 
input); + + switch (tx_type) { + case DCT_DCT: + vpx_idct8_vsx(in, out); + vpx_idct8_vsx(out, in); + break; + case ADST_DCT: + vpx_idct8_vsx(in, out); + vp9_iadst8_vsx(out, in); + break; + case DCT_ADST: + vp9_iadst8_vsx(in, out); + vpx_idct8_vsx(out, in); + break; + default: + assert(tx_type == ADST_ADST); + vp9_iadst8_vsx(in, out); + vp9_iadst8_vsx(out, in); + break; + } + + vpx_round_store8x8_vsx(in, dest, stride); +} + +void vp9_iht16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest, + int stride, int tx_type) { + int16x8_t in0[16], in1[16]; + + LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), in0); + LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input), + 8 * sizeof(*input), in1); + + switch (tx_type) { + case DCT_DCT: + vpx_idct16_vsx(in0, in1); + vpx_idct16_vsx(in0, in1); + break; + case ADST_DCT: + vpx_idct16_vsx(in0, in1); + vpx_iadst16_vsx(in0, in1); + break; + case DCT_ADST: + vpx_iadst16_vsx(in0, in1); + vpx_idct16_vsx(in0, in1); + break; + default: + assert(tx_type == ADST_ADST); + vpx_iadst16_vsx(in0, in1); + vpx_iadst16_vsx(in0, in1); + break; + } + + vpx_round_store16x16_vsx(in0, in1, dest, stride); +} diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 780b29208..147380650 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -60,6 +60,13 @@ typedef struct { #define GOLDEN_FRAME 2 #define ALTREF_FRAME 3 #define MAX_REF_FRAMES 4 + +#define LAST2_FRAME 4 +#define LAST3_FRAME 5 +#define BWDREF_FRAME 6 +#define ALTREF2_FRAME 7 +#define LAST_REF_FRAMES 3 + typedef int8_t MV_REFERENCE_FRAME; // This structure now relates to 8x8 block regions. 
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 20c9ed641..6d7f95260 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -67,9 +67,9 @@ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *outp if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { # Note that there are more specializations appended when # CONFIG_VP9_HIGHBITDEPTH is off. - specialize qw/vp9_iht4x4_16_add neon sse2/; - specialize qw/vp9_iht8x8_64_add neon sse2/; - specialize qw/vp9_iht16x16_256_add neon sse2/; + specialize qw/vp9_iht4x4_16_add neon sse2 vsx/; + specialize qw/vp9_iht8x8_64_add neon sse2 vsx/; + specialize qw/vp9_iht16x16_256_add neon sse2 vsx/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { # Note that these specializations are appended to the above ones. specialize qw/vp9_iht4x4_16_add dspr2 msa/; @@ -129,10 +129,10 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_ add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size"; add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_fp neon sse2 avx2/, "$ssse3_x86_64"; +specialize qw/vp9_quantize_fp neon sse2 avx2 vsx/, "$ssse3_x86_64"; add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64"; +specialize qw/vp9_quantize_fp_32x32 neon vsx/, "$ssse3_x86_64"; add_proto qw/void vp9_fdct8x8_quant/, "const int16_t 
*input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index d0e896c13..9c793f710 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1148,9 +1148,15 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { // Allocations in vp9_alloc_context_buffers() depend on individual // dimensions as well as the overall size. if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { - if (vp9_alloc_context_buffers(cm, width, height)) + if (vp9_alloc_context_buffers(cm, width, height)) { + // The cm->mi_* values have been cleared and any existing context + // buffers have been freed. Clear cm->width and cm->height to be + // consistent and to force a realloc next time. 
+ cm->width = 0; + cm->height = 0; vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate context buffers"); + } } else { vp9_set_mb_mi(cm, width, height); } @@ -1528,7 +1534,7 @@ static int tile_worker_hook(void *arg1, void *arg2) { static int compare_tile_buffers(const void *a, const void *b) { const TileBuffer *const buf1 = (const TileBuffer *)a; const TileBuffer *const buf2 = (const TileBuffer *)b; - return (int)(buf2->size - buf1->size); + return (int)((int64_t)buf2->size - buf1->size); } static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, @@ -1724,6 +1730,21 @@ static void read_bitdepth_colorspace_sampling(VP9_COMMON *cm, } } +static INLINE void flush_all_fb_on_key(VP9_COMMON *cm) { + if (cm->frame_type == KEY_FRAME && cm->current_video_frame > 0) { + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + BufferPool *const pool = cm->buffer_pool; + int i; + for (i = 0; i < FRAME_BUFFERS; ++i) { + frame_bufs[i].ref_count = 0; + if (!frame_bufs[i].released) { + pool->release_fb_cb(pool->cb_priv, &frame_bufs[i].raw_frame_buffer); + frame_bufs[i].released = 1; + } + } + } +} + static size_t read_uncompressed_header(VP9Decoder *pbi, struct vpx_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; @@ -1788,6 +1809,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, setup_frame_size(cm, rb); if (pbi->need_resync) { memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + flush_all_fb_on_key(cm); pbi->need_resync = 0; } } else { diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index a913fa560..d6eacaf44 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -69,6 +69,7 @@ static void vp9_dec_free_mi(VP9_COMMON *cm) { cm->mip = NULL; vpx_free(cm->mi_grid_base); cm->mi_grid_base = NULL; + cm->mi_alloc_size = 0; } VP9Decoder *vp9_decoder_create(BufferPool *const pool) { diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 4b26c314d..5f22c00cb 
100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -72,6 +72,8 @@ typedef struct VP9Decoder { int inv_tile_order; int need_resync; // wait for key/intra-only frame. int hold_ref_buf; // hold the reference buffer. + + int row_mt; } VP9Decoder; int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, diff --git a/vp9/encoder/ppc/vp9_quantize_vsx.c b/vp9/encoder/ppc/vp9_quantize_vsx.c new file mode 100644 index 000000000..3720b0876 --- /dev/null +++ b/vp9/encoder/ppc/vp9_quantize_vsx.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" + +#include "./vp9_rtcd.h" +#include "vpx_dsp/ppc/types_vsx.h" + +// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit +// integers, and return the high 16 bits of the intermediate integers. +// (a * b) >> 16 +// Note: Because this is done in 2 operations, a and b cannot both be UINT16_MIN +static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) { + // madds does ((A * B) >> 15) + C, we need >> 16, so we perform an extra right + // shift. + return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16); +} + +// Negate 16-bit integers in a when the corresponding signed 16-bit +// integer in b is negative. +static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) { + const int16x8_t mask = vec_sra(b, vec_shift_sign_s16); + return vec_xor(vec_add(a, mask), mask); +} + +// Compare packed 16-bit integers across a, and return the maximum value in +// every element. Returns a vector containing the biggest value across vector a. 
+static INLINE int16x8_t vec_max_across(int16x8_t a) { + a = vec_max(a, vec_perm(a, a, vec_perm64)); + a = vec_max(a, vec_perm(a, a, vec_perm32)); + return vec_max(a, vec_perm(a, a, vec_perm16)); +} + +void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan_ptr, + const int16_t *iscan_ptr) { + int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; + bool16x8_t zero_coeff0, zero_coeff1; + + int16x8_t round = vec_vsx_ld(0, round_ptr); + int16x8_t quant = vec_vsx_ld(0, quant_ptr); + int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); + int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); + int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); + int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr); + int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr); + + (void)scan_ptr; + (void)skip_block; + assert(!skip_block); + + // First set of 8 coeff starts with DC + 7 AC + qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant); + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + qcoeff0 = vec_sign(qcoeff0, coeff0); + vec_vsx_st(qcoeff0, 0, qcoeff_ptr); + + dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); + + // Remove DC value from round and quant + round = vec_splat(round, 1); + quant = vec_splat(quant, 1); + + // Remove DC value from dequant + dequant = vec_splat(dequant, 1); + + // Second set of 8 coeff starts with (all AC) + qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + qcoeff1 = vec_sign(qcoeff1, coeff1); + vec_vsx_st(qcoeff1, 16, qcoeff_ptr); + + dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); + + eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1)); + + // We quantize 16 coeff up front (enough for a 4x4) and 
process 24 coeff per + // loop iteration. + // for 8x8: 16 + 2 x 24 = 64 + // for 16x16: 16 + 10 x 24 = 256 + if (n_coeffs > 16) { + int16x8_t coeff2, qcoeff2, dqcoeff2, eob2, scan2; + bool16x8_t zero_coeff2; + + int index = 16; + int off0 = 32; + int off1 = 48; + int off2 = 64; + + do { + coeff0 = vec_vsx_ld(off0, coeff_ptr); + coeff1 = vec_vsx_ld(off1, coeff_ptr); + coeff2 = vec_vsx_ld(off2, coeff_ptr); + scan0 = vec_vsx_ld(off0, iscan_ptr); + scan1 = vec_vsx_ld(off1, iscan_ptr); + scan2 = vec_vsx_ld(off2, iscan_ptr); + + qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant); + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + qcoeff0 = vec_sign(qcoeff0, coeff0); + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); + + qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + qcoeff1 = vec_sign(qcoeff1, coeff1); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); + + qcoeff2 = vec_mulhi(vec_vaddshs(vec_abs(coeff2), round), quant); + zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16); + qcoeff2 = vec_sign(qcoeff2, coeff2); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); + + eob = vec_max(eob, vec_or(scan0, zero_coeff0)); + eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2)); + eob = vec_max(eob, eob2); + + index += 24; + off0 += 48; + off1 += 48; + off2 += 48; + } while (index < n_coeffs); + } + + eob = vec_max_across(eob); + *eob_ptr = eob[0] + 1; +} + +// Sets the value of a 32-bit integers to 1 when the corresponding value in a is +// negative. +static INLINE int32x4_t vec_is_neg(int32x4_t a) { + return vec_sr(a, vec_shift_sign_s32); +} + +// DeQuantization function used for 32x32 blocks. 
Quantized coeff of 32x32 +// blocks are twice as big as for other block sizes. As such, using +// vec_mladd results in overflow. +static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff, + int16x8_t dequant) { + int32x4_t dqcoeffe = vec_mule(qcoeff, dequant); + int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant); + // Add 1 if negative to round towards zero because the C uses division. + dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe)); + dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo)); + dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32); + dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32); + return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack); +} + +void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan_ptr, + const int16_t *iscan_ptr) { + // In stage 1, we quantize 16 coeffs (DC + 15 AC) + // In stage 2, we loop 42 times and quantize 24 coeffs per iteration + // (32 * 32 - 16) / 24 = 42 + int num_itr = 42; + // Offsets are in bytes, 16 coeffs = 32 bytes + int off0 = 32; + int off1 = 48; + int off2 = 64; + + int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; + bool16x8_t mask0, mask1, zero_coeff0, zero_coeff1; + + int16x8_t round = vec_vsx_ld(0, round_ptr); + int16x8_t quant = vec_vsx_ld(0, quant_ptr); + int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); + int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); + int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); + int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr); + int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr); + int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2)); + int16x8_t abs_coeff0 = vec_abs(coeff0); + int16x8_t abs_coeff1 = vec_abs(coeff1); + + (void)scan_ptr; + (void)skip_block; + (void)n_coeffs; + assert(!skip_block); + + mask0 = vec_cmpge(abs_coeff0, thres); + round = vec_sra(vec_add(round, 
vec_ones_s16), vec_ones_u16); + // First set of 8 coeff starts with DC + 7 AC + qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16); + qcoeff0 = vec_and(qcoeff0, mask0); + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + qcoeff0 = vec_sign(qcoeff0, coeff0); + vec_vsx_st(qcoeff0, 0, qcoeff_ptr); + + dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant); + vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); + + // Remove DC value from thres, round, quant and dequant + thres = vec_splat(thres, 1); + round = vec_splat(round, 1); + quant = vec_splat(quant, 1); + dequant = vec_splat(dequant, 1); + + mask1 = vec_cmpge(abs_coeff1, thres); + + // Second set of 8 coeff starts with (all AC) + qcoeff1 = + vec_madds(vec_vaddshs(vec_abs(coeff1), round), quant, vec_zeros_s16); + qcoeff1 = vec_and(qcoeff1, mask1); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + qcoeff1 = vec_sign(qcoeff1, coeff1); + vec_vsx_st(qcoeff1, 16, qcoeff_ptr); + + dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant); + vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); + + eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1)); + + do { + int16x8_t coeff2, abs_coeff2, qcoeff2, dqcoeff2, eob2, scan2; + bool16x8_t zero_coeff2, mask2; + coeff0 = vec_vsx_ld(off0, coeff_ptr); + coeff1 = vec_vsx_ld(off1, coeff_ptr); + coeff2 = vec_vsx_ld(off2, coeff_ptr); + scan0 = vec_vsx_ld(off0, iscan_ptr); + scan1 = vec_vsx_ld(off1, iscan_ptr); + scan2 = vec_vsx_ld(off2, iscan_ptr); + + abs_coeff0 = vec_abs(coeff0); + abs_coeff1 = vec_abs(coeff1); + abs_coeff2 = vec_abs(coeff2); + + qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16); + qcoeff1 = vec_madds(vec_vaddshs(abs_coeff1, round), quant, vec_zeros_s16); + qcoeff2 = vec_madds(vec_vaddshs(abs_coeff2, round), quant, vec_zeros_s16); + + mask0 = vec_cmpge(abs_coeff0, thres); + mask1 = vec_cmpge(abs_coeff1, thres); + mask2 = vec_cmpge(abs_coeff2, thres); + + qcoeff0 = vec_and(qcoeff0, mask0); + qcoeff1 = vec_and(qcoeff1, mask1); + qcoeff2 = 
vec_and(qcoeff2, mask2); + + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16); + + qcoeff0 = vec_sign(qcoeff0, coeff0); + qcoeff1 = vec_sign(qcoeff1, coeff1); + qcoeff2 = vec_sign(qcoeff2, coeff2); + + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + + dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant); + dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant); + dqcoeff2 = dequantize_coeff_32(qcoeff2, dequant); + + vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); + vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); + vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); + + eob = vec_max(eob, vec_or(scan0, zero_coeff0)); + eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2)); + eob = vec_max(eob, eob2); + + off0 += 48; + off1 += 48; + off2 += 48; + num_itr--; + } while (num_itr != 0); + + eob = vec_max_across(eob); + *eob_ptr = eob[0] + 1; +} diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index b47840795..9f7be4f13 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -39,6 +39,7 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { } assert(MAXQ <= 255); memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); + cr->counter_encode_maxq_scene_change = 0; return cr; } @@ -427,8 +428,11 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { double weight_segment_target = 0; double weight_segment = 0; int thresh_low_motion = (cm->width < 720) ? 
55 : 20; + int qp_thresh = VPXMIN(20, rc->best_quality << 1); cr->apply_cyclic_refresh = 1; - if (cm->frame_type == KEY_FRAME || cpi->svc.temporal_layer_id > 0 || + if (frame_is_intra_only(cm) || cpi->svc.temporal_layer_id > 0 || + is_lossless_requested(&cpi->oxcf) || + rc->avg_frame_qindex[INTER_FRAME] < qp_thresh || (cpi->use_svc && cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) || (!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion && @@ -457,6 +461,18 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { cr->rate_boost_fac = 13; } } + // For screen-content: keep rate_ratio_qdelta to 2.0 (segment#1 boost) and + // percent_refresh (refresh rate) to 10. But reduce rate boost for segment#2 + // (rate_boost_fac = 10 disables segment#2). + // TODO(marpan): Consider increasing refresh rate after slide change. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) { + cr->percent_refresh = 10; + // Increase the amount of refresh on scene change that is encoded at max Q, + // increase for a few cycles of the refresh period (~30 frames). + if (cr->counter_encode_maxq_scene_change < 30) cr->percent_refresh = 15; + cr->rate_ratio_qdelta = 2.0; + cr->rate_boost_fac = 10; + } // Adjust some parameters for low resolutions. if (cm->width <= 352 && cm->height <= 288) { if (rc->avg_frame_bandwidth < 3000) { @@ -491,6 +507,13 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { num8x8bl; if (weight_segment_target < 7 * weight_segment / 8) weight_segment = weight_segment_target; + // For screen-content: don't include target for the weight segment, + // since for all flat areas the segment is reset, so its more accurate + // to just use the previous actual number of seg blocks for the weight. 
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) + weight_segment = + (double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) / + num8x8bl; cr->weight_segment = weight_segment; } @@ -501,6 +524,8 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; if (cm->current_video_frame == 0) cr->low_content_avg = 0.0; + // Reset if resoluton change has occurred. + if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi); if (!cr->apply_cyclic_refresh || (cpi->force_update_segmentation)) { // Set segmentation map to 0 and disable. unsigned char *const seg_map = cpi->segmentation_map; @@ -511,12 +536,14 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); cr->sb_index = 0; cr->reduce_refresh = 0; + cr->counter_encode_maxq_scene_change = 0; } return; } else { int qindex_delta = 0; int qindex2; const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth); + cr->counter_encode_maxq_scene_change++; vpx_clear_system_state(); // Set rate threshold to some multiple (set to 2 for now) of the target // rate (target is given by sb64_target_rate and scaled by 256). @@ -566,9 +593,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { cr->qindex_delta[2] = qindex_delta; vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta); - // Reset if resoluton change has occurred. - if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi); - // Update the segmentation and refresh map. 
cyclic_refresh_update_map(cpi); } @@ -582,8 +606,19 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; memset(cr->map, 0, cm->mi_rows * cm->mi_cols); - memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols); + memset(cr->last_coded_q_map, MAXQ, + cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); cr->sb_index = 0; cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; + cr->counter_encode_maxq_scene_change = 0; +} + +void vp9_cyclic_refresh_limit_q(const VP9_COMP *cpi, int *q) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + // For now apply hard limit to frame-level decrease in q, if the cyclic + // refresh is active (percent_refresh > 0). + if (cr->percent_refresh > 0 && cpi->rc.q_1_frame - *q > 8) { + *q = cpi->rc.q_1_frame - 8; + } } diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h index 77fa67c9e..50789e87c 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -68,6 +68,7 @@ struct CYCLIC_REFRESH { int reduce_refresh; double weight_segment; int apply_cyclic_refresh; + int counter_encode_maxq_scene_change; }; struct VP9_COMP; @@ -139,6 +140,8 @@ static INLINE int cyclic_refresh_segment_id(int segment_id) { return CR_SEGMENT_ID_BASE; } +void vp9_cyclic_refresh_limit_q(const struct VP9_COMP *cpi, int *q); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c index 477f62ba5..9cd8819c3 100644 --- a/vp9/encoder/vp9_aq_variance.c +++ b/vp9/encoder/vp9_aq_variance.c @@ -19,6 +19,7 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" +#include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_segmentation.h" #define ENERGY_MIN (-4) @@ -192,6 +193,40 @@ double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { return log(var + 1.0); } +// Get the range of sub 
block energy values; +void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *min_e, + int *max_e) { + VP9_COMMON *const cm = &cpi->common; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + int x, y; + + if (xmis < bw || ymis < bh) { + vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); + *min_e = vp9_block_energy(cpi, mb, bsize); + *max_e = *min_e; + } else { + int energy; + *min_e = ENERGY_MAX; + *max_e = ENERGY_MIN; + + for (y = 0; y < ymis; ++y) { + for (x = 0; x < xmis; ++x) { + vp9_setup_src_planes(mb, cpi->Source, mi_row + y, mi_col + x); + energy = vp9_block_energy(cpi, mb, BLOCK_8X8); + *min_e = VPXMIN(*min_e, energy); + *max_e = VPXMAX(*max_e, energy); + } + } + } + + // Re-instate source pointers back to what they should have been on entry. + vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); +} + #define DEFAULT_E_MIDPOINT 10.0 int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { double energy; diff --git a/vp9/encoder/vp9_aq_variance.h b/vp9/encoder/vp9_aq_variance.h index 211a69f39..12848f74c 100644 --- a/vp9/encoder/vp9_aq_variance.h +++ b/vp9/encoder/vp9_aq_variance.h @@ -20,7 +20,11 @@ extern "C" { unsigned int vp9_vaq_segment_id(int energy); void vp9_vaq_frame_setup(VP9_COMP *cpi); +void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *min_e, + int *max_e); int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); + double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); #ifdef __cplusplus diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 724205dd5..36cf227cb 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -92,6 +92,7 @@ struct macroblock { int sadperbit4; int rddiv; int rdmult; + int cb_rdmult; 
int mb_energy; // These are set to their default values at the beginning, and then adjusted @@ -180,6 +181,8 @@ struct macroblock { int sb_pickmode_part; + int zero_temp_sad_source; + // For each superblock: saves the content value (e.g., low/high sad/sumdiff) // based on source sad, prior to encoding the frame. uint8_t content_state_sb; diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 73423c075..2bcc26e94 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -75,6 +75,8 @@ typedef struct { // Used for the machine learning-based early termination int32_t sum_y_eobs; + // Skip certain ref frames during RD search of rectangular partitions. + uint8_t skip_ref_frame_mask; } PICK_MODE_CONTEXT; typedef struct PC_TREE { diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 8ec5dd91d..6abb082cd 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -189,7 +189,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx, - int use_svc, int spatial_layer) { + int use_svc, int spatial_layer, int use_gf_temporal_ref) { const int sse_diff = (ctx->newmv_sse == UINT_MAX) ? 0 : ((int)ctx->zeromv_sse - (int)ctx->newmv_sse); @@ -220,7 +220,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation( // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. 
if (frame != INTRA_FRAME && frame != ALTREF_FRAME && - (frame != GOLDEN_FRAME || num_spatial_layers == 1) && + (frame != GOLDEN_FRAME || num_spatial_layers == 1 || + use_gf_temporal_ref) && sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) { mi->ref_frame[0] = ctx->best_reference_frame; mi->mode = ctx->best_sse_inter_mode; @@ -230,7 +231,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation( frame = ctx->best_zeromv_reference_frame; ctx->newmv_sse = ctx->zeromv_sse; // Bias to last reference. - if (num_spatial_layers > 1 || frame == ALTREF_FRAME || + if ((num_spatial_layers > 1 && !use_gf_temporal_ref) || + frame == ALTREF_FRAME || (frame != LAST_FRAME && ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) || denoiser->denoising_level >= kDenHigh))) { @@ -326,7 +328,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation( void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, - VP9_DENOISER_DECISION *denoiser_decision) { + VP9_DENOISER_DECISION *denoiser_decision, + int use_gf_temporal_ref) { int mv_col, mv_row; int motion_magnitude = 0; int zeromv_filter = 0; @@ -397,7 +400,8 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx, - cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id); + cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id, + use_gf_temporal_ref); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start, @@ -448,13 +452,13 @@ void vp9_denoiser_update_frame_info( VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, - int 
svc_base_is_key, int second_spatial_layer) { + int svc_refresh_denoiser_buffers, int second_spatial_layer) { const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0; // Copy source into denoised reference buffers on KEY_FRAME or // if the just encoded frame was resized. For SVC, copy source if the base // spatial layer was key frame. if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset || - svc_base_is_key) { + svc_refresh_denoiser_buffers) { int i; // Start at 1 so as not to overwrite the INTRA_FRAME for (i = 1; i < denoiser->num_ref_frames; ++i) { diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h index f4da24cbf..d25fe7edc 100644 --- a/vp9/encoder/vp9_denoiser.h +++ b/vp9/encoder/vp9_denoiser.h @@ -73,11 +73,12 @@ void vp9_denoiser_update_frame_info( VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, - int svc_base_is_key, int second_spatial_layer); + int svc_refresh_denoiser_buffers, int second_spatial_layer); void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, - VP9_DENOISER_DECISION *denoiser_decision); + VP9_DENOISER_DECISION *denoiser_decision, + int use_gf_temporal_ref); void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 091992dbd..e1207fea7 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -52,33 +52,6 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); -// Machine learning-based early termination parameters. 
-static const double train_mean[24] = { - 303501.697372, 3042630.372158, 24.694696, 1.392182, - 689.413511, 162.027012, 1.478213, 0.0, - 135382.260230, 912738.513263, 28.845217, 1.515230, - 544.158492, 131.807995, 1.436863, 0.0, - 43682.377587, 208131.711766, 28.084737, 1.356677, - 138.254122, 119.522553, 1.252322, 0.0 -}; - -static const double train_stdm[24] = { - 673689.212982, 5996652.516628, 0.024449, 1.989792, - 985.880847, 0.014638, 2.001898, 0.0, - 208798.775332, 1812548.443284, 0.018693, 1.838009, - 396.986910, 0.015657, 1.332541, 0.0, - 55888.847031, 448587.962714, 0.017900, 1.904776, - 98.652832, 0.016598, 1.320992, 0.0 -}; - -// Error tolerance: 0.01%-0.0.05%-0.1% -static const double classifiers[24] = { - 0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863, - 0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134, - 0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700, - 0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211 -}; - // This is used as a reference when computing the source variance for the // purpose of activity masking. // Eventually this should be replaced by custom no-reference routines, @@ -405,7 +378,8 @@ static void fill_variance(uint32_t s2, int32_t s, int c, var *v) { static void get_variance(var *v) { v->variance = (int)(256 * (v->sum_square_error - - ((v->sum_error * v->sum_error) >> v->log2_count)) >> + (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> + v->log2_count)) >> v->log2_count); } @@ -447,7 +421,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, // No check for vert/horiz split as too few samples for variance. if (bsize == bsize_min) { // Variance already computed to set the force_split. 
- if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); + if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < threshold) { @@ -457,9 +431,9 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, return 0; } else if (bsize > bsize_min) { // Variance already computed to set the force_split. - if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); + if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); // For key frame: take split for bsize above 32X32 or very high variance. - if (cm->frame_type == KEY_FRAME && + if (frame_is_intra_only(cm) && (bsize > BLOCK_32X32 || vt.part_variances->none.variance > (threshold << 4))) { return 0; @@ -531,7 +505,7 @@ static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, int content_state) { VP9_COMMON *const cm = &cpi->common; - const int is_key_frame = (cm->frame_type == KEY_FRAME); + const int is_key_frame = frame_is_intra_only(cm); const int threshold_multiplier = is_key_frame ? 
20 : 1; int64_t threshold_base = (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); @@ -583,6 +557,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, } else { thresholds[1] = (5 * threshold_base) >> 1; } + if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX; } } @@ -590,7 +565,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, int content_state) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; - const int is_key_frame = (cm->frame_type == KEY_FRAME); + const int is_key_frame = frame_is_intra_only(cm); if (sf->partition_search_type != VAR_BASED_PARTITION && sf->partition_search_type != REFERENCE_PARTITION) { return; @@ -617,6 +592,11 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000 ? (cpi->y_dequant[q][1] << 3) : 8000; + if (cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe)) { + cpi->vbp_threshold_sad = 0; + cpi->vbp_threshold_copy = 0; + } } cpi->vbp_threshold_minmax = 15 + (q >> 3); } @@ -1203,6 +1183,7 @@ static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, cpi->content_state_sb_fd[sb_offset] = 0; } } + if (tmp_sad == 0) x->zero_temp_sad_source = 1; return tmp_sad; } @@ -1238,15 +1219,18 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, int pixels_wide = 64, pixels_high = 64; int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; + int scene_change_detected = + cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe); // For the variance computation under SVC mode, we treat the frame as key if // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). 
int is_key_frame = - (cm->frame_type == KEY_FRAME || + (frame_is_intra_only(cm) || (is_one_pass_cbr_svc(cpi) && cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); // Always use 4x4 partition for key frame. - const int use_4x4_partition = cm->frame_type == KEY_FRAME; + const int use_4x4_partition = frame_is_intra_only(cm); const int low_res = (cm->width <= 352 && cm->height <= 288); int variance4x4downsample[16]; int segment_id; @@ -1299,6 +1283,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } // If source_sad is low copy the partition without computing the y_sad. if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && + !scene_change_detected && copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { x->sb_use_mv_part = 1; if (cpi->sf.svc_use_lowres_part && @@ -1327,7 +1312,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, // 5-20 for the 16x16 blocks. - force_split[0] = 0; + force_split[0] = scene_change_detected; if (!is_key_frame) { // In the case of spatial/temporal scalable coding, the assumption here is @@ -1343,7 +1328,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, assert(yv12 != NULL); - if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) { + if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || + cpi->svc.use_gf_temporal_ref_current_layer) { // For now, GOLDEN will not be used for non-zero spatial layers, since // it may not be a temporal reference. 
yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); @@ -1660,11 +1646,11 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } - if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) { + if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) { update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); } - if (cm->frame_type != KEY_FRAME && cpi->sf.svc_use_lowres_part && + if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); @@ -1927,13 +1913,22 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, } if (aq_mode == VARIANCE_AQ) { - const int energy = - bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize); - if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || cpi->force_update_segmentation || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - mi->segment_id = vp9_vaq_segment_id(energy); + int min_energy; + int max_energy; + + // Get sub block energy range + if (bsize >= BLOCK_32X32) { + vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, + &max_energy); + } else { + min_energy = bsize <= BLOCK_16X16 ? x->mb_energy + : vp9_block_energy(cpi, x, bsize); + } + + mi->segment_id = vp9_vaq_segment_id(min_energy); } else { const uint8_t *const map = cm->seg.update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; @@ -1963,6 +1958,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, if (cyclic_refresh_segment_id_boosted( get_segment_id(cm, map, bsize, mi_row, mi_col))) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); + } else { + if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; } // Find best coding mode & reconstruct the MB so it is available @@ -1991,11 +1988,14 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate); } - x->rdmult = orig_rdmult; - // TODO(jingning) The rate-distortion optimization flow needs to be // refactored to provide proper exit/return handle. - if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX; + if (rd_cost->rate == INT_MAX) + rd_cost->rdcost = INT64_MAX; + else + rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); + + x->rdmult = orig_rdmult; ctx->rate = rd_cost->rate; ctx->dist = rd_cost->dist; @@ -2122,6 +2122,10 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td, PICK_MODE_CONTEXT *ctx) { MACROBLOCK *const x = &td->mb; set_offsets(cpi, tile, x, mi_row, mi_col, bsize); + + if (cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ) + x->rdmult = x->cb_rdmult; + update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); @@ -2453,7 +2457,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, } x->skip = ctx->skip; - x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0]; + x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 
0 : ctx->skip_txfm[0]; } static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, @@ -2629,6 +2633,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, ctx, INT64_MAX); break; case PARTITION_HORZ: + pc_tree->horizontal[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->horizontal[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && @@ -2638,6 +2643,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); + pc_tree->horizontal[1].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { @@ -2650,6 +2656,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, } break; case PARTITION_VERT: + pc_tree->vertical[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && @@ -2659,6 +2666,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); + pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); @@ -3030,14 +3038,232 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, } #endif -// Calculate the score used in machine-learning based partition search early -// termination. 
-static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, - PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const double *clf; - const double *mean; - const double *sd; +#define NN_MAX_HIDDEN_LAYERS 10 +#define NN_MAX_NODES_PER_LAYER 128 + +// Neural net model config. +typedef struct { + int num_inputs; // Number of input nodes, i.e. features. + int num_outputs; // Number of output nodes. + int num_hidden_layers; // Number of hidden layers, maximum 10. + // Number of nodes for each hidden layer. + int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS]; + // Weight parameters, indexed by layer. + const float *weights[NN_MAX_HIDDEN_LAYERS + 1]; + // Bias parameters, indexed by layer. + const float *bias[NN_MAX_HIDDEN_LAYERS + 1]; +} NN_CONFIG; + +// Calculate prediction based on the given input features and neural net config. +// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden +// layer. +static void nn_predict(const float *features, const NN_CONFIG *nn_config, + float *output) { + int num_input_nodes = nn_config->num_inputs; + int buf_index = 0; + float buf[2][NN_MAX_NODES_PER_LAYER]; + const float *input_nodes = features; + + // Propagate hidden layers. + const int num_layers = nn_config->num_hidden_layers; + int layer, node, i; + assert(num_layers <= NN_MAX_HIDDEN_LAYERS); + for (layer = 0; layer < num_layers; ++layer) { + const float *weights = nn_config->weights[layer]; + const float *bias = nn_config->bias[layer]; + float *output_nodes = buf[buf_index]; + const int num_output_nodes = nn_config->num_hidden_nodes[layer]; + assert(num_output_nodes < NN_MAX_NODES_PER_LAYER); + for (node = 0; node < num_output_nodes; ++node) { + float val = 0.0f; + for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; + val += bias[node]; + // ReLU as activation function. 
+ val = VPXMAX(val, 0.0f); + output_nodes[node] = val; + weights += num_input_nodes; + } + num_input_nodes = num_output_nodes; + input_nodes = output_nodes; + buf_index = 1 - buf_index; + } + + // Final output layer. + { + const float *weights = nn_config->weights[num_layers]; + for (node = 0; node < nn_config->num_outputs; ++node) { + const float *bias = nn_config->bias[num_layers]; + float val = 0.0f; + for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; + output[node] = val + bias[node]; + weights += num_input_nodes; + } + } +} + +static const float partition_nn_weights_64x64_layer0[7 * 8] = { + -3.571348f, 0.014835f, -3.255393f, -0.098090f, -0.013120f, 0.000221f, + 0.056273f, 0.190179f, -0.268130f, -1.828242f, -0.010655f, 0.937244f, + -0.435120f, 0.512125f, 1.610679f, 0.190816f, -0.799075f, -0.377348f, + -0.144232f, 0.614383f, -0.980388f, 1.754150f, -0.185603f, -0.061854f, + -0.807172f, 1.240177f, 1.419531f, -0.438544f, -5.980774f, 0.139045f, + -0.032359f, -0.068887f, -1.237918f, 0.115706f, 0.003164f, 2.924212f, + 1.246838f, -0.035833f, 0.810011f, -0.805894f, 0.010966f, 0.076463f, + -4.226380f, -2.437764f, -0.010619f, -0.020935f, -0.451494f, 0.300079f, + -0.168961f, -3.326450f, -2.731094f, 0.002518f, 0.018840f, -1.656815f, + 0.068039f, 0.010586f, +}; + +static const float partition_nn_bias_64x64_layer0[8] = { + -3.469882f, 0.683989f, 0.194010f, 0.313782f, + -3.153335f, 2.245849f, -1.946190f, -3.740020f, +}; + +static const float partition_nn_weights_64x64_layer1[8] = { + -8.058566f, 0.108306f, -0.280620f, -0.818823f, + -6.445117f, 0.865364f, -1.127127f, -8.808660f, +}; + +static const float partition_nn_bias_64x64_layer1[1] = { + 6.46909416f, +}; + +static const NN_CONFIG partition_nnconfig_64x64 = { + 7, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + partition_nn_weights_64x64_layer0, + partition_nn_weights_64x64_layer1, + }, + { + partition_nn_bias_64x64_layer0, + 
partition_nn_bias_64x64_layer1, + }, +}; + +static const float partition_nn_weights_32x32_layer0[7 * 8] = { + -0.295437f, -4.002648f, -0.205399f, -0.060919f, 0.708037f, 0.027221f, + -0.039137f, -0.907724f, -3.151662f, 0.007106f, 0.018726f, -0.534928f, + 0.022744f, 0.000159f, -1.717189f, -3.229031f, -0.027311f, 0.269863f, + -0.400747f, -0.394366f, -0.108878f, 0.603027f, 0.455369f, -0.197170f, + 1.241746f, -1.347820f, -0.575636f, -0.462879f, -2.296426f, 0.196696f, + -0.138347f, -0.030754f, -0.200774f, 0.453795f, 0.055625f, -3.163116f, + -0.091003f, -0.027028f, -0.042984f, -0.605185f, 0.143240f, -0.036439f, + -0.801228f, 0.313409f, -0.159942f, 0.031267f, 0.886454f, -1.531644f, + -0.089655f, 0.037683f, -0.163441f, -0.130454f, -0.058344f, 0.060011f, + 0.275387f, 1.552226f, +}; + +static const float partition_nn_bias_32x32_layer0[8] = { + -0.838372f, -2.609089f, -0.055763f, 1.329485f, + -1.297638f, -2.636622f, -0.826909f, 1.012644f, +}; + +static const float partition_nn_weights_32x32_layer1[8] = { + -1.792632f, -7.322353f, -0.683386f, 0.676564f, + -1.488118f, -7.527719f, 1.240163f, 0.614309f, +}; + +static const float partition_nn_bias_32x32_layer1[1] = { + 4.97422546f, +}; + +static const NN_CONFIG partition_nnconfig_32x32 = { + 7, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + partition_nn_weights_32x32_layer0, + partition_nn_weights_32x32_layer1, + }, + { + partition_nn_bias_32x32_layer0, + partition_nn_bias_32x32_layer1, + }, +}; + +static const float partition_nn_weights_16x16_layer0[7 * 8] = { + -1.717673f, -4.718130f, -0.125725f, -0.183427f, -0.511764f, 0.035328f, + 0.130891f, -3.096753f, 0.174968f, -0.188769f, -0.640796f, 1.305661f, + 1.700638f, -0.073806f, -4.006781f, -1.630999f, -0.064863f, -0.086410f, + -0.148617f, 0.172733f, -0.018619f, 2.152595f, 0.778405f, -0.156455f, + 0.612995f, -0.467878f, 0.152022f, -0.236183f, 0.339635f, -0.087119f, + -3.196610f, -1.080401f, -0.637704f, -0.059974f, 1.706298f, 
-0.793705f, + -6.399260f, 0.010624f, -0.064199f, -0.650621f, 0.338087f, -0.001531f, + 1.023655f, -3.700272f, -0.055281f, -0.386884f, 0.375504f, -0.898678f, + 0.281156f, -0.314611f, 0.863354f, -0.040582f, -0.145019f, 0.029329f, + -2.197880f, -0.108733f, +}; + +static const float partition_nn_bias_16x16_layer0[8] = { + 0.411516f, -2.143737f, -3.693192f, 2.123142f, + -1.356910f, -3.561016f, -0.765045f, -2.417082f, +}; + +static const float partition_nn_weights_16x16_layer1[8] = { + -0.619755f, -2.202391f, -4.337171f, 0.611319f, + 0.377677f, -4.998723f, -1.052235f, 1.949922f, +}; + +static const float partition_nn_bias_16x16_layer1[1] = { + 3.20981717f, +}; + +static const NN_CONFIG partition_nnconfig_16x16 = { + 7, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + partition_nn_weights_16x16_layer0, + partition_nn_weights_16x16_layer1, + }, + { + partition_nn_bias_16x16_layer0, + partition_nn_bias_16x16_layer1, + }, +}; + +static const float partition_feature_mean[24] = { + 303501.697372f, 3042630.372158f, 24.694696f, 1.392182f, + 689.413511f, 162.027012f, 1.478213f, 0.0, + 135382.260230f, 912738.513263f, 28.845217f, 1.515230f, + 544.158492f, 131.807995f, 1.436863f, 0.0f, + 43682.377587f, 208131.711766f, 28.084737f, 1.356677f, + 138.254122f, 119.522553f, 1.252322f, 0.0f, +}; + +static const float partition_feature_std[24] = { + 673689.212982f, 5996652.516628f, 0.024449f, 1.989792f, + 985.880847f, 0.014638f, 2.001898f, 0.0f, + 208798.775332f, 1812548.443284f, 0.018693f, 1.838009f, + 396.986910f, 0.015657f, 1.332541f, 0.0f, + 55888.847031f, 448587.962714f, 0.017900f, 1.904776f, + 98.652832f, 0.016598f, 1.320992f, 0.0f, +}; + +// Error tolerance: 0.01%-0.0.05%-0.1% +static const float partition_linear_weights[24] = { + 0.111736f, 0.289977f, 0.042219f, 0.204765f, 0.120410f, -0.143863f, + 0.282376f, 0.847811f, 0.637161f, 0.131570f, 0.018636f, 0.202134f, + 0.112797f, 0.028162f, 0.182450f, 1.124367f, 0.386133f, 0.083700f, + 
0.050028f, 0.150873f, 0.061119f, 0.109318f, 0.127255f, 0.625211f, +}; + +// Machine-learning based partition search early termination. +// Return 1 to skip split and rect partitions. +static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, + PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, + BLOCK_SIZE bsize) { const int mag_mv = abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); const int left_in_image = !!xd->left_mi; @@ -3047,11 +3273,32 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, int above_par = 0; // above_partitioning int left_par = 0; // left_partitioning int last_par = 0; // last_partitioning - BLOCK_SIZE context_size; - double score; int offset = 0; + int i; + BLOCK_SIZE context_size; + const NN_CONFIG *nn_config = NULL; + const float *mean, *sd, *linear_weights; + float nn_score, linear_score; + float features[7]; assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); + vpx_clear_system_state(); + + switch (bsize) { + case BLOCK_64X64: + offset = 0; + nn_config = &partition_nnconfig_64x64; + break; + case BLOCK_32X32: + offset = 8; + nn_config = &partition_nnconfig_32x32; + break; + case BLOCK_16X16: + offset = 16; + nn_config = &partition_nnconfig_16x16; + break; + default: assert(0 && "Unexpected block size."); return 0; + } if (above_in_image) { context_size = xd->above_mi->sb_type; @@ -3077,25 +3324,348 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, last_par = 1; } - if (bsize == BLOCK_64X64) - offset = 0; - else if (bsize == BLOCK_32X32) - offset = 8; - else if (bsize == BLOCK_16X16) - offset = 16; - - // early termination score calculation - clf = &classifiers[offset]; - mean = &train_mean[offset]; - sd = &train_stdm[offset]; - score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) + - clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) + - clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) + - clf[3] * (((double)(left_par + above_par) / 2 - 
mean[3]) * sd[3]) + - clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) + - clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) + - clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7]; - return score; + mean = &partition_feature_mean[offset]; + sd = &partition_feature_std[offset]; + features[0] = ((float)ctx->rate - mean[0]) / sd[0]; + features[1] = ((float)ctx->dist - mean[1]) / sd[1]; + features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2]; + features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3]; + features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4]; + features[5] = ((float)cm->base_qindex - mean[5]) * sd[5]; + features[6] = ((float)last_par - mean[6]) * sd[6]; + + // Predict using linear model. + linear_weights = &partition_linear_weights[offset]; + linear_score = linear_weights[7]; + for (i = 0; i < 7; ++i) linear_score += linear_weights[i] * features[i]; + if (linear_score > 0.1f) return 0; + + // Predict using neural net model. + nn_predict(features, nn_config, &nn_score); + + if (linear_score < -0.0f && nn_score < 0.1f) return 1; + if (nn_score < -0.0f && linear_score < 0.1f) return 1; + return 0; +} + +#define FEATURES 4 +#define Q_CTX 3 +#define RESOLUTION_CTX 2 +static const float partition_breakout_weights_64[RESOLUTION_CTX][Q_CTX] + [FEATURES + 1] = { + { + { + -0.016673f, + -0.001025f, + -0.000032f, + 0.000833f, + 1.94261885f - 2.1f, + }, + { + -0.160867f, + -0.002101f, + 0.000011f, + 0.002448f, + 1.65738142f - 2.5f, + }, + { + -0.628934f, + -0.011459f, + -0.000009f, + 0.013833f, + 1.47982645f - 1.6f, + }, + }, + { + { + -0.064309f, + -0.006121f, + 0.000232f, + 0.005778f, + 0.7989465f - 5.0f, + }, + { + -0.314957f, + -0.009346f, + -0.000225f, + 0.010072f, + 2.80695581f - 5.5f, + }, + { + -0.635535f, + -0.015135f, + 0.000091f, + 0.015247f, + 2.90381241f - 5.0f, + }, + }, + }; + +static const float partition_breakout_weights_32[RESOLUTION_CTX][Q_CTX] + [FEATURES + 1] = { + { + { + -0.010554f, + -0.003081f, + -0.000134f, + 
0.004491f, + 1.68445992f - 3.5f, + }, + { + -0.051489f, + -0.007609f, + 0.000016f, + 0.009792f, + 1.28089404f - 2.5f, + }, + { + -0.163097f, + -0.013081f, + 0.000022f, + 0.019006f, + 1.36129403f - 3.2f, + }, + }, + { + { + -0.024629f, + -0.006492f, + -0.000254f, + 0.004895f, + 1.27919173f - 4.5f, + }, + { + -0.083936f, + -0.009827f, + -0.000200f, + 0.010399f, + 2.73731065f - 4.5f, + }, + { + -0.279052f, + -0.013334f, + 0.000289f, + 0.023203f, + 2.43595719f - 3.5f, + }, + }, + }; + +static const float partition_breakout_weights_16[RESOLUTION_CTX][Q_CTX] + [FEATURES + 1] = { + { + { + -0.013154f, + -0.002404f, + -0.000977f, + 0.008450f, + 2.57404566f - 5.5f, + }, + { + -0.019146f, + -0.004018f, + 0.000064f, + 0.008187f, + 2.15043926f - 2.5f, + }, + { + -0.075755f, + -0.010858f, + 0.000030f, + 0.024505f, + 2.06848121f - 2.5f, + }, + }, + { + { + -0.007636f, + -0.002751f, + -0.000682f, + 0.005968f, + 0.19225763f - 4.5f, + }, + { + -0.047306f, + -0.009113f, + -0.000518f, + 0.016007f, + 2.61068869f - 4.0f, + }, + { + -0.069336f, + -0.010448f, + -0.001120f, + 0.023083f, + 1.47591054f - 5.5f, + }, + }, + }; + +static const float partition_breakout_weights_8[RESOLUTION_CTX][Q_CTX] + [FEATURES + 1] = { + { + { + -0.011807f, + -0.009873f, + -0.000931f, + 0.034768f, + 1.32254851f - 2.0f, + }, + { + -0.003861f, + -0.002701f, + 0.000100f, + 0.013876f, + 1.96755111f - 1.5f, + }, + { + -0.013522f, + -0.008677f, + -0.000562f, + 0.034468f, + 1.53440356f - 1.5f, + }, + }, + { + { + -0.003221f, + -0.002125f, + 0.000993f, + 0.012768f, + 0.03541421f - 2.0f, + }, + { + -0.006069f, + -0.007335f, + 0.000229f, + 0.026104f, + 0.17135315f - 1.5f, + }, + { + -0.039894f, + -0.011419f, + 0.000070f, + 0.061817f, + 0.6739977f - 1.5f, + }, + }, + }; + +// ML-based partition search breakout. 
+static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize, + const MACROBLOCK *const x, + const RD_COST *const rd_cost) { + DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; + const VP9_COMMON *const cm = &cpi->common; + float features[FEATURES]; + const float *linear_weights = NULL; // Linear model weights. + float linear_score = 0.0f; + const int qindex = cm->base_qindex; + const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2); + const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720; + const int resolution_ctx = is_720p_or_larger ? 1 : 0; + + switch (bsize) { + case BLOCK_64X64: + linear_weights = partition_breakout_weights_64[resolution_ctx][q_ctx]; + break; + case BLOCK_32X32: + linear_weights = partition_breakout_weights_32[resolution_ctx][q_ctx]; + break; + case BLOCK_16X16: + linear_weights = partition_breakout_weights_16[resolution_ctx][q_ctx]; + break; + case BLOCK_8X8: + linear_weights = partition_breakout_weights_8[resolution_ctx][q_ctx]; + break; + default: assert(0 && "Unexpected block size."); return 0; + } + if (!linear_weights) return 0; + + { // Generate feature values. + const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); + const int num_pels_log2 = num_pels_log2_lookup[bsize]; + int feature_index = 0; + unsigned int var, sse; + float rate_f, dist_f; + + var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, &sse); + var = var >> num_pels_log2; + + vpx_clear_system_state(); + + rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX); + dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2); + rate_f = + ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * + rate_f; + + features[feature_index++] = rate_f; + features[feature_index++] = dist_f; + features[feature_index++] = (float)var; + features[feature_index++] = (float)ac_q; + assert(feature_index == FEATURES); + } + + { // Calculate the output score. 
+ int i; + linear_score = linear_weights[FEATURES]; + for (i = 0; i < FEATURES; ++i) + linear_score += linear_weights[i] * features[i]; + } + + return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx]; +} +#undef FEATURES +#undef Q_CTX +#undef RESOLUTION_CTX + +int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, + int orig_rdmult) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; + TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; + int tpl_stride = tpl_frame->stride; + int64_t intra_cost = 0; + int64_t mc_dep_cost = 0; + int mi_wide = num_8x8_blocks_wide_lookup[bsize]; + int mi_high = num_8x8_blocks_high_lookup[bsize]; + int row, col; + + int dr = 0; + int count = 0; + double r0, rk, beta; + + if (tpl_frame->is_valid == 0) return orig_rdmult; + + if (cpi->common.show_frame) return orig_rdmult; + + for (row = mi_row; row < mi_row + mi_high; ++row) { + for (col = mi_col; col < mi_col + mi_wide; ++col) { + TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; + + if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue; + + intra_cost += this_stats->intra_cost; + mc_dep_cost += this_stats->mc_dep_cost; + + ++count; + } + } + + vpx_clear_system_state(); + + r0 = cpi->rd.r0; + rk = (double)intra_cost / mc_dep_cost; + beta = r0 / rk; + dr = vp9_get_adaptive_rdmult(cpi, beta); + + dr = VPXMIN(dr, orig_rdmult * 3 / 2); + dr = VPXMAX(dr, orig_rdmult * 1 / 2); + + dr = VPXMAX(1, dr); + + return dr; } // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are @@ -3145,15 +3715,22 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; + int must_split = 0; + int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ + ? 
x->cb_rdmult + : cpi->rd.RDMULT; + // Ref frames picked in the [i_th] quarter subblock during square partition + // RD search. It may be used to prune ref frame selection of rect partitions. + uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; (void)*tp_orig; assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); - // Adjust dist breakout threshold according to the partition size. dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); + rate_breakout_thr *= num_pels_log2_lookup[bsize]; vp9_rd_cost_init(&this_rdc); @@ -3177,10 +3754,18 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); } + // Get sub block energy range + if (bsize >= BLOCK_16X16) { + int min_energy, max_energy; + vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, + &max_energy); + must_split = (min_energy < -3) && (max_energy - min_energy > 2); + } + // Determine partition types in search according to the speed features. // The threshold set here has to be of square block size. 
if (cpi->sf.auto_min_max_partition_size) { - partition_none_allowed &= (bsize <= max_size && bsize >= min_size); + partition_none_allowed &= (bsize <= max_size); partition_horz_allowed &= ((bsize <= max_size && bsize > min_size) || force_horz_split); partition_vert_allowed &= @@ -3267,10 +3852,18 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, best_rdc.rdcost); if (this_rdc.rate != INT_MAX) { + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + const int ref1 = ctx->mic.ref_frame[0]; + const int ref2 = ctx->mic.ref_frame[1]; + for (i = 0; i < 4; ++i) { + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } + } if (bsize >= BLOCK_8X8) { + this_rdc.rdcost += RDCOST(partition_mul, x->rddiv, + cpi->partition_cost[pl][PARTITION_NONE], 0); this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; - this_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); } if (this_rdc.rdcost < best_rdc.rdcost) { @@ -3279,28 +3872,42 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - if (!cpi->sf.ml_partition_search_early_termination) { - // If all y, u, v transform blocks in this partition are skippable, - // and the dist & rate are within the thresholds, the partition search - // is terminated for current branch of the partition search tree. - if (!x->e_mbd.lossless && ctx->skippable && - ((best_rdc.dist < (dist_breakout_thr >> 2)) || - (best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr))) { - do_split = 0; - do_rect = 0; - } - } else { + if (cpi->sf.ml_partition_search_early_termination) { // Currently, the machine-learning based partition search early // termination is only used while bsize is 16x16, 32x32 or 64x64, // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. 
if (!x->e_mbd.lossless && !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { - if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) { + if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) { + do_split = 0; + do_rect = 0; + } + } + } + + if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { + int use_ml_based_breakout = + cpi->sf.use_ml_partition_search_breakout && + cm->base_qindex >= 100; +#if CONFIG_VP9_HIGHBITDEPTH + if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + use_ml_based_breakout = 0; +#endif // CONFIG_VP9_HIGHBITDEPTH + if (use_ml_based_breakout) { + if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { do_split = 0; do_rect = 0; } + } else { + if (!cpi->sf.ml_partition_search_early_termination) { + if ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr)) { + do_split = 0; + do_rect = 0; + } + } } } @@ -3369,7 +3976,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_SPLIT // TODO(jingning): use the motion vectors given by the above search as // the starting point of motion search in the following partition type check. 
- if (do_split) { + if (do_split || must_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); if (bsize == BLOCK_8X8) { i = 4; @@ -3377,10 +3984,21 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, pc_tree->leaf_split[0], best_rdc.rdcost); - - if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX; + if (sum_rdc.rate == INT_MAX) { + sum_rdc.rdcost = INT64_MAX; + } else { + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0]; + const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1]; + for (i = 0; i < 4; ++i) { + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } + } + } } else { - for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { + for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split); + ++i) { const int x_idx = (i & 1) * mi_step; const int y_idx = (i >> 1) * mi_step; @@ -3390,14 +4008,30 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); pc_tree->split[i]->index = i; + if (cpi->sf.prune_ref_frame_for_rect_partitions) + pc_tree->split[i]->none.rate = INT_MAX; rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rdc, + // A must split test here increases the number of sub + // partitions but hurts metrics results quite a bit, + // so this extra test is commented out pending + // further tests on whether it adds much in terms of + // visual quality. + // (must_split) ? 
best_rdc.rdcost + // : best_rdc.rdcost - sum_rdc.rdcost, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; break; } else { + if (cpi->sf.prune_ref_frame_for_rect_partitions && + pc_tree->split[i]->none.rate != INT_MAX) { + const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0]; + const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1]; + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; @@ -3405,11 +4039,13 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } } - if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) { + if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) { + sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv, + cpi->partition_cost[pl][PARTITION_SPLIT], 0); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); - if (sum_rdc.rdcost < best_rdc.rdcost) { + if ((sum_rdc.rdcost < best_rdc.rdcost) || + (must_split && (sum_rdc.dist < best_rdc.dist))) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_SPLIT; @@ -3433,6 +4069,22 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } + pc_tree->horizontal[0].skip_ref_frame_mask = 0; + pc_tree->horizontal[1].skip_ref_frame_mask = 0; + pc_tree->vertical[0].skip_ref_frame_mask = 0; + pc_tree->vertical[1].skip_ref_frame_mask = 0; + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + uint8_t used_frames; + used_frames = ref_frames_used[0] | ref_frames_used[1]; + if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames; + used_frames = ref_frames_used[2] | ref_frames_used[3]; + if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames; + used_frames = ref_frames_used[0] | ref_frames_used[2]; + if 
(used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames; + used_frames = ref_frames_used[1] | ref_frames_used[3]; + if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames; + } + // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { @@ -3467,8 +4119,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (sum_rdc.rdcost < best_rdc.rdcost) { + sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv, + cpi->partition_cost[pl][PARTITION_HORZ], 0); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_HORZ; @@ -3515,8 +4168,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (sum_rdc.rdcost < best_rdc.rdcost) { + sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv, + cpi->partition_cost[pl][PARTITION_VERT], 0); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_VERT; @@ -3626,6 +4280,14 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else { + int orig_rdmult = cpi->rd.RDMULT; + x->cb_rdmult = orig_rdmult; + if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) { + int dr = + get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult); + x->cb_rdmult = dr; + } + // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); @@ -3734,6 +4396,18 @@ static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x, } } +static void hybrid_search_scene_change(VP9_COMP *cpi, 
MACROBLOCK *const x, + RD_COST *rd_cost, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + TileDataEnc *tile_data, int mi_row, + int mi_col) { + if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { + vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); + } else { + vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); + } +} + static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, @@ -3764,17 +4438,23 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, if (cyclic_refresh_segment_id_boosted(mi->segment_id)) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); - if (cm->frame_type == KEY_FRAME) + if (frame_is_intra_only(cm)) hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, mi_col); else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize); - else if (bsize >= BLOCK_8X8) - vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); - else + else if (bsize >= BLOCK_8X8) { + if (cpi->rc.hybrid_intra_scene_change) + hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, + mi_col); + else + vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, + ctx); + } else { vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); + } duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); @@ -4401,6 +5081,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, x->skip_low_source_sad = 0; x->lowvar_highsumdiff = 0; x->content_state_sb = 0; + x->zero_temp_sad_source = 0; x->sb_use_mv_part = 0; x->sb_mvcol_part = 0; x->sb_mvrow_part = 0; @@ -4459,7 +5140,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, // 
nonrd_pick_partition does not support 4x4 partition, so avoid it // on key frame for now. if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && - cpi->oxcf.speed < 6 && cm->frame_type != KEY_FRAME && + cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { // Use lower max_partition_size for low resoultions. if (cm->width <= 352 && cm->height <= 288) @@ -4475,7 +5156,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, // TODO(marpan): Seems like nonrd_select_partition does not support // 4x4 partition. Since 4x4 is used on key frame, use this switch // for now. - if (cm->frame_type == KEY_FRAME) + if (frame_is_intra_only(cm)) nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); else @@ -4663,6 +5344,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) { for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; +#if CONFIG_CONSISTENT_RECODE + tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; +#endif tile_data->mode_map[i][j] = j; } } @@ -4787,7 +5471,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH x->inv_txfm_add = xd->lossless ? 
vp9_iwht4x4_add : vp9_idct4x4_add; - +#if CONFIG_CONSISTENT_RECODE + x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1; +#endif if (xd->lossless) x->optimize = 0; cm->tx_mode = select_tx_mode(cpi, xd); @@ -4830,6 +5516,27 @@ static void encode_frame_internal(VP9_COMP *cpi) { if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) source_var_based_partition_search_method(cpi); + } else if (cpi->twopass.gf_group.index && cpi->sf.enable_tpl_model) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; + TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; + + int tpl_stride = tpl_frame->stride; + int64_t intra_cost_base = 0; + int64_t mc_dep_cost_base = 0; + int row, col; + + for (row = 0; row < cm->mi_rows; ++row) { + for (col = 0; col < cm->mi_cols; ++col) { + TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; + intra_cost_base += this_stats->intra_cost; + mc_dep_cost_base += this_stats->mc_dep_cost; + } + } + + vpx_clear_system_state(); + + if (tpl_frame->is_valid) + cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base; } { @@ -4912,9 +5619,48 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) { return sum_delta / (cm->mi_rows * cm->mi_cols); } +#if CONFIG_CONSISTENT_RECODE +static void restore_encode_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + int tile_col, tile_row; + int i, j; + RD_OPT *rd_opt = &cpi->rd; + for (i = 0; i < MAX_REF_FRAMES; i++) { + for (j = 0; j < REFERENCE_MODES; j++) + rd_opt->prediction_type_threshes[i][j] = + rd_opt->prediction_type_threshes_prev[i][j]; + + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) + rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j]; + } + + if (cpi->tile_data != NULL) { + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileDataEnc *tile_data = + 
&cpi->tile_data[tile_row * tile_cols + tile_col]; + for (i = 0; i < BLOCK_SIZES; ++i) { + for (j = 0; j < MAX_MODES; ++j) { + tile_data->thresh_freq_fact[i][j] = + tile_data->thresh_freq_fact_prev[i][j]; + } + } + } + } + + cm->interp_filter = cpi->sf.default_interp_filter; +} +#endif + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; +#if CONFIG_CONSISTENT_RECODE + restore_encode_params(cpi); +#endif + // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a // different sign bias and that buffer is then the fixed ref. However, this @@ -5095,7 +5841,8 @@ static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi, for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) { int map_offset = block_index + y * cm->mi_cols + x; - if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) { + if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) && + mi->segment_id <= CR_SEGMENT_ID_BOOST2) { if (abs(mv.row) < 8 && abs(mv.col) < 8) { if (cpi->consec_zero_mv[map_offset] < 255) cpi->consec_zero_mv[map_offset]++; @@ -5190,7 +5937,11 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); - if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0) + if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 && + (!cpi->use_svc || + (cpi->use_svc && + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); } } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 3384de7ea..05128eb1f 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -35,6 +35,7 @@ 
#include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/common/vp9_scan.h" #include "vp9/encoder/vp9_alt_ref_aq.h" #include "vp9/encoder/vp9_aq_360.h" @@ -44,10 +45,11 @@ #include "vp9/encoder/vp9_bitstream.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_encodeframe.h" +#include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_encoder.h" -#include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_ethread.h" +#include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mbgraph.h" #include "vp9/encoder/vp9_multi_thread.h" @@ -84,6 +86,9 @@ static FILE *yuv_skinmap_file = NULL; #ifdef OUTPUT_YUV_REC FILE *yuv_rec_file; #endif +#ifdef OUTPUT_YUV_SVC_SRC +FILE *yuv_svc_src[3] = { NULL, NULL, NULL }; +#endif #if 0 FILE *framepsnr; @@ -839,6 +844,7 @@ static void vp9_enc_free_mi(VP9_COMMON *cm) { cm->mi_grid_base = NULL; vpx_free(cm->prev_mi_grid_base); cm->prev_mi_grid_base = NULL; + cm->mi_alloc_size = 0; } static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { @@ -1371,9 +1377,14 @@ static void update_frame_size(VP9_COMP *cpi) { } static void init_buffer_indices(VP9_COMP *cpi) { - cpi->lst_fb_idx = 0; - cpi->gld_fb_idx = 1; - cpi->alt_fb_idx = 2; + int ref_frame; + + for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) + cpi->ref_fb_idx[ref_frame] = ref_frame; + + cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1]; + cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1]; + cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1]; } static void init_level_constraint(LevelConstraint *lc) { @@ -2082,7 +2093,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, BufferPool *const pool) { - unsigned int i; + unsigned int i, frame; VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *volatile const cm = cpi != 
NULL ? &cpi->common : NULL; @@ -2250,6 +2261,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, #ifdef OUTPUT_YUV_REC yuv_rec_file = fopen("rec.yuv", "wb"); #endif +#ifdef OUTPUT_YUV_SVC_SRC + yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb"); + yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb"); + yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb"); +#endif #if 0 framepsnr = fopen("framepsnr.stt", "a"); @@ -2328,6 +2344,23 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, vp9_set_speed_features_framesize_independent(cpi); vp9_set_speed_features_framesize_dependent(cpi); + if (cpi->sf.enable_tpl_model) { + for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) { + int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); + + CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, + vpx_calloc(mi_rows * mi_cols, + sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr))); + cpi->tpl_stats[frame].is_valid = 0; + cpi->tpl_stats[frame].width = mi_cols; + cpi->tpl_stats[frame].height = mi_rows; + cpi->tpl_stats[frame].stride = mi_cols; + cpi->tpl_stats[frame].mi_rows = cm->mi_rows; + cpi->tpl_stats[frame].mi_cols = cm->mi_cols; + } + } + // Allocate memory to store variances for a frame. 
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff))); cpi->source_var_thresh = 0; @@ -2416,7 +2449,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *cm; - unsigned int i; + unsigned int i, frame; int t; if (!cpi) return; @@ -2511,6 +2544,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vp9_denoiser_free(&(cpi->denoiser)); #endif + for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) { + vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); + cpi->tpl_stats[frame].is_valid = 0; + } + for (t = 0; t < cpi->num_workers; ++t) { VPxWorker *const worker = &cpi->workers[t]; EncWorkerData *const thread_data = &cpi->tile_thr_data[t]; @@ -2568,6 +2606,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #ifdef OUTPUT_YUV_REC fclose(yuv_rec_file); #endif +#ifdef OUTPUT_YUV_SVC_SRC + fclose(yuv_svc_src[0]); + fclose(yuv_svc_src[1]); + fclose(yuv_svc_src[2]); +#endif #if 0 @@ -2917,7 +2960,208 @@ static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q, return force_recode; } -void vp9_update_reference_frames(VP9_COMP *cpi) { +// This function is used to shift the virtual indices of last reference frames +// as follows: +// LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME +// when the LAST_FRAME is updated. +static INLINE void shift_last_ref_frames(VP9_COMP *cpi) { + int ref_frame; + for (ref_frame = LAST_REF_FRAMES - 1; ref_frame > 0; --ref_frame) { + cpi->ref_fb_idx[ref_frame] = cpi->ref_fb_idx[ref_frame - 1]; + + // [0] is allocated to the current coded frame. The statistics for the + // reference frames start at [LAST_FRAME], i.e. [1]. 
+ if (!cpi->rc.is_src_frame_alt_ref) { + memcpy(cpi->interp_filter_selected[ref_frame + LAST_FRAME], + cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME], + sizeof(cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME])); + } + } +} + +void update_multi_arf_ref_frames(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + BufferPool *const pool = cm->buffer_pool; + + // NOTE: Save the new show frame buffer index for --test-code=warn, i.e., + // for the purpose to verify no mismatch between encoder and decoder. + if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx; + + // At this point the new frame has been encoded. + // If any buffer copy / swapping is signaled it should be done here. + + if (cm->frame_type == KEY_FRAME) { + int ref_frame; + for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) { + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->ref_fb_idx[ref_frame]], + cm->new_fb_idx); + } + return; + } + + if (vp9_preserve_existing_gf(cpi)) { + // We have decided to preserve the previously existing golden frame as our + // new ARF frame. However, in the short term in function + // av1_bitstream.c::get_refresh_mask() we left it in the GF slot and, if + // we're updating the GF with the current decoded frame, we save it to the + // ARF slot instead. + // We now have to update the ARF with the current frame and swap gld_fb_idx + // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF + // slot and, if we're updating the GF, the current frame becomes the new GF. 
+ int tmp; + + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF_FRAME - 1]], + cm->new_fb_idx); + tmp = cpi->ref_fb_idx[ALTREF_FRAME - 1]; + cpi->ref_fb_idx[ALTREF_FRAME - 1] = cpi->ref_fb_idx[GOLDEN_FRAME - 1]; + cpi->ref_fb_idx[GOLDEN_FRAME - 1] = tmp; + + // We need to modify the mapping accordingly + cpi->arf_map[0] = cpi->ref_fb_idx[ALTREF_FRAME - 1]; + } else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) { + // Deal with the special case for showing existing internal ALTREF_FRAME + // Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME + // by updating the virtual indices. + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + const int which_arf = gf_group->arf_ref_idx[gf_group->index]; + int tmp; + assert(gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE); + + tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1]; + shift_last_ref_frames(cpi); + + cpi->ref_fb_idx[LAST_FRAME - 1] = cpi->ref_fb_idx[ALTREF2_FRAME - 1]; + cpi->ref_fb_idx[ALTREF2_FRAME - 1] = tmp; + + // We need to modify the mapping accordingly + cpi->arf_map[which_arf] = cpi->ref_fb_idx[ALTREF2_FRAME - 1]; + + memcpy(cpi->interp_filter_selected[LAST_FRAME], + cpi->interp_filter_selected[ALTREF2_FRAME], + sizeof(cpi->interp_filter_selected[ALTREF2_FRAME])); + } else { /* For non key/golden frames */ + // === ALTREF_FRAME === + if (cpi->refresh_alt_ref_frame) { + int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1]; + int which_arf = 0; + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); + + memcpy(cpi->interp_filter_selected[ALTREF_FRAME + which_arf], + cpi->interp_filter_selected[0], + sizeof(cpi->interp_filter_selected[0])); + } + + // === GOLDEN_FRAME === + if (cpi->refresh_golden_frame) { + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]], + cm->new_fb_idx); + + memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], + cpi->interp_filter_selected[0], + 
sizeof(cpi->interp_filter_selected[0])); + } + + // === BWDREF_FRAME === + if (cpi->refresh_bwd_ref_frame) { + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->ref_fb_idx[BWDREF_FRAME - 1]], + cm->new_fb_idx); + + memcpy(cpi->interp_filter_selected[BWDREF_FRAME], + cpi->interp_filter_selected[0], + sizeof(cpi->interp_filter_selected[0])); + } + + // === ALTREF2_FRAME === + if (cpi->refresh_alt2_ref_frame) { + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]], + cm->new_fb_idx); + + memcpy(cpi->interp_filter_selected[ALTREF2_FRAME], + cpi->interp_filter_selected[0], + sizeof(cpi->interp_filter_selected[0])); + } + } + + if (cpi->refresh_last_frame) { + // NOTE(zoeliu): We have two layers of mapping (1) from the per-frame + // reference to the reference frame buffer virtual index; and then (2) from + // the virtual index to the reference frame buffer physical index: + // + // LAST_FRAME, ..., LAST3_FRAME, ..., ALTREF_FRAME + // | | | + // v v v + // ref_fb_idx[0], ..., ref_fb_idx[2], ..., ref_fb_idx[ALTREF_FRAME-1] + // | | | + // v v v + // ref_frame_map[], ..., ref_frame_map[], ..., ref_frame_map[] + // + // When refresh_last_frame is set, it is intended to retire LAST3_FRAME, + // have the other 2 LAST reference frames shifted as follows: + // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME + // , and then have LAST_FRAME refreshed by the newly coded frame. + // + // To fulfill it, the decoder will be notified to execute following 2 steps: + // + // (a) To change ref_frame_map[] and have the virtual index of LAST3_FRAME + // to point to the newly coded frame, i.e. 
+ // ref_frame_map[lst_fb_idexes[2]] => new_fb_idx; + // + // (b) To change the 1st layer mapping to have LAST_FRAME mapped to the + // original virtual index of LAST3_FRAME and have the other mappings + // shifted as follows: + // LAST_FRAME, LAST2_FRAME, LAST3_FRAME + // | | | + // v v v + // ref_fb_idx[2], ref_fb_idx[0], ref_fb_idx[1] + int tmp; + + ref_cnt_fb(pool->frame_bufs, + &cm->ref_frame_map[cpi->ref_fb_idx[LAST_REF_FRAMES - 1]], + cm->new_fb_idx); + + tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1]; + + shift_last_ref_frames(cpi); + cpi->ref_fb_idx[0] = tmp; + + assert(cm->show_existing_frame == 0); + memcpy(cpi->interp_filter_selected[LAST_FRAME], + cpi->interp_filter_selected[0], + sizeof(cpi->interp_filter_selected[0])); + + if (cpi->rc.is_last_bipred_frame) { + // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the + // LAST3_FRAME by updating the virtual indices. + // + // NOTE: The source frame for BWDREF does not have a holding position as + // the OVERLAY frame for ALTREF's. Hence, to resolve the reference + // virtual index reshuffling for BWDREF, the encoder always + // specifies a LAST_BIPRED right before BWDREF and completes the + // reshuffling job accordingly. 
+ tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1]; + + shift_last_ref_frames(cpi); + cpi->ref_fb_idx[0] = cpi->ref_fb_idx[BWDREF_FRAME - 1]; + cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp; + + memcpy(cpi->interp_filter_selected[LAST_FRAME], + cpi->interp_filter_selected[BWDREF_FRAME], + sizeof(cpi->interp_filter_selected[BWDREF_FRAME])); + } + } + + // Assign virtual indexes for LAST_FRAME, GOLDEN_FRAME, and ALTREF_FRAME + cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1]; + cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1]; + cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1]; +} + +void update_ref_frames(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; @@ -2981,25 +3225,38 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); } +} + +void vp9_update_reference_frames(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + BufferPool *const pool = cm->buffer_pool; + SVC *const svc = &cpi->svc; + + if (cpi->extra_arf_allowed) + update_multi_arf_ref_frames(cpi); + else + update_ref_frames(cpi); + #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow) { - int svc_base_is_key = 0; + int svc_refresh_denoiser_buffers = 0; int denoise_svc_second_layer = 0; + FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; if (cpi->use_svc) { int realloc_fail = 0; const int svc_buf_shift = - cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 + svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 
cpi->denoiser.num_ref_frames : 0; - int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, - cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; - svc_base_is_key = lc->is_key_frame; + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + svc_refresh_denoiser_buffers = + lc->is_key_frame || svc->spatial_layer_sync[svc->spatial_layer_id]; denoise_svc_second_layer = - cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1 - : 0; + svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 1 : 0; // Check if we need to allocate extra buffers in the denoiser // for // refreshed frames. @@ -3012,38 +3269,43 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { "Failed to re-allocate denoiser for SVC"); } vp9_denoiser_update_frame_info( - &cpi->denoiser, *cpi->Source, cpi->common.frame_type, - cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, - cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, - cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key, - denoise_svc_second_layer); + &cpi->denoiser, *cpi->Source, frame_type, cpi->refresh_alt_ref_frame, + cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx, + cpi->gld_fb_idx, cpi->lst_fb_idx, cpi->resize_pending, + svc_refresh_denoiser_buffers, denoise_svc_second_layer); } #endif + if (is_one_pass_cbr_svc(cpi)) { // Keep track of frame index for each reference frame. - SVC *const svc = &cpi->svc; if (cm->frame_type == KEY_FRAME) { int i; - svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; - svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; - svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; // On key frame update all reference frame slots. 
for (i = 0; i < REF_FRAMES; i++) { + svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id; // LAST/GOLDEN/ALTREF is already updated above. if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && i != cpi->alt_fb_idx) ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx); } } else { - if (cpi->refresh_last_frame) - svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; - if (cpi->refresh_golden_frame) - svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; - if (cpi->refresh_alt_ref_frame) - svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; + if (cpi->refresh_last_frame) { + svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id; + } + if (cpi->refresh_golden_frame) { + svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id; + } + if (cpi->refresh_alt_ref_frame) { + svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id; + } } // Copy flags from encoder to SVC struct. vp9_copy_flags_ref_update_idx(cpi); + vp9_svc_update_ref_frame_buffer_idx(cpi); } } @@ -3574,10 +3836,43 @@ static void set_frame_size(VP9_COMP *cpi) { set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); } -static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, - uint8_t *dest) { +#if CONFIG_CONSISTENT_RECODE +static void save_encode_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - int q = 0, bottom_index = 0, top_index = 0; // Dummy variables. 
+ const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + int tile_col, tile_row; + int i, j; + RD_OPT *rd_opt = &cpi->rd; + for (i = 0; i < MAX_REF_FRAMES; i++) { + for (j = 0; j < REFERENCE_MODES; j++) + rd_opt->prediction_type_threshes_prev[i][j] = + rd_opt->prediction_type_threshes[i][j]; + + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) + rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j]; + } + + if (cpi->tile_data != NULL) { + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileDataEnc *tile_data = + &cpi->tile_data[tile_row * tile_cols + tile_col]; + for (i = 0; i < BLOCK_SIZES; ++i) { + for (j = 0; j < MAX_MODES; ++j) { + tile_data->thresh_freq_fact_prev[i][j] = + tile_data->thresh_freq_fact[i][j]; + } + } + } + } +} +#endif + +static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, + uint8_t *dest) { + VP9_COMMON *const cm = &cpi->common; + int q = 0, bottom_index = 0, top_index = 0; const INTERP_FILTER filter_scaler = (is_one_pass_cbr_svc(cpi)) ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] @@ -3623,6 +3918,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0), filter_scaler, phase_scaler); } +#ifdef OUTPUT_YUV_SVC_SRC + // Write out at most 3 spatial layers. + if (is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id < 3) { + vpx_write_yuv_frame(yuv_svc_src[cpi->svc.spatial_layer_id], cpi->Source); + } +#endif // Unfiltered raw source used in metrics calculation if the source // has been filtered. 
if (is_psnr_calc_enabled(cpi)) { @@ -3672,7 +3973,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, cpi->Last_Source->y_height != cpi->Source->y_height) cpi->compute_source_sad_onepass = 0; - if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) { + if (frame_is_intra_only(cm) || cpi->resize_pending != 0) { memset(cpi->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); } @@ -3683,15 +3984,29 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now // (need to check encoding time cost for doing this for speed 8). cpi->rc.high_source_sad = 0; + cpi->rc.hybrid_intra_scene_change = 0; + cpi->rc.re_encode_maxq_scene_change = 0; if (cm->show_frame && cpi->oxcf.mode == REALTIME && (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.content == VP9E_CONTENT_SCREEN || - (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc))) + (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8))) vp9_scene_detection_onepass(cpi); if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad; + // For 1 pass CBR, check if we are dropping this frame. + // Never drop on key frame, if base layer is key for svc, + // on scene change, or if superframe has layer sync. + if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && + !frame_is_intra_only(cm) && !cpi->rc.high_source_sad && + !cpi->svc.high_source_sad_superframe && + !cpi->svc.superframe_has_layer_sync && + (!cpi->use_svc || + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + if (vp9_rc_drop_frame(cpi)) return 0; + } + // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // avoid this frame-level upsampling (for non intra_only frames). 
@@ -3715,7 +4030,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } } - if (cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 && + // TODO(jianj): Look into issue of skin detection with high bitdepth. + if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.content != VP9E_CONTENT_SCREEN && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { @@ -3729,10 +4045,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, suppress_active_map(cpi); - // For SVC on non-zero spatial layer: check for disabling inter-layer - // prediction. - if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) - vp9_svc_constrain_inter_layer_pred(cpi); + if (cpi->use_svc) { + // On non-zero spatial layer, check for disabling inter-layer + // prediction. + if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi); + vp9_svc_assert_constraints_pattern(cpi); + } // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. @@ -3748,7 +4066,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // it may be pretty bad for rate-control, // and I should handle it somehow vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi); - } else if (cpi->roi.enabled && cm->frame_type != KEY_FRAME) { + } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) { apply_roi_map(cpi); } @@ -3777,8 +4095,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, suppress_active_map(cpi); // Turn-off cyclic refresh for re-encoded frame. 
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; unsigned char *const seg_map = cpi->segmentation_map; memset(seg_map, 0, cm->mi_rows * cm->mi_cols); + memset(cr->last_coded_q_map, MAXQ, + cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); + cr->sb_index = 0; vp9_disable_segmentation(&cm->seg); } apply_active_map(cpi); @@ -3788,13 +4110,14 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // Update some stats from cyclic refresh, and check for golden frame update. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && - cm->frame_type != KEY_FRAME) + !frame_is_intra_only(cm)) vp9_cyclic_refresh_postencode(cpi); // Update the skip mb flag probabilities based on the distribution // seen in the last encoder iteration. // update_base_skip_probs(cpi); vpx_clear_system_state(); + return 1; } #define MAX_QSTEP_ADJ 4 @@ -4148,12 +4471,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, vp9_encode_frame(cpi); vpx_clear_system_state(); restore_coding_context(cpi); - vp9_pack_bitstream(cpi, dest, size); - - vp9_encode_frame(cpi); - vpx_clear_system_state(); - - restore_coding_context(cpi); } } @@ -4485,11 +4802,21 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cpi->oxcf.target_bandwidth == 0) { cpi->svc.skip_enhancement_layer = 1; vp9_rc_postencode_update_drop_frame(cpi); - vp9_inc_frame_in_layer(cpi); cpi->ext_refresh_frame_flags_pending = 0; cpi->last_frame_dropped = 1; cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; + if (cpi->svc.framedrop_mode == LAYER_DROP || + cpi->svc.drop_spatial_layer[0] == 0) { + // For the case of constrained drop mode where the base is dropped + // (drop_spatial_layer[0] == 1), which means full superframe dropped, + // we don't increment the svc frame counters. 
In particular temporal + // layer counter (which is incremented in vp9_inc_frame_in_layer()) + // won't be incremented, so on a dropped frame we try the same + // temporal_layer_id on next incoming frame. This is to avoid an + // issue with temporal alignement with full superframe dropping. + vp9_inc_frame_in_layer(cpi); + } return; } @@ -4538,55 +4865,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, } } - // For 1 pass CBR, check if we are dropping this frame. - // Never drop on key frame, or if base layer is key for svc. - if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && - cm->frame_type != KEY_FRAME && - (!cpi->use_svc || - !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { - int svc_prev_layer_dropped = 0; - // In the contrained framedrop mode for svc (framedrop_mode = - // CONSTRAINED_LAYER_DROP), if the previous spatial layer was dropped, drop - // the current spatial layer. - if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) - svc_prev_layer_dropped = 1; - if ((svc_prev_layer_dropped && - cpi->svc.framedrop_mode == CONSTRAINED_LAYER_DROP) || - vp9_rc_drop_frame(cpi)) { - vp9_rc_postencode_update_drop_frame(cpi); - cpi->ext_refresh_frame_flags_pending = 0; - cpi->last_frame_dropped = 1; - if (cpi->use_svc) { - cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; - vp9_inc_frame_in_layer(cpi); - cpi->svc.skip_enhancement_layer = 1; - if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { - int i; - int all_layers_drop = 1; - for (i = 0; i < cpi->svc.spatial_layer_id; i++) { - if (cpi->svc.drop_spatial_layer[i] == 0) { - all_layers_drop = 0; - break; - } - } - if (all_layers_drop == 1) cpi->svc.skip_enhancement_layer = 0; - } - } - return; - } - } - vpx_clear_system_state(); #if CONFIG_INTERNAL_STATS memset(cpi->mode_chosen_counts, 0, MAX_MODES * 
sizeof(*cpi->mode_chosen_counts)); #endif +#if CONFIG_CONSISTENT_RECODE + // Backup to ensure consistency between recodes + save_encode_params(cpi); +#endif if (cpi->sf.recode_loop == DISALLOW_RECODE) { - encode_without_recode_loop(cpi, size, dest); + if (!encode_without_recode_loop(cpi, size, dest)) return; } else { encode_with_recode_loop(cpi, size, dest); } @@ -4725,17 +5016,23 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, } cm->prev_frame = cm->cur_frame; - if (cpi->use_svc) + if (cpi->use_svc) { cpi->svc .layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id] .last_frame_type = cm->frame_type; + // Reset layer_sync back to 0 for next frame. + cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0; + } cpi->force_update_segmentation = 0; if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi); + + cpi->svc.previous_frame_is_intra_only = cm->intra_only; + cpi->svc.set_intra_only_frame = 0; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -4823,6 +5120,12 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, check_initial_width(cpi, subsampling_x, subsampling_y); #endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_VP9_HIGHBITDEPTH + // Disable denoiser for high bitdepth since vp9_denoiser_filter only works for + // 8 bits. 
+ if (cm->bit_depth > 8) cpi->oxcf.noise_sensitivity = 0; +#endif + #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif @@ -5186,6 +5489,553 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) { } } +typedef struct GF_PICTURE { + YV12_BUFFER_CONFIG *frame; + int ref_frame[3]; +} GF_PICTURE; + +void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, + const GF_GROUP *gf_group, int *tpl_group_frames) { + int frame_idx, i; + int gld_index = -1; + int alt_index = -1; + int lst_index = -1; + int extend_frame_count = 0; + int pframe_qindex = cpi->tpl_stats[2].base_qindex; + + *tpl_group_frames = 0; + + // Initialize Golden reference frame. + gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1; + gld_index = 0; + ++*tpl_group_frames; + + // Initialize ARF frame + gf_picture[1].frame = cpi->Source; + gf_picture[1].ref_frame[0] = gld_index; + gf_picture[1].ref_frame[1] = lst_index; + gf_picture[1].ref_frame[2] = alt_index; + alt_index = 1; + ++*tpl_group_frames; + + // Initialize P frames + for (frame_idx = 2; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) { + struct lookahead_entry *buf = + vp9_lookahead_peek(cpi->lookahead, frame_idx - 2); + + if (buf == NULL) break; + + gf_picture[frame_idx].frame = &buf->img; + gf_picture[frame_idx].ref_frame[0] = gld_index; + gf_picture[frame_idx].ref_frame[1] = lst_index; + gf_picture[frame_idx].ref_frame[2] = alt_index; + + ++*tpl_group_frames; + lst_index = frame_idx; + if (gf_group->update_type[frame_idx] == OVERLAY_UPDATE) break; + } + + gld_index = frame_idx; + lst_index = VPXMAX(0, frame_idx - 1); + alt_index = -1; + ++frame_idx; + + // Extend two frames outside the current gf group. 
+ for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) { + struct lookahead_entry *buf = + vp9_lookahead_peek(cpi->lookahead, frame_idx - 2); + + if (buf == NULL) break; + + cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex; + + gf_picture[frame_idx].frame = &buf->img; + gf_picture[frame_idx].ref_frame[0] = gld_index; + gf_picture[frame_idx].ref_frame[1] = lst_index; + gf_picture[frame_idx].ref_frame[2] = alt_index; + lst_index = frame_idx; + ++*tpl_group_frames; + ++extend_frame_count; + } +} + +void init_tpl_stats(VP9_COMP *cpi) { + int frame_idx; + for (frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + memset(tpl_frame->tpl_stats_ptr, 0, + tpl_frame->height * tpl_frame->width * + sizeof(*tpl_frame->tpl_stats_ptr)); + tpl_frame->is_valid = 0; + } +} + +uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, + uint8_t *cur_frame_buf, + uint8_t *ref_frame_buf, int stride, + MV *mv, BLOCK_SIZE bsize) { + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + const SEARCH_METHODS search_method = HEX; + int step_param; + int sadpb = x->sadperbit16; + uint32_t bestsme = UINT_MAX; + uint32_t distortion; + uint32_t sse; + int cost_list[5]; + const MvLimits tmp_mv_limits = x->mv_limits; + + MV best_ref_mv1 = { 0, 0 }; + MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ + + best_ref_mv1_full.col = best_ref_mv1.col >> 3; + best_ref_mv1_full.row = best_ref_mv1.row >> 3; + + // Setup frame pointers + x->plane[0].src.buf = cur_frame_buf; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = ref_frame_buf; + xd->plane[0].pre[0].stride = stride; + + step_param = mv_sf->reduce_first_step_size; + step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); + + vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); + + vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param, + 
search_method, sadpb, cond_cost_list(cpi, cost_list), + &best_ref_mv1, mv, 0, 0); + + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + + // Ignore mv costing by sending NULL pointer instead of cost array + bestsme = cpi->find_fractional_mv_step( + x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, mv_sf->subpel_iters_per_step, + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, + 0); + + return bestsme; +} + +int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row, + int ref_pos_col, int block, BLOCK_SIZE bsize) { + int width = 0, height = 0; + int bw = 4 << b_width_log2_lookup[bsize]; + int bh = 4 << b_height_log2_lookup[bsize]; + + switch (block) { + case 0: + width = grid_pos_col + bw - ref_pos_col; + height = grid_pos_row + bh - ref_pos_row; + break; + case 1: + width = ref_pos_col + bw - grid_pos_col; + height = grid_pos_row + bh - ref_pos_row; + break; + case 2: + width = grid_pos_col + bw - ref_pos_col; + height = ref_pos_row + bh - grid_pos_row; + break; + case 3: + width = ref_pos_col + bw - grid_pos_col; + height = ref_pos_row + bh - grid_pos_row; + break; + default: assert(0); + } + + return width * height; +} + +int round_floor(int ref_pos, int bsize_pix) { + int round; + if (ref_pos < 0) + round = -(1 + (-ref_pos - 1) / bsize_pix); + else + round = ref_pos / bsize_pix; + + return round; +} + +void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col, + BLOCK_SIZE bsize, int stride, int64_t intra_cost, + int64_t inter_cost, int ref_frame_idx, int_mv mv) { + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int idx, idy; + + intra_cost = intra_cost / (mi_height * mi_width); + inter_cost = inter_cost / (mi_height * mi_width); + + intra_cost = VPXMAX(1, intra_cost); + inter_cost = VPXMAX(1, inter_cost); + + for (idy = 0; idy < mi_height; ++idy) { + for (idx = 0; idx < mi_width; 
++idx) { + TplDepStats *tpl_ptr = + &tpl_stats[(mi_row + idy) * stride + (mi_col + idx)]; + tpl_ptr->intra_cost = intra_cost; + tpl_ptr->inter_cost = inter_cost; + tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow; + tpl_ptr->ref_frame_index = ref_frame_idx; + tpl_ptr->mv.as_int = mv.as_int; + } + } +} + +void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats, + int mi_row, int mi_col, const BLOCK_SIZE bsize) { + TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index]; + TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr; + MV mv = tpl_stats->mv.as_mv; + int mv_row = mv.row >> 3; + int mv_col = mv.col >> 3; + + int ref_pos_row = mi_row * MI_SIZE + mv_row; + int ref_pos_col = mi_col * MI_SIZE + mv_col; + + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int pix_num = bw * bh; + + // top-left on grid block location in pixel + int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh; + int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw; + int block; + + for (block = 0; block < 4; ++block) { + int grid_pos_row = grid_pos_row_base + bh * (block >> 1); + int grid_pos_col = grid_pos_col_base + bw * (block & 0x01); + + if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE && + grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) { + int overlap_area = get_overlap_area( + grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize); + int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height; + int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width; + + int64_t mc_flow = tpl_stats->mc_dep_cost - + (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) / + tpl_stats->intra_cost; + + int idx, idy; + + for (idy = 0; idy < mi_height; ++idy) { + for (idx = 0; idx < mi_width; ++idx) { + TplDepStats *des_stats = + 
&ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride + + (ref_mi_col + idx)]; + + des_stats->mc_flow += (mc_flow * overlap_area) / pix_num; + des_stats->mc_ref_cost += + ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) / + pix_num; + assert(overlap_area >= 0); + } + } + } + } +} + +void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats, + int mi_row, int mi_col, const BLOCK_SIZE bsize) { + int idx, idy; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + + for (idy = 0; idy < mi_height; ++idy) { + for (idx = 0; idx < mi_width; ++idx) { + TplDepStats *tpl_ptr = + &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)]; + tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx, + BLOCK_8X8); + } + } +} + +void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff, + tran_low_t *qcoeff, tran_low_t *dqcoeff, + TX_SIZE tx_size, int64_t *recon_error, int64_t *sse) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; + uint16_t eob; + int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; + const int shift = tx_size == TX_32X32 ? 
0 : 2; + + vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp, + qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan, + scan_order->iscan); + + *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift; + *recon_error = VPXMAX(*recon_error, 1); + + *sse = (*sse) >> shift; + *sse = VPXMAX(*sse, 1); +} + +void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, + TX_SIZE tx_size) { + switch (tx_size) { + case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break; + case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break; + case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break; + default: assert(0); + } +} + +void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, + struct scale_factors *sf, GF_PICTURE *gf_picture, + int frame_idx, int16_t *src_diff, tran_low_t *coeff, + tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row, + int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size, + YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor, + int64_t *recon_error, int64_t *sse, + TplDepStats *tpl_stats) { + VP9_COMMON *cm = &cpi->common; + ThreadData *td = &cpi->td; + + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int pix_num = bw * bh; + int best_rf_idx = -1; + int_mv best_mv; + int64_t best_inter_cost = INT64_MAX; + int64_t inter_cost; + int rf_idx; + const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP]; + + int64_t best_intra_cost = INT64_MAX; + int64_t intra_cost; + PREDICTION_MODE mode; + int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + MODE_INFO mi_above, mi_left; + + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8; + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8; + xd->above_mi = (mi_row > 0) ? &mi_above : NULL; + xd->left_mi = (mi_col > 0) ? 
&mi_left : NULL; + + // Intra prediction search + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { + uint8_t *src, *dst; + int src_stride, dst_stride; + + src = xd->cur_buf->y_buffer + mb_y_offset; + src_stride = xd->cur_buf->y_stride; + + dst = &predictor[0]; + dst_stride = bw; + + xd->mi[0]->sb_type = bsize; + xd->mi[0]->ref_frame[0] = INTRA_FRAME; + + vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src, + src_stride, dst, dst_stride, 0, 0, 0); + + vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); + + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + + intra_cost = vpx_satd(coeff, pix_num); + + if (intra_cost < best_intra_cost) best_intra_cost = intra_cost; + } + + // Motion compensated prediction + best_mv.as_int = 0; + + (void)mb_y_offset; + // Motion estimation column boundary + x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); + x->mv_limits.col_max = + ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND); + + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + int_mv mv; + if (ref_frame[rf_idx] == NULL) continue; + + motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, &mv.as_mv, bsize); + + // TODO(jingning): Not yet support high bit-depth in the next three + // steps. 
+#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_build_inter_predictor( + CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset), + ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw, + &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, + mi_row * MI_SIZE, xd->bd); + vpx_highbd_subtract_block( + bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, &predictor[0], bw, xd->bd); + } else { + vp9_build_inter_predictor( + ref_frame[rf_idx]->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh, + 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); + vpx_subtract_block(bh, bw, src_diff, bw, + xd->cur_buf->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, &predictor[0], bw); + } +#else + vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_stride, &predictor[0], bw, + &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE, mi_row * MI_SIZE); + vpx_subtract_block(bh, bw, src_diff, bw, + xd->cur_buf->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, &predictor[0], bw); +#endif + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + + inter_cost = vpx_satd(coeff, pix_num); + + if (inter_cost < best_inter_cost) { + best_rf_idx = rf_idx; + best_inter_cost = inter_cost; + best_mv.as_int = mv.as_int; + get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error, + sse); + } + } + best_intra_cost = VPXMAX(best_intra_cost, 1); + best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost); + tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2; + tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2; + tpl_stats->mc_dep_cost = tpl_stats->intra_cost + tpl_stats->mc_flow; + tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx]; + tpl_stats->mv.as_int = best_mv.as_int; +} + +void mc_flow_dispenser(VP9_COMP *cpi, 
GF_PICTURE *gf_picture, int frame_idx) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame; + YV12_BUFFER_CONFIG *ref_frame[3] = { NULL, NULL, NULL }; + + VP9_COMMON *cm = &cpi->common; + struct scale_factors sf; + int rdmult, idx; + ThreadData *td = &cpi->td; + MACROBLOCK *x = &td->mb; + MACROBLOCKD *xd = &x->e_mbd; + int mi_row, mi_col; + +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]); + DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]); + uint8_t *predictor; +#else + DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]); +#endif + DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]); + DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]); + DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); + + const BLOCK_SIZE bsize = BLOCK_32X32; + const TX_SIZE tx_size = max_txsize_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int64_t recon_error, sse; + + // Setup scaling factor +#if CONFIG_VP9_HIGHBITDEPTH + vp9_setup_scale_factors_for_frame( + &sf, this_frame->y_crop_width, this_frame->y_crop_height, + this_frame->y_crop_width, this_frame->y_crop_height, + cpi->common.use_highbitdepth); + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + predictor = CONVERT_TO_BYTEPTR(predictor16); + else + predictor = predictor8; +#else + vp9_setup_scale_factors_for_frame( + &sf, this_frame->y_crop_width, this_frame->y_crop_height, + this_frame->y_crop_width, this_frame->y_crop_height); +#endif // CONFIG_VP9_HIGHBITDEPTH + + // Prepare reference frame pointers. If any reference frame slot is + // unavailable, the pointer will be set to Null. 
+ for (idx = 0; idx < 3; ++idx) { + int rf_idx = gf_picture[frame_idx].ref_frame[idx]; + if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame; + } + + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; + + // Get rd multiplier set up. + rdmult = + (int)vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex); + if (rdmult < 1) rdmult = 1; + set_error_per_bit(&cpi->td.mb, rdmult); + vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex); + + tpl_frame->is_valid = 1; + + cm->base_qindex = tpl_frame->base_qindex; + vp9_frame_init_quantizer(cpi); + + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + // Motion estimation row boundary + x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); + x->mv_limits.row_max = + (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND); + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + TplDepStats tpl_stats; + xd->cur_buf = this_frame; + mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, src_diff, coeff, + qcoeff, dqcoeff, mi_row, mi_col, bsize, tx_size, + ref_frame, predictor, &recon_error, &sse, &tpl_stats); + + // Motion flow dependency dispenser. + tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, + tpl_frame->stride, tpl_stats.intra_cost, + tpl_stats.inter_cost, tpl_stats.ref_frame_index, + tpl_stats.mv); + + tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col, + bsize); + } + } +} + +void setup_tpl_stats(VP9_COMP *cpi) { + GF_PICTURE gf_picture[MAX_LAG_BUFFERS]; + const GF_GROUP *gf_group = &cpi->twopass.gf_group; + int tpl_group_frames = 0; + int frame_idx; + + // TODO(jingning): Make the model support high bit-depth route. +#if CONFIG_VP9_HIGHBITDEPTH + (void)gf_picture; + (void)gf_group; + (void)tpl_group_frames; + (void)frame_idx; + return; +#endif + + init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames); + + init_tpl_stats(cpi); + + // Backward propagation from tpl_group_frames to 1. 
+ for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) + mc_flow_dispenser(cpi, gf_picture, frame_idx); +} + int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { @@ -5289,7 +6139,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Read in the source frame. - if (cpi->use_svc) + if (cpi->use_svc || cpi->svc.set_intra_only_frame) source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); else source = vp9_lookahead_pop(cpi->lookahead, flush); @@ -5299,8 +6149,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cm->intra_only = 0; // if the flags indicate intra frame, but if the current picture is for // non-zero spatial layer, it should not be an intra picture. - if ((source->flags & VPX_EFLAG_FORCE_KF) && - cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) { + if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc && + cpi->svc.spatial_layer_id > 0) { source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF); } @@ -5395,6 +6245,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, for (i = 0; i < MAX_REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX; } + if (arf_src_index && cpi->sf.enable_tpl_model) { + vp9_estimate_qp_gop(cpi); + setup_tpl_stats(cpi); + } + cpi->td.mb.fp_src_pred = 0; #if CONFIG_REALTIME_ONLY if (cpi->use_svc) { diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 1e0ed70fb..0c6375c65 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -278,10 +278,37 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; } +typedef struct TplDepStats { + int64_t intra_cost; + int64_t inter_cost; + int64_t mc_flow; + int64_t mc_dep_cost; + int64_t mc_ref_cost; + + int ref_frame_index; + int_mv mv; +} TplDepStats; + +typedef struct 
TplDepFrame { + uint8_t is_valid; + TplDepStats *tpl_stats_ptr; + int stride; + int width; + int height; + int mi_rows; + int mi_cols; + int base_qindex; +} TplDepFrame; + +#define TPL_DEP_COST_SCALE_LOG2 4 + // TODO(jingning) All spatially adaptive variables should go to TileDataEnc. typedef struct TileDataEnc { TileInfo tile_info; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; +#if CONFIG_CONSISTENT_RECODE + int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES]; +#endif int8_t mode_map[BLOCK_SIZES][MAX_MODES]; FIRSTPASS_DATA fp_data; VP9RowMTSync row_mt_sync; @@ -473,6 +500,8 @@ typedef struct VP9_COMP { #endif YV12_BUFFER_CONFIG *raw_source_frame; + TplDepFrame tpl_stats[MAX_LAG_BUFFERS]; + TileDataEnc *tile_data; int allocated_tiles; // Keep track of memory allocated for tiles. @@ -484,8 +513,13 @@ typedef struct VP9_COMP { int gld_fb_idx; int alt_fb_idx; + int ref_fb_idx[REF_FRAMES]; + int last_show_frame_buf_idx; // last show frame buffer index + int refresh_last_frame; int refresh_golden_frame; + int refresh_bwd_ref_frame; + int refresh_alt2_ref_frame; int refresh_alt_ref_frame; int ext_refresh_frame_flags_pending; @@ -499,7 +533,6 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG last_frame_uf; TOKENEXTRA *tile_tok[4][1 << 6]; - uint32_t tok_count[4][1 << 6]; TOKENLIST *tplist[4][1 << 6]; // Ambient reconstruction err target for force key frames @@ -521,7 +554,7 @@ typedef struct VP9_COMP { RATE_CONTROL rc; double framerate; - int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]; + int interp_filter_selected[REF_FRAMES][SWITCHABLE]; struct vpx_codec_pkt_list *output_pkt_list; @@ -726,6 +759,13 @@ typedef struct VP9_COMP { uint8_t *count_arf_frame_usage; uint8_t *count_lastgolden_frame_usage; + // Parameters on multi-layer ALTREFs + int num_extra_arfs; + int arf_map[MAX_EXT_ARFS + 1]; + int arf_pos_in_gf[MAX_EXT_ARFS + 1]; + int arf_pos_for_ovrly[MAX_EXT_ARFS + 1]; + int extra_arf_allowed; + vpx_roi_map_t roi; } VP9_COMP; diff --git a/vp9/encoder/vp9_firstpass.c 
b/vp9/encoder/vp9_firstpass.c index 453879fb8..fc1ecd6ce 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -44,7 +44,6 @@ #define COMPLEXITY_STATS_OUTPUT 0 #define FIRST_PASS_Q 10.0 -#define INTRA_MODE_PENALTY 1024 #define MIN_ARF_GF_BOOST 240 #define MIN_DECAY_FACTOR 0.01 #define NEW_MV_MODE_PENALTY 32 @@ -812,6 +811,8 @@ static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile, fp_acc_data->image_data_start_row); } +#define NZ_MOTION_PENALTY 128 +#define INTRA_MODE_PENALTY 1024 void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, FIRSTPASS_DATA *fp_acc_data, TileDataEnc *tile_data, MV *best_ref_mv, @@ -1059,7 +1060,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, // Compute the motion error of the 0,0 motion using the last source // frame as the reference. Skip the further motion search on - // reconstructed frame if this error is small. + // reconstructed frame if this error is very small. unscaled_last_source_buf_2d.buf = cpi->unscaled_last_source->y_buffer + recon_yoffset; unscaled_last_source_buf_2d.stride = cpi->unscaled_last_source->y_stride; @@ -1076,8 +1077,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, &unscaled_last_source_buf_2d); #endif // CONFIG_VP9_HIGHBITDEPTH - // TODO(pengchong): Replace the hard-coded threshold - if (raw_motion_error > 25) { + if (raw_motion_error > NZ_MOTION_PENALTY) { // Test last reference frame using the previous best mv as the // starting point (best reference) for the search. 
first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error); @@ -2115,34 +2115,234 @@ static double calculate_group_score(VP9_COMP *cpi, double av_score, ++s; ++i; } - assert(i == frame_count); return score_total; } -static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, - int gf_arf_bits) { - VP9EncoderConfig *const oxcf = &cpi->oxcf; +static void define_gf_multi_arf_structure(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; - FIRSTPASS_STATS frame_stats; int i; - int frame_index = 1; - int target_frame_size; + int frame_index = 0; + const int key_frame = cpi->common.frame_type == KEY_FRAME; + + // The use of bi-predictive frames are only enabled when following 3 + // conditions are met: + // (1) ALTREF is enabled; + // (2) The bi-predictive group interval is at least 2; and + // (3) The bi-predictive group interval is strictly smaller than the + // golden group interval. + const int is_bipred_enabled = + cpi->extra_arf_allowed && rc->source_alt_ref_pending && + rc->bipred_group_interval && + rc->bipred_group_interval <= + (rc->baseline_gf_interval - rc->source_alt_ref_pending); + int bipred_group_end = 0; + int bipred_frame_index = 0; + + const unsigned char ext_arf_interval = + (unsigned char)(rc->baseline_gf_interval / (cpi->num_extra_arfs + 1) - 1); + int which_arf = cpi->num_extra_arfs; + int subgroup_interval[MAX_EXT_ARFS + 1]; + int is_sg_bipred_enabled = is_bipred_enabled; + int accumulative_subgroup_interval = 0; + + // For key frames the frame target rate is already set and it + // is also the golden frame. 
+ // === [frame_index == 0] === + if (!key_frame) { + if (rc->source_alt_ref_active) { + gf_group->update_type[frame_index] = OVERLAY_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + } else { + gf_group->update_type[frame_index] = GF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; + } + gf_group->arf_update_idx[frame_index] = 0; + gf_group->arf_ref_idx[frame_index] = 0; + } + + gf_group->bidir_pred_enabled[frame_index] = 0; + gf_group->brf_src_offset[frame_index] = 0; + + frame_index++; + + bipred_frame_index++; + + // === [frame_index == 1] === + if (rc->source_alt_ref_pending) { + gf_group->update_type[frame_index] = ARF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; + gf_group->arf_src_offset[frame_index] = + (unsigned char)(rc->baseline_gf_interval - 1); + + gf_group->arf_update_idx[frame_index] = 0; + gf_group->arf_ref_idx[frame_index] = 0; + + gf_group->bidir_pred_enabled[frame_index] = 0; + gf_group->brf_src_offset[frame_index] = 0; + // NOTE: "bidir_pred_frame_index" stays unchanged for ARF_UPDATE frames. + + // Work out the ARFs' positions in this gf group + // NOTE: ALT_REFs' are indexed inversely, but coded in display order + // (except for the original ARF). In the example of three ALT_REF's, + // We index ALTREF's as: KEY ----- ALT2 ----- ALT1 ----- ALT0 + // but code them in the following order: + // KEY-ALT0-ALT2 ----- OVERLAY2-ALT1 ----- OVERLAY1 ----- OVERLAY0 + // + // arf_pos_for_ovrly[]: Position for OVERLAY + // arf_pos_in_gf[]: Position for ALTREF + cpi->arf_pos_for_ovrly[0] = frame_index + cpi->num_extra_arfs + + gf_group->arf_src_offset[frame_index] + 1; + for (i = 0; i < cpi->num_extra_arfs; ++i) { + cpi->arf_pos_for_ovrly[i + 1] = + frame_index + (cpi->num_extra_arfs - i) * (ext_arf_interval + 2); + subgroup_interval[i] = cpi->arf_pos_for_ovrly[i] - + cpi->arf_pos_for_ovrly[i + 1] - (i == 0 ? 
1 : 2); + } + subgroup_interval[cpi->num_extra_arfs] = + cpi->arf_pos_for_ovrly[cpi->num_extra_arfs] - frame_index - + (cpi->num_extra_arfs == 0 ? 1 : 2); + + ++frame_index; + + // Insert an extra ARF + // === [frame_index == 2] === + if (cpi->num_extra_arfs) { + gf_group->update_type[frame_index] = INTNL_ARF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_LOW; + gf_group->arf_src_offset[frame_index] = ext_arf_interval; + + gf_group->arf_update_idx[frame_index] = which_arf; + gf_group->arf_ref_idx[frame_index] = 0; + ++frame_index; + } + accumulative_subgroup_interval += subgroup_interval[cpi->num_extra_arfs]; + } + + for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) { + gf_group->arf_update_idx[frame_index] = which_arf; + gf_group->arf_ref_idx[frame_index] = which_arf; + + // If we are going to have ARFs, check whether we can have BWDREF in this + // subgroup, and further, whether we can have ARF subgroup which contains + // the BWDREF subgroup but contained within the GF group: + // + // GF group --> ARF subgroup --> BWDREF subgroup + if (rc->source_alt_ref_pending) { + is_sg_bipred_enabled = + is_bipred_enabled && + (subgroup_interval[which_arf] > rc->bipred_group_interval); + } + + // NOTE: 1. BIDIR_PRED is only enabled when the length of the bi-predictive + // frame group interval is strictly smaller than that of the GOLDEN + // FRAME group interval. + // 2. Currently BIDIR_PRED is only enabled when alt-ref is on. 
+ if (is_sg_bipred_enabled && !bipred_group_end) { + const int cur_brf_src_offset = rc->bipred_group_interval - 1; + + if (bipred_frame_index == 1) { + // --- BRF_UPDATE --- + gf_group->update_type[frame_index] = BRF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_LOW; + gf_group->brf_src_offset[frame_index] = cur_brf_src_offset; + } else if (bipred_frame_index == rc->bipred_group_interval) { + // --- LAST_BIPRED_UPDATE --- + gf_group->update_type[frame_index] = LAST_BIPRED_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + gf_group->brf_src_offset[frame_index] = 0; + + // Reset the bi-predictive frame index. + bipred_frame_index = 0; + } else { + // --- BIPRED_UPDATE --- + gf_group->update_type[frame_index] = BIPRED_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + gf_group->brf_src_offset[frame_index] = 0; + } + gf_group->bidir_pred_enabled[frame_index] = 1; + + bipred_frame_index++; + // Check whether the next bi-predictive frame group would entirely be + // included within the current golden frame group. + // In addition, we need to avoid coding a BRF right before an ARF. + if (bipred_frame_index == 1 && + (i + 2 + cur_brf_src_offset) >= accumulative_subgroup_interval) { + bipred_group_end = 1; + } + } else { + gf_group->update_type[frame_index] = LF_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + gf_group->bidir_pred_enabled[frame_index] = 0; + gf_group->brf_src_offset[frame_index] = 0; + } + + ++frame_index; + + // Check if we need to update the ARF. + if (is_sg_bipred_enabled && cpi->num_extra_arfs && which_arf > 0 && + frame_index > cpi->arf_pos_for_ovrly[which_arf]) { + --which_arf; + accumulative_subgroup_interval += subgroup_interval[which_arf] + 1; + + // Meet the new subgroup; Reset the bipred_group_end flag. 
+ bipred_group_end = 0; + // Insert another extra ARF after the overlay frame + if (which_arf) { + gf_group->update_type[frame_index] = INTNL_ARF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_LOW; + gf_group->arf_src_offset[frame_index] = ext_arf_interval; + + gf_group->arf_update_idx[frame_index] = which_arf; + gf_group->arf_ref_idx[frame_index] = 0; + ++frame_index; + } + } + } + + // NOTE: We need to configure the frame at the end of the sequence + 1 that + // is the start frame for the next group. Otherwise prior to the call to + // vp9_rc_get_second_pass_params() the data will be undefined. + gf_group->arf_update_idx[frame_index] = 0; + gf_group->arf_ref_idx[frame_index] = 0; + + if (rc->source_alt_ref_pending) { + gf_group->update_type[frame_index] = OVERLAY_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + + cpi->arf_pos_in_gf[0] = 1; + if (cpi->num_extra_arfs) { + // Overwrite the update_type for extra-ARF's corresponding internal + // OVERLAY's: Change from LF_UPDATE to INTNL_OVERLAY_UPDATE. + for (i = cpi->num_extra_arfs; i > 0; --i) { + cpi->arf_pos_in_gf[i] = + (i == cpi->num_extra_arfs ?
2 : cpi->arf_pos_for_ovrly[i + 1] + 1); + + gf_group->update_type[cpi->arf_pos_for_ovrly[i]] = INTNL_OVERLAY_UPDATE; + gf_group->rf_level[cpi->arf_pos_for_ovrly[i]] = INTER_NORMAL; + } + } + } else { + gf_group->update_type[frame_index] = GF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; + } + + gf_group->bidir_pred_enabled[frame_index] = 0; + gf_group->brf_src_offset[frame_index] = 0; +} + +static void define_gf_group_structure(VP9_COMP *cpi) { + RATE_CONTROL *const rc = &cpi->rc; + TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; + int i; + int frame_index = 0; int key_frame; - const int max_bits = frame_max_bits(&cpi->rc, oxcf); - int64_t total_group_bits = gf_group_bits; - int mid_boost_bits = 0; int mid_frame_idx; unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS]; - int alt_frame_index = frame_index; int normal_frames; - int normal_frame_bits; - int last_frame_reduction = 0; - double av_score = 1.0; - double tot_norm_frame_score = 1.0; - double this_frame_score = 1.0; key_frame = cpi->common.frame_type == KEY_FRAME; @@ -2150,35 +2350,31 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // For key frames the frame target rate is already set and it // is also the golden frame. 
+ // === [frame_index == 0] === if (!key_frame) { if (rc->source_alt_ref_active) { - gf_group->update_type[0] = OVERLAY_UPDATE; - gf_group->rf_level[0] = INTER_NORMAL; - gf_group->bit_allocation[0] = 0; + gf_group->update_type[frame_index] = OVERLAY_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; } else { - gf_group->update_type[0] = GF_UPDATE; - gf_group->rf_level[0] = GF_ARF_STD; - gf_group->bit_allocation[0] = gf_arf_bits; + gf_group->update_type[frame_index] = GF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; } - gf_group->arf_update_idx[0] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; + gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0]; } - // Deduct the boost bits for arf (or gf if it is not a key frame) - // from the group total. - if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits; + ++frame_index; - // Store the bits to spend on the ARF if there is one. 
+ // === [frame_index == 1] === if (rc->source_alt_ref_pending) { - gf_group->update_type[alt_frame_index] = ARF_UPDATE; - gf_group->rf_level[alt_frame_index] = GF_ARF_STD; - gf_group->bit_allocation[alt_frame_index] = gf_arf_bits; + gf_group->update_type[frame_index] = ARF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; - gf_group->arf_src_offset[alt_frame_index] = + gf_group->arf_src_offset[frame_index] = (unsigned char)(rc->baseline_gf_interval - 1); - gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[alt_frame_index] = + gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[cpi->multi_arf_last_grp_enabled && rc->source_alt_ref_active]; ++frame_index; @@ -2203,6 +2399,103 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, normal_frames = rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending); + for (i = 0; i < normal_frames; ++i) { + int arf_idx = 0; + if (twopass->stats_in >= twopass->stats_in_end) break; + + if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) { + if (frame_index <= mid_frame_idx) arf_idx = 1; + } + + gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx]; + + gf_group->update_type[frame_index] = LF_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + + ++frame_index; + } + + // Note: + // We need to configure the frame at the end of the sequence + 1 that will be + // the start frame for the next group. Otherwise prior to the call to + // vp9_rc_get_second_pass_params() the data will be undefined. 
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0]; + + if (rc->source_alt_ref_pending) { + gf_group->update_type[frame_index] = OVERLAY_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + + // Final setup for second arf and its overlay. + if (cpi->multi_arf_enabled) + gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE; + } else { + gf_group->update_type[frame_index] = GF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; + } + + // Note whether multi-arf was enabled this group for next time. + cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled; +} + +static void allocate_gf_multi_arf_bits(VP9_COMP *cpi, int64_t gf_group_bits, + int gf_arf_bits) { + VP9EncoderConfig *const oxcf = &cpi->oxcf; + RATE_CONTROL *const rc = &cpi->rc; + TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; + FIRSTPASS_STATS frame_stats; + int i; + int frame_index = 0; + int target_frame_size; + int key_frame; + const int max_bits = frame_max_bits(&cpi->rc, oxcf); + int64_t total_group_bits = gf_group_bits; + int normal_frames; + int normal_frame_bits; + int last_frame_reduction = 0; + double av_score = 1.0; + double tot_norm_frame_score = 1.0; + double this_frame_score = 1.0; + + // Define the GF structure and specify + define_gf_multi_arf_structure(cpi); + + //======================================== + + key_frame = cpi->common.frame_type == KEY_FRAME; + + // For key frames the frame target rate is already set and it + // is also the golden frame. + // === [frame_index == 0] === + if (!key_frame) { + gf_group->bit_allocation[frame_index] = + rc->source_alt_ref_active ? 0 : gf_arf_bits; + } + + // Deduct the boost bits for arf (or gf if it is not a key frame) + // from the group total. 
+ if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits; + + ++frame_index; + + // === [frame_index == 1] === + // Store the bits to spend on the ARF if there is one. + if (rc->source_alt_ref_pending) { + gf_group->bit_allocation[frame_index] = gf_arf_bits; + + ++frame_index; + + // Skip all the extra-ARF's right after ARF at the starting segment of + // the current GF group. + if (cpi->num_extra_arfs) { + while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE) + ++frame_index; + } + } + + normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending); if (normal_frames > 1) normal_frame_bits = (int)(total_group_bits / normal_frames); else @@ -2215,8 +2508,8 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // Allocate bits to the other frames in the group. for (i = 0; i < normal_frames; ++i) { - int arf_idx = 0; if (EOF == input_stats(twopass, &frame_stats)) break; + if (oxcf->vbr_corpus_complexity) { this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf, &frame_stats, av_score); @@ -2230,20 +2523,156 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, target_frame_size -= last_frame_reduction; } + // TODO(zoeliu): Further check whether following is needed for + // hierarchical GF group structure. 
if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) { - mid_boost_bits += (target_frame_size >> 4); target_frame_size -= (target_frame_size >> 4); - - if (frame_index <= mid_frame_idx) arf_idx = 1; } - gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx]; - gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx]; target_frame_size = clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits)); - gf_group->update_type[frame_index] = LF_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; + if (gf_group->update_type[frame_index] == BRF_UPDATE) { + // Boost up the allocated bits on BWDREF_FRAME + gf_group->bit_allocation[frame_index] = + target_frame_size + (target_frame_size >> 2); + } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) { + // Press down the allocated bits on LAST_BIPRED_UPDATE frames + gf_group->bit_allocation[frame_index] = + target_frame_size - (target_frame_size >> 1); + } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) { + // TODO(zoeliu): Investigate whether the allocated bits on BIPRED_UPDATE + // frames need to be further adjusted. + gf_group->bit_allocation[frame_index] = target_frame_size; + } else { + assert(gf_group->update_type[frame_index] == LF_UPDATE || + gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE); + gf_group->bit_allocation[frame_index] = target_frame_size; + } + + ++frame_index; + + // Skip all the extra-ARF's. + if (cpi->num_extra_arfs) { + while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE) + ++frame_index; + } + } + + // NOTE: We need to configure the frame at the end of the sequence + 1 that + // will be the start frame for the next group. Otherwise prior to the + // call to av1_rc_get_second_pass_params() the data will be undefined. + if (rc->source_alt_ref_pending) { + if (cpi->num_extra_arfs) { + // NOTE: For bit allocation, move the allocated bits associated with + // INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE. 
+ // i > 0 for extra-ARF's and i == 0 for ARF: + // arf_pos_for_ovrly[i]: Position for INTNL_OVERLAY_UPDATE + // arf_pos_in_gf[i]: Position for INTNL_ARF_UPDATE + for (i = cpi->num_extra_arfs; i > 0; --i) { + assert(gf_group->update_type[cpi->arf_pos_for_ovrly[i]] == + INTNL_OVERLAY_UPDATE); + + // Encoder's choice: + // Set show_existing_frame == 1 for all extra-ARF's, and hence + // allocate zero bits for all internal OVERLAY frames. + gf_group->bit_allocation[cpi->arf_pos_in_gf[i]] = + gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]]; + gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]] = 0; + } + } +} + +static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, + int gf_arf_bits) { + VP9EncoderConfig *const oxcf = &cpi->oxcf; + RATE_CONTROL *const rc = &cpi->rc; + TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; + FIRSTPASS_STATS frame_stats; + int i; + int frame_index = 0; + int target_frame_size; + int key_frame; + const int max_bits = frame_max_bits(&cpi->rc, oxcf); + int64_t total_group_bits = gf_group_bits; + int mid_boost_bits = 0; + int mid_frame_idx; + int normal_frames; + int normal_frame_bits; + int last_frame_reduction = 0; + double av_score = 1.0; + double tot_norm_frame_score = 1.0; + double this_frame_score = 1.0; + + // Define the GF structure and specify + define_gf_group_structure(cpi); + + key_frame = cpi->common.frame_type == KEY_FRAME; + + // For key frames the frame target rate is already set and it + // is also the golden frame. + // === [frame_index == 0] === + if (!key_frame) { + gf_group->bit_allocation[frame_index] = + rc->source_alt_ref_active ? 0 : gf_arf_bits; + } + + // Deduct the boost bits for arf (or gf if it is not a key frame) + // from the group total. + if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits; + + ++frame_index; + + // === [frame_index == 1] === + // Store the bits to spend on the ARF if there is one.
+ if (rc->source_alt_ref_pending) { + gf_group->bit_allocation[frame_index] = gf_arf_bits; + + ++frame_index; + + // Set aside a slot for a level 1 arf. + if (cpi->multi_arf_enabled) ++frame_index; + } + + // Define middle frame + mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1; + + normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending); + if (normal_frames > 1) + normal_frame_bits = (int)(total_group_bits / normal_frames); + else + normal_frame_bits = (int)total_group_bits; + + if (oxcf->vbr_corpus_complexity) { + av_score = get_distribution_av_err(cpi, twopass); + tot_norm_frame_score = calculate_group_score(cpi, av_score, normal_frames); + } + + // Allocate bits to the other frames in the group. + for (i = 0; i < normal_frames; ++i) { + if (EOF == input_stats(twopass, &frame_stats)) break; + if (oxcf->vbr_corpus_complexity) { + this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf, + &frame_stats, av_score); + normal_frame_bits = (int)((double)total_group_bits * + (this_frame_score / tot_norm_frame_score)); + } + + target_frame_size = normal_frame_bits; + if ((i == (normal_frames - 1)) && (i >= 1)) { + last_frame_reduction = normal_frame_bits / 16; + target_frame_size -= last_frame_reduction; + } + + if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) { + mid_boost_bits += (target_frame_size >> 4); + target_frame_size -= (target_frame_size >> 4); + } + + target_frame_size = + clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits)); gf_group->bit_allocation[frame_index] = target_frame_size; ++frame_index; @@ -2256,27 +2685,15 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // We need to configure the frame at the end of the sequence + 1 that will be // the start frame for the next group. Otherwise prior to the call to // vp9_rc_get_second_pass_params() the data will be undefined. 
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0]; if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - // Final setup for second arf and its overlay. if (cpi->multi_arf_enabled) { gf_group->bit_allocation[2] = gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits; - gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE; gf_group->bit_allocation[mid_frame_idx] = 0; } - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; } - - // Note whether multi-arf was enabled this group for next time. - cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled; } // Adjusts the ARNF filter for a GF group. @@ -2297,6 +2714,7 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise, #define ARF_DECAY_BREAKOUT 0.10 #define ARF_ABS_ZOOM_THRESH 4.0 +#define MAX_GF_BOOST 5400 static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -2338,6 +2756,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { const int is_key_frame = frame_is_intra_only(cm); const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active; + int disable_bwd_extarf; + // Reset the GF group data structures unless this is a key // frame in which case it will already have been done. if (is_key_frame == 0) { @@ -2442,7 +2862,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Monitor for static sections. if ((rc->frames_since_key + i - 1) > 1) { - zero_motion_accumulator *= get_zero_motion_factor(cpi, &next_frame); + zero_motion_accumulator = VPXMIN( + zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); } // Break clause to detect very still sections after motion. 
For example, @@ -2496,6 +2917,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Should we use the alternate reference frame. if ((zero_motion_accumulator < 0.995) && allow_alt_ref && + (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) { const int forward_frames = (rc->frames_to_key - i >= i - 1) ? i - 1 @@ -2512,7 +2934,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { ? 1 : 0; } else { - rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1)); + rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, 0, (i - 1))); rc->source_alt_ref_pending = 0; } @@ -2531,6 +2953,39 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->frames_till_gf_update_due = rc->baseline_gf_interval; + // TODO(zoeliu): Turn on the option to disable extra ALTREFs for still GF + // groups. + // Disable extra altrefs for "still" gf group: + // zero_motion_accumulator: minimum percentage of (0,0) motion; + // avg_sr_coded_error: average of the SSE per pixel of each frame; + // avg_raw_err_stdev: average of the standard deviation of (0,0) + // motion error per block of each frame. +#if 0 + assert(num_mbs > 0); + disable_bwd_extarf = + (zero_motion_accumulator > MIN_ZERO_MOTION && + avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR && + avg_raw_err_stdev < MAX_RAW_ERR_VAR); +#else + disable_bwd_extarf = 0; +#endif // 0 + + if (disable_bwd_extarf) cpi->extra_arf_allowed = 0; + + if (!cpi->extra_arf_allowed) { + cpi->num_extra_arfs = 0; + } else { + // Compute how many extra alt_refs we can have + cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval, + rc->source_alt_ref_pending); + } + // Currently at maximum two extra ARFs' are allowed + assert(cpi->num_extra_arfs <= MAX_EXT_ARFS); + + rc->bipred_group_interval = BFG_INTERVAL; + // The minimum bi-predictive frame group interval is 2. 
+ if (rc->bipred_group_interval < 2) rc->bipred_group_interval = 0; + // Reset the file position. reset_fpf_position(twopass, start_pos); @@ -2582,7 +3037,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->kf_group_error_left -= gf_group_err; // Allocate bits to each of the frames in the GF group. - allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits); + if (cpi->extra_arf_allowed) { + allocate_gf_multi_arf_bits(cpi, gf_group_bits, gf_arf_bits); + } else { + allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits); + } // Reset the file position. reset_fpf_position(twopass, start_pos); @@ -2974,7 +3433,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special case for static / slide show content but dont apply // if the kf group is very short. if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) { - rc->kf_boost = VPXMAX((rc->frames_to_key * 100), MAX_KF_TOT_BOOST); + rc->kf_boost = MAX_KF_TOT_BOOST; } else { // Apply various clamps for min and max boost rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3)); @@ -3008,39 +3467,109 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } // Define the reference buffers that will be updated post encode. 
-static void configure_buffer_updates(VP9_COMP *cpi) { +static void configure_multi_arf_buffer_updates(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; cpi->rc.is_src_frame_alt_ref = 0; + cpi->rc.is_bwd_ref_frame = 0; + cpi->rc.is_last_bipred_frame = 0; + cpi->rc.is_bipred_frame = 0; + cpi->rc.is_src_frame_ext_arf = 0; + switch (twopass->gf_group.update_type[twopass->gf_group.index]) { case KF_UPDATE: cpi->refresh_last_frame = 1; cpi->refresh_golden_frame = 1; + cpi->refresh_bwd_ref_frame = 1; + cpi->refresh_alt2_ref_frame = 1; cpi->refresh_alt_ref_frame = 1; break; + case LF_UPDATE: cpi->refresh_last_frame = 1; cpi->refresh_golden_frame = 0; + cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 0; cpi->refresh_alt_ref_frame = 0; break; + case GF_UPDATE: cpi->refresh_last_frame = 1; cpi->refresh_golden_frame = 1; + cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 0; cpi->refresh_alt_ref_frame = 0; break; + case OVERLAY_UPDATE: cpi->refresh_last_frame = 0; cpi->refresh_golden_frame = 1; + cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 0; cpi->refresh_alt_ref_frame = 0; + cpi->rc.is_src_frame_alt_ref = 1; break; - default: - assert(twopass->gf_group.update_type[twopass->gf_group.index] == - ARF_UPDATE); + + case ARF_UPDATE: cpi->refresh_last_frame = 0; cpi->refresh_golden_frame = 0; + // NOTE: BWDREF does not get updated along with ALTREF_FRAME. 
+ cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 0; cpi->refresh_alt_ref_frame = 1; break; + + case BRF_UPDATE: + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 0; + cpi->refresh_bwd_ref_frame = 1; + cpi->refresh_alt2_ref_frame = 0; + cpi->refresh_alt_ref_frame = 0; + + cpi->rc.is_bwd_ref_frame = 1; + break; + + case LAST_BIPRED_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 0; + cpi->refresh_alt_ref_frame = 0; + + cpi->rc.is_last_bipred_frame = 1; + break; + + case BIPRED_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 0; + cpi->refresh_alt_ref_frame = 0; + + cpi->rc.is_bipred_frame = 1; + break; + + case INTNL_OVERLAY_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 0; + cpi->refresh_alt_ref_frame = 0; + + cpi->rc.is_src_frame_alt_ref = 1; + cpi->rc.is_src_frame_ext_arf = 1; + break; + + case INTNL_ARF_UPDATE: + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 0; + cpi->refresh_bwd_ref_frame = 0; + cpi->refresh_alt2_ref_frame = 1; + cpi->refresh_alt_ref_frame = 0; + break; + + default: assert(0); break; } } @@ -3070,15 +3599,19 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { GF_GROUP *const gf_group = &twopass->gf_group; FIRSTPASS_STATS this_frame; - int target_rate; - if (!twopass->stats_in) return; // If this is an arf frame then we dont want to read the stats file or // advance the input pointer as we already have what we need. 
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { int target_rate; - configure_buffer_updates(cpi); + + if (cpi->extra_arf_allowed) { + configure_multi_arf_buffer_updates(cpi); + } else { + vp9_configure_buffer_updates(cpi, gf_group->index); + } + target_rate = gf_group->bit_allocation[gf_group->index]; target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); rc->base_frame_target = target_rate; @@ -3170,7 +3703,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { #endif } - configure_buffer_updates(cpi); + if (cpi->extra_arf_allowed) { + configure_multi_arf_buffer_updates(cpi); + } else { + vp9_configure_buffer_updates(cpi, gf_group->index); + } // Do the firstpass stats indicate that this frame is skippable for the // partition search? @@ -3179,8 +3716,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { cpi->partition_search_skippable_frame = is_skippable_frame(cpi); } - target_rate = gf_group->bit_allocation[gf_group->index]; - rc->base_frame_target = target_rate; + rc->base_frame_target = gf_group->bit_allocation[gf_group->index]; // The multiplication by 256 reverses a scaling factor of (>> 8) // applied when combining MB error values for the frame. diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index aa497e3da..271791324 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -11,6 +11,8 @@ #ifndef VP9_ENCODER_VP9_FIRSTPASS_H_ #define VP9_ENCODER_VP9_FIRSTPASS_H_ +#include <assert.h> + #include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_ratectrl.h" @@ -41,6 +43,13 @@ typedef struct { #define INVALID_ROW -1 +// Length of the bi-predictive frame group (BFG) +// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain +// number of bi-predictive frames. 
+#define BFG_INTERVAL 2 +#define MAX_EXT_ARFS 2 +#define MIN_EXT_ARF_INTERVAL 4 + typedef struct { double frame_mb_intra_factor; double frame_mb_brightness_factor; @@ -107,7 +116,12 @@ typedef enum { GF_UPDATE = 2, ARF_UPDATE = 3, OVERLAY_UPDATE = 4, - FRAME_UPDATE_TYPES = 5 + BRF_UPDATE = 5, // Backward Reference Frame + LAST_BIPRED_UPDATE = 6, // Last Bi-predictive Frame + BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one + INTNL_OVERLAY_UPDATE = 8, // Internal Overlay Frame + INTNL_ARF_UPDATE = 9, // Internal Altref Frame (candidate for ALTREF2) + FRAME_UPDATE_TYPES = 10 } FRAME_UPDATE_TYPE; #define FC_ANIMATION_THRESH 0.15 @@ -120,12 +134,14 @@ typedef enum { typedef struct { unsigned char index; unsigned char first_inter_index; - RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 1]; - FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 1]; - unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1]; - unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1]; - unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1]; - int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1]; + RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 2]; + FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char brf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char bidir_pred_enabled[MAX_STATIC_GF_GROUP_LENGTH + 2]; + int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2]; } GF_GROUP; typedef struct { @@ -194,7 +210,6 @@ void vp9_first_pass_encode_tile_mb_row(struct VP9_COMP *cpi, void vp9_init_second_pass(struct VP9_COMP *cpi); void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); -void vp9_twopass_postencode_update(struct VP9_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void 
vp9_twopass_postencode_update(struct VP9_COMP *cpi); @@ -202,6 +217,17 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi); void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width, int *scaled_frame_height); +static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) { + assert(MAX_EXT_ARFS > 0); + if (arf_pending) { + if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1)) + return MAX_EXT_ARFS; + else if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS) + return MAX_EXT_ARFS - 1; + } + return 0; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 1ba518af8..50a2c9057 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -224,6 +224,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (rv && search_subpel) { int subpel_force_stop = cpi->sf.mv.subpel_force_stop; if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2; + if (cpi->sf.mv.enable_adaptive_subpel_force_stop) { + int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh; + if (abs(tmp_mv->as_mv.row) >= mv_thresh || + abs(tmp_mv->as_mv.col) >= mv_thresh) + subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above; + else + subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below; + } cpi->find_fractional_mv_step( x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop, @@ -342,7 +350,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, struct macroblockd_plane *const pd = &xd->plane[0]; const uint32_t dc_quant = pd->dequant[0]; const uint32_t ac_quant = pd->dequant[1]; - const int64_t dc_thr = dc_quant * dc_quant >> 6; + int64_t dc_thr = dc_quant * dc_quant >> 6; int64_t ac_thr = ac_quant * ac_quant >> 6; unsigned int var; int sum; @@ -398,6 +406,11 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, tx_size 
= TX_8X8; else if (tx_size > TX_16X16) tx_size = TX_16X16; + + // For screen-content force 4X4 tx_size over 8X8, for large variance. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && tx_size == TX_8X8 && + bsize <= BLOCK_16X16 && var > (ac_thr << 6)) + tx_size = TX_4X4; } else { tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); @@ -406,6 +419,10 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, assert(tx_size >= TX_8X8); xd->mi[0]->tx_size = tx_size; + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->zero_temp_sad_source && + x->source_variance == 0) + dc_thr = dc_thr << 1; + // Evaluate if the partition block is a skippable block in Y plane. { unsigned int sse16x16[16] = { 0 }; @@ -576,6 +593,12 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, xd->mi[0]->tx_size = TX_8X8; else if (xd->mi[0]->tx_size > TX_16X16) xd->mi[0]->tx_size = TX_16X16; + + // For screen-content force 4X4 tx_size over 8X8, for large variance. 
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + xd->mi[0]->tx_size == TX_8X8 && bsize <= BLOCK_16X16 && + var > (ac_thr << 6)) + xd->mi[0]->tx_size = TX_4X4; } else { xd->mi[0]->tx_size = VPXMIN(max_txsize_lookup[bsize], @@ -1332,6 +1355,7 @@ static void recheck_zeromv_after_denoising( mi->ref_frame[1] = NONE; mi->mv[0].as_int = 0; mi->interp_filter = EIGHTTAP; + if (cpi->sf.default_interp_filter == BILINEAR) mi->interp_filter = BILINEAR; xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0]; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); @@ -1421,7 +1445,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; - const SVC *const svc = &cpi->svc; + SVC *const svc = &cpi->svc; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -1488,6 +1512,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int skip_ref_find_pred[4] = { 0 }; unsigned int sse_zeromv_normalized = UINT_MAX; unsigned int best_sse_sofar = UINT_MAX; + int gf_temporal_ref = 0; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_PICKMODE_CTX_DEN ctx_den; int64_t zero_last_cost_orig = INT64_MAX; @@ -1495,7 +1520,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #endif INTERP_FILTER filter_gf_svc = EIGHTTAP; MV_REFERENCE_FRAME best_second_ref_frame = NONE; - MV_REFERENCE_FRAME spatial_ref = GOLDEN_FRAME; + MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME; const struct segmentation *const seg = &cm->seg; int comp_modes = 0; int num_inter_modes = (cpi->use_svc) ? 
RT_INTER_MODES_SVC : RT_INTER_MODES; @@ -1504,27 +1529,45 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int svc_mv_row = 0; int no_scaling = 0; unsigned int thresh_svc_skip_golden = 500; - if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) { - int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id - 1, - cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + int scene_change_detected = + cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe); + x->source_variance = UINT_MAX; + if (cpi->sf.default_interp_filter == BILINEAR) { + best_pred_filter = BILINEAR; + filter_gf_svc = BILINEAR; + } + if (cpi->use_svc && svc->spatial_layer_id > 0) { + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1; } - if (cpi->svc.spatial_layer_id > 0 && - (cpi->svc.high_source_sad_superframe || no_scaling)) + if (svc->spatial_layer_id > 0 && + (svc->high_source_sad_superframe || no_scaling)) thresh_svc_skip_golden = 0; // Lower the skip threshold if lower spatial layer is better quality relative // to current layer. - else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 && - cm->base_qindex > cpi->svc.lower_layer_qindex + 15) + else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 && + cm->base_qindex > svc->lower_layer_qindex + 15) thresh_svc_skip_golden = 100; // Increase skip threshold if lower spatial layer is lower quality relative // to current layer. 
- else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 && - cm->base_qindex < cpi->svc.lower_layer_qindex - 20) + else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 && + cm->base_qindex < svc->lower_layer_qindex - 20) thresh_svc_skip_golden = 1000; + if (!cpi->use_svc || + (svc->use_gf_temporal_ref_current_layer && + !svc->layer_context[svc->temporal_layer_id].is_key_frame)) { + gf_temporal_ref = 1; + if (cpi->rc.avg_frame_low_motion > 70) + thresh_svc_skip_golden = 500; + else + thresh_svc_skip_golden = 0; + } + init_ref_frame_cost(cm, xd, ref_frame_cost); memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES); @@ -1554,12 +1597,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // filter_ref, we use a less strict condition on assigning filter_ref. // This is to reduce the probabily of entering the flow of not assigning // filter_ref and then skip filter search. - if (xd->above_mi && is_inter_block(xd->above_mi)) - filter_ref = xd->above_mi->interp_filter; - else if (xd->left_mi && is_inter_block(xd->left_mi)) - filter_ref = xd->left_mi->interp_filter; - else - filter_ref = cm->interp_filter; + filter_ref = cm->interp_filter; + if (cpi->sf.default_interp_filter != BILINEAR) { + if (xd->above_mi && is_inter_block(xd->above_mi)) + filter_ref = xd->above_mi->interp_filter; + else if (xd->left_mi && is_inter_block(xd->left_mi)) + filter_ref = xd->left_mi->interp_filter; + } // initialize mode decisions vp9_rd_cost_reset(&best_rdc); @@ -1580,15 +1624,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #endif // CONFIG_VP9_HIGHBITDEPTH x->source_variance = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); + + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && mi->segment_id > 0 && + x->zero_temp_sad_source && x->source_variance == 0) { + mi->segment_id = 0; + vp9_init_plane_quantizers(cpi, x); + } } #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) 
{ if (cpi->use_svc) { - int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, - cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame; } if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode) @@ -1596,7 +1646,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } #endif - if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc && + if (cpi->rc.frames_since_golden == 0 && gf_temporal_ref && !cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) { usable_ref_frame = LAST_FRAME; } else { @@ -1623,19 +1673,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // For svc mode, on spatial_layer_id > 0: if the reference has different scale // constrain the inter mode to only test zero motion. 
if (cpi->use_svc && svc->force_zero_mode_spatial_ref && - cpi->svc.spatial_layer_id > 0) { + svc->spatial_layer_id > 0 && !gf_temporal_ref) { if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; if (vp9_is_scaled(sf)) { svc_force_zero_mode[LAST_FRAME - 1] = 1; - spatial_ref = LAST_FRAME; + inter_layer_ref = LAST_FRAME; } } if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf; if (vp9_is_scaled(sf)) { svc_force_zero_mode[GOLDEN_FRAME - 1] = 1; - spatial_ref = GOLDEN_FRAME; + inter_layer_ref = GOLDEN_FRAME; } } } @@ -1652,6 +1702,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } } + if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad || + cpi->rc.avg_frame_low_motion < 60)) + usable_ref_frame = LAST_FRAME; + if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var)) use_golden_nonzeromv = 0; @@ -1677,6 +1731,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { + // Skip find_predictor if the reference frame is not in the + // ref_frame_flags (i.e., not used as a reference for this frame). + skip_ref_find_pred[ref_frame] = + !(cpi->ref_frame_flags & flag_list[ref_frame]); if (!skip_ref_find_pred[ref_frame]) { find_predictors(cpi, x, ref_frame, frame_mv, const_motion, &ref_frame_skip_mask, flag_list, tile_data, mi_row, @@ -1692,9 +1750,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // an averaging filter for downsampling (phase = 8). If so, we will test // a nonzero motion mode on the spatial reference. // The nonzero motion is half pixel shifted to left and top (-4, -4). 
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && - svc_force_zero_mode[spatial_ref - 1] && - cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) { + if (cpi->use_svc && svc->spatial_layer_id > 0 && + svc_force_zero_mode[inter_layer_ref - 1] && + svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 && + !gf_temporal_ref) { svc_mv_col = -4; svc_mv_row = -4; flag_svc_subpel = 1; @@ -1713,7 +1772,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int inter_mv_mode = 0; int skip_this_mv = 0; int comp_pred = 0; - int force_gf_mv = 0; + int force_mv_inter_layer = 0; PREDICTION_MODE this_mode; second_ref_frame = NONE; @@ -1737,14 +1796,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (ref_frame > usable_ref_frame) continue; if (skip_ref_find_pred[ref_frame]) continue; + if (svc->previous_frame_is_intra_only) { + if (ref_frame != LAST_FRAME || frame_mv[this_mode][ref_frame].as_int != 0) + continue; + } + // If the segment reference frame feature is enabled then do nothing if the // current ref frame is not allowed. if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) && get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) continue; - if (flag_svc_subpel && ref_frame == spatial_ref) { - force_gf_mv = 1; + if (flag_svc_subpel && ref_frame == inter_layer_ref) { + force_mv_inter_layer = 1; // Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row), // otherwise set NEWMV to (svc_mv_col, svc_mv_row). 
if (this_mode == NEWMV) { @@ -1771,8 +1835,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, sse_zeromv_normalized < thresh_svc_skip_golden) continue; + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; + if (sf->short_circuit_flat_blocks && x->source_variance == 0 && - this_mode != NEARESTMV) { + (frame_mv[this_mode][ref_frame].as_int != 0 || + (cpi->oxcf.content == VP9E_CONTENT_SCREEN && !svc->spatial_layer_id && + !x->zero_temp_sad_source))) { continue; } @@ -1802,14 +1870,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, continue; } - if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; - if (const_motion[ref_frame] && this_mode == NEARMV) continue; // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped // later. - if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME && + if (!force_mv_inter_layer && force_skip_low_temp_var && + ref_frame == GOLDEN_FRAME && frame_mv[this_mode][ref_frame].as_int != 0) { continue; } @@ -1823,7 +1890,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } if (cpi->use_svc) { - if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] && + if (!force_mv_inter_layer && svc_force_zero_mode[ref_frame - 1] && frame_mv[this_mode][ref_frame].as_int != 0) continue; } @@ -1883,10 +1950,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, (!cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index]))) - continue; + if (frame_mv[this_mode][ref_frame].as_int != 0) continue; - if (this_mode == NEWMV && !force_gf_mv) { - if (ref_frame > LAST_FRAME && !cpi->use_svc && + if (this_mode == NEWMV && !force_mv_inter_layer) { + if (ref_frame > LAST_FRAME && gf_temporal_ref && cpi->oxcf.rc_mode == VPX_CBR) { int tmp_sad; uint32_t 
dis; @@ -1931,7 +1998,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16, // for SVC encoding. - if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 && + if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 && frame_mv[NEWMV][ref_frame].as_mv.row == 0 && frame_mv[NEWMV][ref_frame].as_mv.col == 0) continue; @@ -2028,7 +2095,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search && (ref_frame == LAST_FRAME || - (ref_frame == GOLDEN_FRAME && !force_gf_mv && + (ref_frame == GOLDEN_FRAME && !force_mv_inter_layer && (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) && (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) { int pf_rate[3]; @@ -2188,7 +2255,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Skipping checking: test to see if this block can be reconstructed by // prediction only. - if (cpi->allow_encode_breakout) { + if (cpi->allow_encode_breakout && !xd->lossless && !scene_change_detected) { encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode, var_y, sse_y, yv12_mb, &this_rdc.rate, &this_rdc.dist, flag_preduv_computed); @@ -2235,7 +2302,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // If early termination flag is 1 and at least 2 modes are checked, // the mode search is terminated. - if (best_early_term && idx > 0) { + if (best_early_term && idx > 0 && !scene_change_detected) { x->skip = 1; break; } @@ -2254,17 +2321,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // layer is chosen as the reference. Always perform intra prediction if // LAST is the only reference, or is_key_frame is set, or on base // temporal layer. 
- if (cpi->svc.spatial_layer_id) { + if (svc->spatial_layer_id && !gf_temporal_ref) { perform_intra_pred = - cpi->svc.temporal_layer_id == 0 || - cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame || + svc->temporal_layer_id == 0 || + svc->layer_context[svc->temporal_layer_id].is_key_frame || !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) || - (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && + (!svc->layer_context[svc->temporal_layer_id].is_key_frame && svc_force_zero_mode[best_ref_frame - 1]); inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh; } - if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && - cpi->rc.is_src_frame_alt_ref) + if ((cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && + cpi->rc.is_src_frame_alt_ref) || + svc->previous_frame_is_intra_only) perform_intra_pred = 0; // If the segment reference frame feature is enabled and set then @@ -2276,6 +2344,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Perform intra prediction search, if the best SAD is above a certain // threshold. 
if (best_rdc.rdcost == INT64_MAX || + (scene_change_detected && perform_intra_pred) || ((!force_skip_low_temp_var || bsize < BLOCK_32X32 || x->content_state_sb == kVeryHighSad) && perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh && @@ -2429,7 +2498,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, frame_mv, reuse_inter_pred, best_tx_size, best_mode, best_ref_frame, best_pred_filter, best_mode_skip_txfm); - vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision); + vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision, + gf_temporal_ref); recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb, &best_rdc, bsize, mi_row, mi_col); best_ref_frame = ctx_den.best_ref_frame; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 599337f80..ec969e0cc 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -273,6 +273,14 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { const VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; + // On dropped frame, don't update buffer if its currently stable + // (above optimal level). This can cause issues when full superframe + // can drop (!= LAYER_DROP), since QP is adjusted downwards with buffer + // overflow, which can cause more frame drops. + if (cpi->svc.framedrop_mode != LAYER_DROP && encoded_frame_size == 0 && + rc->buffer_level > rc->optimal_buffer_level) + return; + // Non-viewable frames are a special case and are treated as pure overhead. 
if (!cm->show_frame) { rc->bits_off_target -= encoded_frame_size; @@ -355,6 +363,8 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->high_source_sad = 0; rc->reset_high_source_sad = 0; rc->high_source_sad_lagindex = -1; + rc->hybrid_intra_scene_change = 0; + rc->re_encode_maxq_scene_change = 0; rc->alt_ref_gf_group = 0; rc->last_frame_is_src_altref = 0; rc->fac_active_worst_inter = 150; @@ -390,7 +400,34 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; } -static int check_buffer(VP9_COMP *cpi, int drop_mark) { +static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) { + SVC *svc = &cpi->svc; + if (!cpi->use_svc || cpi->svc.framedrop_mode != FULL_SUPERFRAME_DROP) { + RATE_CONTROL *const rc = &cpi->rc; + return (rc->buffer_level > drop_mark); + } else { + int i; + // For SVC in the FULL_SUPERFRAME_DROP): the condition on + // buffer (if its above threshold, so no drop) is checked on current and + // upper spatial layers. If any spatial layer is not above threshold then + // we return 0. + for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; + RATE_CONTROL *lrc = &lc->rc; + // Exclude check for layer whose bitrate is 0. 
+ if (lc->target_bandwidth > 0) { + const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] * + lrc->optimal_buffer_level / 100); + if (!(lrc->buffer_level > drop_mark_layer)) return 0; + } + } + return 1; + } +} + +static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) { SVC *svc = &cpi->svc; if (!cpi->use_svc || cpi->svc.framedrop_mode == LAYER_DROP) { RATE_CONTROL *const rc = &cpi->rc; @@ -398,32 +435,56 @@ static int check_buffer(VP9_COMP *cpi, int drop_mark) { } else { int i; // For SVC in the constrained framedrop mode (svc->framedrop_mode = - // CONSTRAINED_LAYER_DROP): the condition on buffer (to drop frame) is - // checked on current and upper spatial layers. + // CONSTRAINED_LAYER_DROP or FULL_SUPERFRAME_DROP): the condition on + // buffer (if its below threshold, so drop frame) is checked on current + // and upper spatial layers. For FULL_SUPERFRAME_DROP mode if any + // spatial layer is <= threshold, then we return 1 (drop). for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) { const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; - const int drop_mark_layer = - (int)(cpi->svc.framedrop_thresh[i] * lrc->optimal_buffer_level / 100); - if (!(lrc->buffer_level <= drop_mark_layer)) return 0; + // Exclude check for layer whose bitrate is 0. 
+ if (lc->target_bandwidth > 0) { + const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] * + lrc->optimal_buffer_level / 100); + if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) { + if (lrc->buffer_level <= drop_mark_layer) return 1; + } else { + if (!(lrc->buffer_level <= drop_mark_layer)) return 0; + } + } } - return 1; + if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) + return 0; + else + return 1; } } -int vp9_rc_drop_frame(VP9_COMP *cpi) { +static int drop_frame(VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; + SVC *svc = &cpi->svc; int drop_frames_water_mark = oxcf->drop_frames_water_mark; - if (cpi->use_svc) - drop_frames_water_mark = - cpi->svc.framedrop_thresh[cpi->svc.spatial_layer_id]; - if (!drop_frames_water_mark) { + if (cpi->use_svc) { + // If we have dropped max_consec_drop frames, then we don't + // drop this spatial layer, and reset counter to 0. + if (svc->drop_count[svc->spatial_layer_id] == svc->max_consec_drop) { + svc->drop_count[svc->spatial_layer_id] = 0; + return 0; + } else { + drop_frames_water_mark = svc->framedrop_thresh[svc->spatial_layer_id]; + } + } + if (!drop_frames_water_mark || + (svc->spatial_layer_id > 0 && + svc->framedrop_mode == FULL_SUPERFRAME_DROP)) { return 0; } else { - if (rc->buffer_level < 0) { + if ((rc->buffer_level < 0 && svc->framedrop_mode != FULL_SUPERFRAME_DROP) || + (check_buffer_below_thresh(cpi, -1) && + svc->framedrop_mode == FULL_SUPERFRAME_DROP)) { // Always drop if buffer is below 0. return 1; } else { @@ -431,9 +492,11 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { // (starting with the next frame) until it increases back over drop_mark. 
int drop_mark = (int)(drop_frames_water_mark * rc->optimal_buffer_level / 100); - if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) { + if (check_buffer_above_thresh(cpi, drop_mark) && + (rc->decimation_factor > 0)) { --rc->decimation_factor; - } else if (check_buffer(cpi, drop_mark) && rc->decimation_factor == 0) { + } else if (check_buffer_below_thresh(cpi, drop_mark) && + rc->decimation_factor == 0) { rc->decimation_factor = 1; } if (rc->decimation_factor > 0) { @@ -452,11 +515,81 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { } } +int vp9_rc_drop_frame(VP9_COMP *cpi) { + SVC *svc = &cpi->svc; + int svc_prev_layer_dropped = 0; + // In the constrained or full_superframe framedrop mode for svc + // (framedrop_mode != LAYER_DROP), if the previous spatial layer was + // dropped, drop the current spatial layer. + if (cpi->use_svc && svc->spatial_layer_id > 0 && + svc->drop_spatial_layer[svc->spatial_layer_id - 1]) + svc_prev_layer_dropped = 1; + if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP) || + drop_frame(cpi)) { + vp9_rc_postencode_update_drop_frame(cpi); + cpi->ext_refresh_frame_flags_pending = 0; + cpi->last_frame_dropped = 1; + if (cpi->use_svc) { + svc->last_layer_dropped[svc->spatial_layer_id] = 1; + svc->drop_spatial_layer[svc->spatial_layer_id] = 1; + svc->drop_count[svc->spatial_layer_id]++; + svc->skip_enhancement_layer = 1; + if (svc->framedrop_mode == LAYER_DROP || + svc->drop_spatial_layer[0] == 0) { + // For the case of constrained drop mode where the base is dropped + // (drop_spatial_layer[0] == 1), which means full superframe dropped, + // we don't increment the svc frame counters. In particular temporal + // layer counter (which is incremented in vp9_inc_frame_in_layer()) + // won't be incremented, so on a dropped frame we try the same + // temporal_layer_id on next incoming frame. This is to avoid an + // issue with temporal alignement with full superframe dropping. 
+ vp9_inc_frame_in_layer(cpi); + } + if (svc->spatial_layer_id == svc->number_spatial_layers - 1) { + int i; + int all_layers_drop = 1; + for (i = 0; i < svc->spatial_layer_id; i++) { + if (svc->drop_spatial_layer[i] == 0) { + all_layers_drop = 0; + break; + } + } + if (all_layers_drop == 1) svc->skip_enhancement_layer = 0; + } + } + return 1; + } + return 0; +} + +static int adjust_q_cbr(const VP9_COMP *cpi, int q) { + // This makes sure q is between oscillating Qs to prevent resonance. + if (!cpi->rc.reset_high_source_sad && + (!cpi->oxcf.gf_cbr_boost_pct || + !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) && + (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && + cpi->rc.q_1_frame != cpi->rc.q_2_frame) { + int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), + VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); + // If the previous frame had overshoot and the current q needs to increase + // above the clamped value, reduce the clamp for faster reaction to + // overshoot. + if (cpi->rc.rc_1_frame == -1 && q > qclamp) + q = (q + qclamp) >> 1; + else + q = qclamp; + } + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) + vp9_cyclic_refresh_limit_q(cpi, &q); + return q; +} + static double get_rate_correction_factor(const VP9_COMP *cpi) { const RATE_CONTROL *const rc = &cpi->rc; + const VP9_COMMON *const cm = &cpi->common; double rcf; - if (cpi->common.frame_type == KEY_FRAME) { + if (frame_is_intra_only(cm)) { rcf = rc->rate_correction_factors[KF_STD]; } else if (cpi->oxcf.pass == 2) { RATE_FACTOR_LEVEL rf_lvl = @@ -476,13 +609,14 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { RATE_CONTROL *const rc = &cpi->rc; + const VP9_COMMON *const cm = &cpi->common; // Normalize RCF to account for the size-dependent scaling factor. 
factor /= rcf_mult[cpi->rc.frame_size_selector]; factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR); - if (cpi->common.frame_type == KEY_FRAME) { + if (frame_is_intra_only(cm)) { rc->rate_correction_factors[KF_STD] = factor; } else if (cpi->oxcf.pass == 2) { RATE_FACTOR_LEVEL rf_lvl = @@ -519,8 +653,9 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { projected_size_based_on_q = vp9_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor); } else { + FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; projected_size_based_on_q = - vp9_estimate_bits_at_q(cpi->common.frame_type, cm->base_qindex, cm->MBs, + vp9_estimate_bits_at_q(frame_type, cm->base_qindex, cm->MBs, rate_correction_factor, cm->bit_depth); } // Work out a size correction factor. @@ -594,8 +729,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, bits_per_mb_at_this_q = (int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor); } else { + FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb( - cm->frame_type, i, correction_factor, cm->bit_depth); + frame_type, i, correction_factor, cm->bit_depth); } if (bits_per_mb_at_this_q <= target_bits_per_mb) { @@ -610,22 +746,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, } } while (++i <= active_worst_quality); - // In CBR mode, this makes sure q is between oscillating Qs to prevent - // resonance. 
- if (cpi->oxcf.rc_mode == VPX_CBR && !cpi->rc.reset_high_source_sad && - (!cpi->oxcf.gf_cbr_boost_pct || - !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) && - (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && - cpi->rc.q_1_frame != cpi->rc.q_2_frame) { - int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), - VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); - // If the previous had overshoot and the current q needs to increase above - // the clamped value, reduce the clamp for faster reaction to overshoot. - if (cpi->rc.rc_1_frame == -1 && q > qclamp) - q = (q + qclamp) >> 1; - else - q = qclamp; - } + // Adjustment to q for CBR mode. + if (cpi->oxcf.rc_mode == VPX_CBR) return adjust_q_cbr(cpi, q); + return q; } @@ -705,7 +828,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int active_worst_quality; int ambient_qp; unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers; - if (cm->frame_type == KEY_FRAME || rc->reset_high_source_sad) + if (frame_is_intra_only(cm) || rc->reset_high_source_sad) return rc->worst_quality; // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME] // for the first few frames following key frame. These are both initialized @@ -730,8 +853,10 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2); if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. - // Maximum limit for down adjustment, ~30%. + // Maximum limit for down adjustment ~30%; make it lower for screen content. 
int max_adjustment_down = active_worst_quality / 3; + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) + max_adjustment_down = active_worst_quality >> 3; if (max_adjustment_down) { buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) / max_adjustment_down); @@ -836,7 +961,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, *bottom_index = active_best_quality; // Special case code to try and match quality with forced key frames - if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) { + if (frame_is_intra_only(cm) && rc->this_key_frame_forced) { q = rc->last_boosted_qindex; } else { q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, @@ -1075,7 +1200,7 @@ int vp9_frame_type_qdelta(const VP9_COMP *cpi, int rf_level, int q) { #define STATIC_MOTION_THRESH 95 static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, - int *top_index) { + int *top_index, int gf_group_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -1122,6 +1247,11 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, active_best_quality /= 4; } + // Dont allow the active min to be lossless (q0) unlesss the max q + // already indicates lossless. + active_best_quality = + VPXMIN(active_worst_quality, VPXMAX(1, active_best_quality)); + // Allow somewhat lower kf minq with small image formats. if ((cm->width * cm->height) <= (352 * 288)) { q_adj_factor -= 0.25; @@ -1164,7 +1294,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, // Modify best quality for second level arfs. For mode VPX_Q this // becomes the baseline frame q. 
- if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) + if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) active_best_quality = (active_best_quality + cq_level + 1) / 2; } } else { @@ -1200,12 +1330,20 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, } } + // For normal frames do not allow an active minq lower than the q used for + // the last boosted frame. + if (!frame_is_intra_only(cm) && + (!(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) || + rc->is_src_frame_alt_ref)) { + active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex); + } + #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY vpx_clear_system_state(); // Static forced key frames Q restrictions dealt with elsewhere. if (!frame_is_intra_only(cm) || !rc->this_key_frame_forced || cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH) { - int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index], + int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group_index], active_worst_quality); active_worst_quality = VPXMAX(active_worst_quality + qdelta, active_best_quality); @@ -1261,13 +1399,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index, int *top_index) { int q; + const int gf_group_index = cpi->twopass.gf_group.index; if (cpi->oxcf.pass == 0) { if (cpi->oxcf.rc_mode == VPX_CBR) q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index); else q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index); } else { - q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index); + q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index, + gf_group_index); } if (cpi->sf.use_nonrd_pick_mode) { if (cpi->sf.force_frame_boost == 1) q -= cpi->sf.max_delta_qindex; @@ -1280,6 +1420,62 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index, return q; } +void 
vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) { + TWO_PASS *const twopass = &cpi->twopass; + + cpi->rc.is_src_frame_alt_ref = 0; + switch (twopass->gf_group.update_type[gf_group_index]) { + case KF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; + break; + case LF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + break; + case GF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 0; + break; + case OVERLAY_UPDATE: + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 0; + cpi->rc.is_src_frame_alt_ref = 1; + break; + default: + assert(twopass->gf_group.update_type[gf_group_index] == ARF_UPDATE); + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 1; + break; + } +} + +void vp9_estimate_qp_gop(VP9_COMP *cpi) { + int gop_length = cpi->rc.baseline_gf_interval; + int bottom_index, top_index; + int idx; + const int gf_index = cpi->twopass.gf_group.index; + + for (idx = 1; idx <= gop_length + 1 && idx < MAX_LAG_BUFFERS; ++idx) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[idx]; + int target_rate = cpi->twopass.gf_group.bit_allocation[idx]; + cpi->twopass.gf_group.index = idx; + vp9_rc_set_frame_target(cpi, target_rate); + vp9_configure_buffer_updates(cpi, idx); + tpl_frame->base_qindex = + rc_pick_q_and_bounds_two_pass(cpi, &bottom_index, &top_index, idx); + tpl_frame->base_qindex = VPXMAX(tpl_frame->base_qindex, 1); + } + // Reset the actual index and frame update + cpi->twopass.gf_group.index = gf_index; + vp9_configure_buffer_updates(cpi, gf_index); +} + void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, int frame_target, int *frame_under_shoot_limit, int *frame_over_shoot_limit) { @@ -1386,7 +1582,8 @@ static void compute_frame_low_motion(VP9_COMP *const cpi) { int cnt_zeromv = 0; for (mi_row = 
0; mi_row < rows; mi_row++) { for (mi_col = 0; mi_col < cols; mi_col++) { - if (abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16) + if (mi[0]->ref_frame[0] == LAST_FRAME && + abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16) cnt_zeromv++; mi++; } @@ -1400,6 +1597,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; + SVC *const svc = &cpi->svc; const int qindex = cm->base_qindex; // Update rate control heuristics @@ -1409,7 +1607,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { vp9_rc_update_rate_correction_factors(cpi); // Keep a record of last Q and ambient average Q. - if (cm->frame_type == KEY_FRAME) { + if (frame_is_intra_only(cm)) { rc->last_q[KEY_FRAME] = qindex; rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2); @@ -1453,13 +1651,13 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) { rc->last_boosted_qindex = qindex; } - if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex; + if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex; update_buffer_level(cpi, rc->projected_frame_size); // Rolling monitors of whether we are over or underspending used to help // regulate min and Max Q in two pass. - if (cm->frame_type != KEY_FRAME) { + if (!frame_is_intra_only(cm)) { rc->rolling_target_bits = ROUND_POWER_OF_TWO( rc->rolling_target_bits * 3 + rc->this_frame_target, 2); rc->rolling_actual_bits = ROUND_POWER_OF_TWO( @@ -1478,7 +1676,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { if (!cpi->use_svc) { if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame && - (cm->frame_type != KEY_FRAME)) + (!frame_is_intra_only(cm))) // Update the alternate reference frame stats as appropriate. 
update_alt_ref_frame_stats(cpi); else @@ -1486,7 +1684,28 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { update_golden_frame_stats(cpi); } - if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0; + // If second (long term) temporal reference is used for SVC, + // update the golden frame counter, only for base temporal layer. + if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer && + svc->temporal_layer_id == 0) { + int i = 0; + if (cpi->refresh_golden_frame) + rc->frames_since_golden = 0; + else + rc->frames_since_golden++; + // Decrement count down till next gf + if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--; + // Update the frames_since_golden for all upper temporal layers. + for (i = 1; i < svc->number_temporal_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->frames_since_golden = rc->frames_since_golden; + } + } + + if (frame_is_intra_only(cm)) rc->frames_since_key = 0; if (cm->show_frame) { rc->frames_since_key++; rc->frames_to_key--; @@ -1500,18 +1719,34 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } if (oxcf->pass == 0) { - if (cm->frame_type != KEY_FRAME) { + if (!frame_is_intra_only(cm) && + (!cpi->use_svc || + (cpi->use_svc && + !svc->layer_context[svc->temporal_layer_id].is_key_frame && + svc->spatial_layer_id == svc->number_spatial_layers - 1))) { compute_frame_low_motion(cpi); if (cpi->sf.use_altref_onepass) update_altref_usage(cpi); } + // For SVC: set avg_frame_low_motion (only computed on top spatial layer) + // to all lower spatial layers. 
+ if (cpi->use_svc && + svc->spatial_layer_id == svc->number_spatial_layers - 1) { + int i; + for (i = 0; i < svc->number_spatial_layers - 1; ++i) { + const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->avg_frame_low_motion = rc->avg_frame_low_motion; + } + } cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref; } - if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0; + if (!frame_is_intra_only(cm)) rc->reset_high_source_sad = 0; rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth; - if (cpi->use_svc && - cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) - cpi->svc.lower_layer_qindex = cm->base_qindex; + if (cpi->use_svc && svc->spatial_layer_id < svc->number_spatial_layers - 1) + svc->lower_layer_qindex = cm->base_qindex; } void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { @@ -1707,27 +1942,78 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { return vp9_rc_clamp_iframe_target_size(cpi, target); } +static void set_intra_only_frame(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; + // Don't allow intra_only frame for bypass/flexible SVC mode, or if number + // of spatial layers is 1 or if number of spatial or temporal layers > 3. + // Also if intra-only is inserted on very first frame, don't allow if + // if number of temporal layers > 1. This is because on intra-only frame + // only 3 reference buffers can be updated, but for temporal layers > 1 + // we generally need to use buffer slots 4 and 5. 
+ if ((cm->current_video_frame == 0 && svc->number_temporal_layers > 1) || + svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS || + svc->number_spatial_layers > 3 || svc->number_temporal_layers > 3 || + svc->number_spatial_layers == 1) + return; + cm->show_frame = 0; + cm->intra_only = 1; + cm->frame_type = INTER_FRAME; + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + cpi->ext_refresh_golden_frame = 1; + cpi->ext_refresh_alt_ref_frame = 1; + if (cm->current_video_frame == 0) { + cpi->lst_fb_idx = 0; + cpi->gld_fb_idx = 1; + cpi->alt_fb_idx = 2; + } else { + int i; + int count = 0; + cpi->lst_fb_idx = -1; + cpi->gld_fb_idx = -1; + cpi->alt_fb_idx = -1; + // For intra-only frame we need to refresh all slots that were + // being used for the base layer (fb_idx_base[i] == 1). + // Start with assigning last first, then golden and then alt. + for (i = 0; i < REF_FRAMES; ++i) { + if (svc->fb_idx_base[i] == 1) count++; + if (count == 1 && cpi->lst_fb_idx == -1) cpi->lst_fb_idx = i; + if (count == 2 && cpi->gld_fb_idx == -1) cpi->gld_fb_idx = i; + if (count == 3 && cpi->alt_fb_idx == -1) cpi->alt_fb_idx = i; + } + // If golden or alt is not being used for base layer, then set them + // to the lst_fb_idx. + if (cpi->gld_fb_idx == -1) cpi->gld_fb_idx = cpi->lst_fb_idx; + if (cpi->alt_fb_idx == -1) cpi->alt_fb_idx = cpi->lst_fb_idx; + } +} + void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; + SVC *const svc = &cpi->svc; int target = rc->avg_frame_bandwidth; - int layer = - LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); + int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, + svc->number_temporal_layers); // Periodic key frames is based on the super-frame counter // (svc.current_superframe), also only base spatial layer is key frame. 
- if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || + // Key frame is set for any of the following: very first frame, frame flags + // indicates key, superframe counter hits key frequencey, or (non-intra) sync + // flag is set for spatial layer 0. + if ((cm->current_video_frame == 0 && !svc->previous_frame_is_intra_only) || + (cpi->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && - (cpi->svc.current_superframe % cpi->oxcf.key_freq == 0) && - cpi->svc.spatial_layer_id == 0)) { + (svc->current_superframe % cpi->oxcf.key_freq == 0) && + !svc->previous_frame_is_intra_only && svc->spatial_layer_id == 0) || + (svc->spatial_layer_sync[0] == 1 && svc->spatial_layer_id == 0)) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; if (is_one_pass_cbr_svc(cpi)) { if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi); - layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, - cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); - cpi->svc.layer_context[layer].is_key_frame = 1; + layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, + svc->number_temporal_layers); + svc->layer_context[layer].is_key_frame = 1; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); // Assumption here is that LAST_FRAME is being updated for a keyframe. // Thus no change in update flags. @@ -1736,25 +2022,73 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } else { cm->frame_type = INTER_FRAME; if (is_one_pass_cbr_svc(cpi)) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; - if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) { - lc->is_key_frame = 0; - } else { - lc->is_key_frame = - cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; - } + LAYER_CONTEXT *lc = &svc->layer_context[layer]; + // Add condition current_video_frame > 0 for the case where first frame + // is intra only followed by overlay/copy frame. 
In this case we don't + // want to reset is_key_frame to 0 on overlay/copy frame. + lc->is_key_frame = + (svc->spatial_layer_id == 0 && cm->current_video_frame > 0) + ? 0 + : svc->layer_context[svc->temporal_layer_id].is_key_frame; target = calc_pframe_target_size_one_pass_cbr(cpi); } } + // Check if superframe contains a sync layer request. + vp9_svc_check_spatial_layer_sync(cpi); + + // If long term termporal feature is enabled, set the period of the update. + // The update/refresh of this reference frame is always on base temporal + // layer frame. + if (svc->use_gf_temporal_ref_current_layer) { + // Only use gf long-term prediction on non-key superframes. + if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) { + // Use golden for this reference, which will be used for prediction. + int index = svc->spatial_layer_id; + if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1; + assert(index >= 0); + cpi->gld_fb_idx = svc->buffer_gf_temporal_ref[index].idx; + // Enable prediction off LAST (last reference) and golden (which will + // generally be further behind/long-term reference). + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + // Check for update/refresh of reference: only refresh on base temporal + // layer. + if (svc->temporal_layer_id == 0) { + if (svc->layer_context[svc->temporal_layer_id].is_key_frame) { + // On key frame we update the buffer index used for long term reference. + // Use the alt_ref since it is not used or updated on key frames. + int index = svc->spatial_layer_id; + if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1; + assert(index >= 0); + cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx; + cpi->ext_refresh_alt_ref_frame = 1; + } else if (rc->frames_till_gf_update_due == 0) { + // Set perdiod of next update. 
Make it a multiple of 10, as the cyclic + // refresh is typically ~10%, and we'd like the update to happen after + // a few cylces of the refresh (so it better quality frame). Note the + // cyclic refresh for SVC only operates on base temporal layer frames. + // Choose 20 as perdiod for now (2 cycles). + rc->baseline_gf_interval = 20; + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + cpi->ext_refresh_golden_frame = 1; + rc->gfu_boost = DEFAULT_GF_BOOST; + } + } + } else if (!svc->use_gf_temporal_ref) { + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; + } + if (svc->set_intra_only_frame) { + set_intra_only_frame(cpi); + target = calc_iframe_target_size_one_pass_cbr(cpi); + } // Any update/change of global cyclic refresh parameters (amount/delta-qp) // should be done here, before the frame qp is selected. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_parameters(cpi); vp9_rc_set_frame_target(cpi, target); - rc->frames_till_gf_update_due = INT_MAX; - rc->baseline_gf_interval = INT_MAX; } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { @@ -1762,8 +2096,8 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int target; // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic. 
- if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || - rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) { + if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || + rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0)) { cm->frame_type = KEY_FRAME; rc->frames_to_key = cpi->oxcf.key_freq; rc->kf_boost = DEFAULT_KF_BOOST; @@ -1790,7 +2124,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_parameters(cpi); - if (cm->frame_type == KEY_FRAME) + if (frame_is_intra_only(cm)) target = calc_iframe_target_size_one_pass_cbr(cpi); else target = calc_pframe_target_size_one_pass_cbr(cpi); @@ -2426,6 +2760,19 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad) rc->this_frame_target = rc->avg_frame_bandwidth; } + // For SVC the new (updated) avg_source_sad[0] for the current superframe + // updates the setting for all layers. + if (cpi->use_svc) { + int sl, tl; + SVC *const svc = &cpi->svc; + for (sl = 0; sl < svc->number_spatial_layers; ++sl) + for (tl = 0; tl < svc->number_temporal_layers; ++tl) { + int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->avg_source_sad[0] = rc->avg_source_sad[0]; + } + } // For VBR, under scene change/high content change, force golden refresh. 
if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME && rc->high_source_sad && rc->frames_to_key > 3 && @@ -2459,8 +2806,11 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; - int thresh_qp = 3 * (rc->worst_quality >> 2); - int thresh_rate = rc->avg_frame_bandwidth * 10; + int thresh_qp = 7 * (rc->worst_quality >> 3); + int thresh_rate = rc->avg_frame_bandwidth << 3; + // Lower rate threshold for video. + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) + thresh_rate = rc->avg_frame_bandwidth << 2; if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) { double rate_correction_factor = cpi->rc.rate_correction_factors[INTER_NORMAL]; @@ -2471,6 +2821,28 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) { int enumerator; // Force a re-encode, and for now use max-QP. *q = cpi->rc.worst_quality; + cpi->cyclic_refresh->counter_encode_maxq_scene_change = 0; + cpi->rc.re_encode_maxq_scene_change = 1; + // If the frame_size is much larger than the threshold (big content change) + // and the encoded frame used alot of Intra modes, then force hybrid_intra + // encoding for the re-encode on this scene change. hybrid_intra will + // use rd-based intra mode selection for small blocks. 
+ if (frame_size > (thresh_rate << 1) && cpi->svc.spatial_layer_id == 0) { + MODE_INFO **mi = cm->mi_grid_visible; + int sum_intra_usage = 0; + int mi_row, mi_col; + int tot = 0; + for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { + if (mi[0]->ref_frame[0] == INTRA_FRAME) sum_intra_usage++; + tot++; + mi++; + } + mi += 8; + } + sum_intra_usage = 100 * sum_intra_usage / (cm->mi_rows * cm->mi_cols); + if (sum_intra_usage > 60) cpi->rc.hybrid_intra_scene_change = 1; + } // Adjust avg_frame_qindex, buffer_level, and rate correction factors, as // these parameters will affect QP selection for subsequent frames. If they // have settled down to a very different (low QP) state, then not adjusting diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 3a40e0138..cf37117f9 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -114,6 +114,16 @@ typedef struct { int source_alt_ref_active; int is_src_frame_alt_ref; + // Length of the bi-predictive frame group interval + int bipred_group_interval; + + // NOTE: Different types of frames may have different bits allocated + // accordingly, aiming to achieve the overall optimal RD performance. + int is_bwd_ref_frame; + int is_last_bipred_frame; + int is_bipred_frame; + int is_src_frame_ext_arf; + int avg_frame_bandwidth; // Average frame size target for clip int min_frame_bandwidth; // Minimum allocation used for any frame int max_frame_bandwidth; // Maximum burst rate allowed for a frame. 
@@ -179,6 +189,8 @@ typedef struct { int last_frame_is_src_altref; int high_source_sad; int count_last_scene_change; + int hybrid_intra_scene_change; + int re_encode_maxq_scene_change; int avg_frame_low_motion; int af_ratio_onepass_vbr; int force_qpmin; @@ -302,6 +314,10 @@ void vp9_scene_detection_onepass(struct VP9_COMP *cpi); int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q); +void vp9_configure_buffer_updates(struct VP9_COMP *cpi, int gf_group_index); + +void vp9_estimate_qp_gop(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 3407e74c6..dcdd00d92 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -200,6 +200,38 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { return (int)rdmult; } +int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) { + const VP9_COMMON *cm = &cpi->common; + int64_t q = vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth); + +#if CONFIG_VP9_HIGHBITDEPTH + int64_t rdmult = 0; + switch (cpi->common.bit_depth) { + case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break; + case VPX_BITS_10: + rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4); + break; + default: + assert(cpi->common.bit_depth == VPX_BITS_12); + rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8); + break; + } +#else + int64_t rdmult = (int)((88 * q * q / beta) / 24); +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; + const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100)); + + rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; + rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); + } + if (rdmult < 1) rdmult = 1; + return (int)rdmult; +} + static int compute_rd_thresh_factor(int qindex, 
vpx_bit_depth_t bit_depth) { double q; #if CONFIG_VP9_HIGHBITDEPTH diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index 59022c106..919f74ebd 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -108,9 +108,14 @@ typedef struct RD_OPT { int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; +#if CONFIG_CONSISTENT_RECODE + int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES]; + int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; +#endif int RDMULT; int RDDIV; + double r0; } RD_OPT; typedef struct RD_COST { @@ -134,6 +139,8 @@ int64_t vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex); +int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta); + void vp9_initialize_rd_consts(struct VP9_COMP *cpi); void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e39df033a..4005f85b1 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -783,7 +783,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, BLOCK_SIZE bsize, - TX_SIZE tx_size, int use_fast_coef_casting) { + TX_SIZE tx_size, int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblockd_plane *const pd = &xd->plane[plane]; struct rdcost_block_args args; @@ -791,7 +791,7 @@ static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, args.cpi = cpi; args.x = x; args.best_rd = ref_best_rd; - args.use_fast_coef_costing = use_fast_coef_casting; + args.use_fast_coef_costing = use_fast_coef_costing; args.skippable = 1; if (plane == 0) xd->mi[0]->tx_size = tx_size; @@ 
-847,7 +847,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, { INT64_MAX, INT64_MAX } }; int n; int s0, s1; - int64_t best_rd = INT64_MAX; + int64_t best_rd = ref_best_rd; TX_SIZE best_tx = max_tx_size; int start_tx, end_tx; const int tx_size_ctx = get_tx_size_context(xd); @@ -868,8 +868,8 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, for (n = start_tx; n >= end_tx; n--) { const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n]; - txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0, - bs, n, cpi->sf.use_fast_coef_costing); + txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n, + cpi->sf.use_fast_coef_costing); r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { r[n][1] += r_tx_size; @@ -3073,6 +3073,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, // lock mechanism involved with reads from // tile_mode_map const int mode_search_skip_flags = sf->mode_search_skip_flags; + const int is_rect_partition = + num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize]; int64_t mask_filter = 0; int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; @@ -3224,6 +3226,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, vp9_zero(x->sum_y_eobs); + if (is_rect_partition) { + if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue; + if (second_ref_frame > 0 && + (ctx->skip_ref_frame_mask & (1 << second_ref_frame))) + continue; + } + // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (midx == mode_skip_start && best_mode_index >= 0) { @@ -3612,9 +3621,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, } if (best_mode_index < 0 || best_rd >= best_rd_so_far) { - // If adaptive interp filter is enabled, then the current leaf node of 8x8 - // data is needed for sub8x8. Hence preserve the context. 
+// If adaptive interp filter is enabled, then the current leaf node of 8x8 +// data is needed for sub8x8. Hence preserve the context. +#if CONFIG_CONSISTENT_RECODE + if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; +#else if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; +#endif rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 90da68726..75a8de270 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -61,32 +61,58 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed) { VP9_COMMON *const cm = &cpi->common; + const int min_frame_size = VPXMIN(cm->width, cm->height); + const int is_480p_or_larger = min_frame_size >= 480; + const int is_720p_or_larger = min_frame_size >= 720; + const int is_1080p_or_larger = min_frame_size >= 1080; + const int is_2160p_or_larger = min_frame_size >= 2160; // speed 0 features sf->partition_search_breakout_thr.dist = (1 << 20); sf->partition_search_breakout_thr.rate = 80; + sf->use_square_only_threshold = BLOCK_SIZES; - // Currently, the machine-learning based partition search early termination - // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. - if (VPXMIN(cm->width, cm->height) >= 480) { + if (is_480p_or_larger) { + // Currently, the machine-learning based partition search early termination + // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. 
sf->ml_partition_search_early_termination = 1; + } else { + sf->use_square_only_threshold = BLOCK_32X32; + } + + if (!is_1080p_or_larger) { + sf->use_ml_partition_search_breakout = 1; + if (is_720p_or_larger) { + sf->ml_partition_search_breakout_thresh[0] = 0.0f; + sf->ml_partition_search_breakout_thresh[1] = 0.0f; + sf->ml_partition_search_breakout_thresh[2] = 0.0f; + } else { + sf->ml_partition_search_breakout_thresh[0] = 2.5f; + sf->ml_partition_search_breakout_thresh[1] = 1.5f; + sf->ml_partition_search_breakout_thresh[2] = 1.5f; + } } if (speed >= 1) { sf->ml_partition_search_early_termination = 0; + sf->use_square_only_threshold = BLOCK_4X4; - if (VPXMIN(cm->width, cm->height) >= 720) { + if (is_720p_or_larger) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->partition_search_breakout_thr.dist = (1 << 23); + sf->use_ml_partition_search_breakout = 0; } else { sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; sf->partition_search_breakout_thr.dist = (1 << 21); + sf->ml_partition_search_breakout_thresh[0] = 0.0f; + sf->ml_partition_search_breakout_thresh[1] = 0.0f; + sf->ml_partition_search_breakout_thresh[2] = 0.0f; } } if (speed >= 2) { - if (VPXMIN(cm->width, cm->height) >= 720) { + if (is_720p_or_larger) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->adaptive_pred_interp_filter = 0; @@ -96,11 +122,14 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->partition_search_breakout_thr.dist = (1 << 22); sf->partition_search_breakout_thr.rate = 100; + sf->ml_partition_search_breakout_thresh[0] = 0.0f; + sf->ml_partition_search_breakout_thresh[1] = -1.0f; + sf->ml_partition_search_breakout_thresh[2] = -4.0f; } sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); // Use a set of speed features for 4k videos. 
- if (VPXMIN(cm->width, cm->height) >= 2160) { + if (is_2160p_or_larger) { sf->use_square_partition_only = 1; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; @@ -112,7 +141,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 3) { - if (VPXMIN(cm->width, cm->height) >= 720) { + sf->use_ml_partition_search_breakout = 0; + if (is_720p_or_larger) { sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0; sf->partition_search_breakout_thr.dist = (1 << 25); @@ -137,7 +167,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, if (speed >= 4) { sf->partition_search_breakout_thr.rate = 300; - if (VPXMIN(cm->width, cm->height) >= 720) { + if (is_720p_or_larger) { sf->partition_search_breakout_thr.dist = (1 << 26); } else { sf->partition_search_breakout_thr.dist = (1 << 24); @@ -167,7 +197,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->allow_skip_recode = 1; sf->less_rectangular_check = 1; sf->use_square_partition_only = !frame_is_boosted(cpi); - sf->use_square_only_threshold = BLOCK_16X16; + sf->prune_ref_frame_for_rect_partitions = 1; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { sf->exhaustive_searches_thresh = (1 << 22); @@ -183,6 +213,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, } if (speed >= 1) { + sf->enable_tpl_model = 0; + sf->prune_ref_frame_for_rect_partitions = 0; if (oxcf->pass == 2) { TWO_PASS *const twopass = &cpi->twopass; if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) || @@ -199,10 +231,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5]; sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? 
speed : 5]; - - sf->use_square_only_threshold = BLOCK_4X4; sf->less_rectangular_check = 1; - sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->mv.auto_mv_step_size = 1; @@ -375,6 +404,9 @@ static void set_rt_speed_feature_framesize_independent( sf->nonrd_keyframe = 0; sf->svc_use_lowres_part = 0; sf->re_encode_overshoot_rt = 0; + sf->disable_16x16part_nonkey = 0; + sf->disable_golden_ref = 0; + sf->enable_tpl_model = 0; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -537,8 +569,14 @@ static void set_rt_speed_feature_framesize_independent( if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1; if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && - cpi->oxcf.content == VP9E_CONTENT_SCREEN) + (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) { sf->re_encode_overshoot_rt = 1; + } + if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && + cm->width <= 1280 && cm->height <= 720) { + sf->use_altref_onepass = 1; + sf->use_compound_nonrd_pickmode = 1; + } } if (speed >= 6) { @@ -621,6 +659,12 @@ static void set_rt_speed_feature_framesize_independent( cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 && cpi->oxcf.width * cpi->oxcf.height > 640 * 480) sf->svc_use_lowres_part = 1; + // For SVC when golden is used as second temporal reference: to avoid + // encode time increase only use this feature on base temporal layer. + // (i.e remove golden flag from frame_flags for temporal_layer_id > 0). 
+ if (cpi->use_svc && cpi->svc.use_gf_temporal_ref_current_layer && + cpi->svc.temporal_layer_id > 0) + cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); } if (speed >= 8) { @@ -661,6 +705,22 @@ static void set_rt_speed_feature_framesize_independent( sf->limit_newmv_early_exit = 0; sf->use_simple_block_yrd = 1; } + + if (speed >= 9) { + sf->mv.enable_adaptive_subpel_force_stop = 1; + sf->mv.adapt_subpel_force_stop.mv_thresh = 2; + if (cpi->rc.avg_frame_low_motion < 40) + sf->mv.adapt_subpel_force_stop.mv_thresh = 1; + sf->mv.adapt_subpel_force_stop.force_stop_below = 1; + sf->mv.adapt_subpel_force_stop.force_stop_above = 2; + // Disable partition blocks below 16x16, except for low-resolutions. + if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240) + sf->disable_16x16part_nonkey = 1; + // Allow for disabling GOLDEN reference, for CBR mode. + if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1; + if (cpi->rc.avg_frame_low_motion < 65) sf->default_interp_filter = BILINEAR; + } + if (sf->use_altref_onepass) { if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) { sf->partition_search_type = FIXED_PARTITION; @@ -675,6 +735,10 @@ static void set_rt_speed_feature_framesize_independent( (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(*cpi->count_lastgolden_frame_usage)); } + if (cpi->svc.previous_frame_is_intra_only) { + sf->partition_search_type = FIXED_PARTITION; + sf->always_this_block_size = BLOCK_64X64; + } } void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { @@ -688,6 +752,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { sf->partition_search_breakout_thr.dist = (1 << 19); sf->partition_search_breakout_thr.rate = 80; sf->ml_partition_search_early_termination = 0; + sf->use_ml_partition_search_breakout = 0; if (oxcf->mode == REALTIME) { set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); @@ -780,6 +845,12 @@ void 
vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = 99.0; sf->allow_acl = 1; +#if CONFIG_VP9_HIGHBITDEPTH + sf->enable_tpl_model = 0; +#else + sf->enable_tpl_model = 1; +#endif + sf->prune_ref_frame_for_rect_partitions = 0; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 946bf0545..fd4973fb2 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -161,6 +161,17 @@ typedef enum { ONE_LOOP_REDUCED = 1 } FAST_COEFF_UPDATE; +typedef struct ADAPT_SUBPEL_FORCE_STOP { + // Threshold for full pixel motion vector; + int mv_thresh; + + // subpel_force_stop if full pixel MV is below the threshold. + int force_stop_below; + + // subpel_force_stop if full pixel MV is equal to or above the threshold. + int force_stop_above; +} ADAPT_SUBPEL_FORCE_STOP; + typedef struct MV_SPEED_FEATURES { // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). SEARCH_METHODS search_method; @@ -189,6 +200,11 @@ typedef struct MV_SPEED_FEATURES { // 3: Stop at full pixel. int subpel_force_stop; + // If it's enabled, different subpel_force_stop will be used for different MV. + int enable_adaptive_subpel_force_stop; + + ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop; + // This variable sets the step_param used in full pel motion search. int fullpel_search_step_param; } MV_SPEED_FEATURES; @@ -258,6 +274,9 @@ typedef struct SPEED_FEATURES { // alternate reference frames. int allow_acl; + // Temporal dependency model based encoding mode optimization + int enable_tpl_model; + // Use transform domain distortion. Use pixel domain distortion in speed 0 // and certain situations in higher speed to improve the RD model precision. 
int allow_txfm_domain_distortion; @@ -300,6 +319,9 @@ typedef struct SPEED_FEATURES { int use_square_partition_only; BLOCK_SIZE use_square_only_threshold; + // Prune reference frames for rectangular partitions. + int prune_ref_frame_for_rect_partitions; + // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. AUTO_MIN_MAX_MODE auto_min_max_partition_size; @@ -451,6 +473,10 @@ typedef struct SPEED_FEATURES { // Partition search early breakout thresholds. PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr; + // Use ML-based partition search early breakout. + int use_ml_partition_search_breakout; + float ml_partition_search_breakout_thresh[3]; + // Machine-learning based partition search early termination int ml_partition_search_early_termination; @@ -515,6 +541,12 @@ typedef struct SPEED_FEATURES { // Enable re-encoding on scene change with potential high overshoot, // for real-time encoding flow. int re_encode_overshoot_rt; + + // Disable partitioning of 16x16 blocks. + int disable_16x16part_nonkey; + + // Allow for disabling golden reference. 
+ int disable_golden_ref; } SPEED_FEATURES; struct VP9_COMP; diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 07d1995a8..0b7e7fe80 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -29,10 +29,11 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; - svc->first_spatial_layer_to_encode = 0; svc->force_zero_mode_spatial_ref = 0; svc->use_base_mv = 0; svc->use_partition_reuse = 0; + svc->use_gf_temporal_ref = 1; + svc->use_gf_temporal_ref_current_layer = 0; svc->scaled_temp_is_alloc = 0; svc->scaled_one_half = 0; svc->current_superframe = 0; @@ -40,8 +41,15 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->skip_enhancement_layer = 0; svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON; svc->framedrop_mode = CONSTRAINED_LAYER_DROP; - - for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1; + svc->set_intra_only_frame = 0; + svc->previous_frame_is_intra_only = 0; + svc->superframe_has_layer_sync = 0; + + for (i = 0; i < REF_FRAMES; ++i) { + svc->fb_idx_spatial_layer_id[i] = -1; + svc->fb_idx_temporal_layer_id[i] = -1; + svc->fb_idx_base[i] = 0; + } for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { svc->last_layer_dropped[sl] = 0; svc->drop_spatial_layer[sl] = 0; @@ -52,7 +60,16 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->downsample_filter_type[sl] = BILINEAR; svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter. 
svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark; + svc->fb_idx_upd_tl0[sl] = -1; + svc->drop_count[sl] = 0; + svc->spatial_layer_sync[sl] = 0; } + svc->max_consec_drop = INT_MAX; + + svc->buffer_gf_temporal_ref[1].idx = 7; + svc->buffer_gf_temporal_ref[0].idx = 6; + svc->buffer_gf_temporal_ref[1].is_used = 0; + svc->buffer_gf_temporal_ref[0].is_used = 0; if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH, @@ -665,24 +682,24 @@ void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) { int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { int width = 0, height = 0; + SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; - cpi->svc.skip_enhancement_layer = 0; - if (cpi->svc.number_spatial_layers > 1) { - cpi->svc.use_base_mv = 1; - cpi->svc.use_partition_reuse = 1; + svc->skip_enhancement_layer = 0; + if (svc->number_spatial_layers > 1) { + svc->use_base_mv = 1; + svc->use_partition_reuse = 1; } - cpi->svc.force_zero_mode_spatial_ref = 1; - cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride; + svc->force_zero_mode_spatial_ref = 1; + svc->mi_stride[svc->spatial_layer_id] = cpi->common.mi_stride; - if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { + if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { set_flags_and_fb_idx_for_temporal_mode3(cpi); - } else if (cpi->svc.temporal_layering_mode == + } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); - } else if (cpi->svc.temporal_layering_mode == - VP9E_TEMPORAL_LAYERING_MODE_0101) { + } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) { set_flags_and_fb_idx_for_temporal_mode2(cpi); - } else if (cpi->svc.temporal_layering_mode == + } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { // In the BYPASS/flexible mode, the 
encoder is relying on the application // to specify, for each spatial layer, the flags and buffer indices for the @@ -694,42 +711,82 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { // this case. if (cpi->ext_refresh_frame_flags_pending == 0) { int sl; - cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; - sl = cpi->svc.spatial_layer_id; - vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]); - cpi->lst_fb_idx = cpi->svc.lst_fb_idx[sl]; - cpi->gld_fb_idx = cpi->svc.gld_fb_idx[sl]; - cpi->alt_fb_idx = cpi->svc.alt_fb_idx[sl]; + svc->spatial_layer_id = svc->spatial_layer_to_encode; + sl = svc->spatial_layer_id; + vp9_apply_encoding_flags(cpi, svc->ext_frame_flags[sl]); + cpi->lst_fb_idx = svc->lst_fb_idx[sl]; + cpi->gld_fb_idx = svc->gld_fb_idx[sl]; + cpi->alt_fb_idx = svc->alt_fb_idx[sl]; + } + } + + if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[0].idx || + cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[0].idx || + cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[0].idx) + svc->buffer_gf_temporal_ref[0].is_used = 1; + if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[1].idx || + cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[1].idx || + cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[1].idx) + svc->buffer_gf_temporal_ref[1].is_used = 1; + + // For the fixed (non-flexible/bypass) SVC mode: + // If long term temporal reference is enabled at the sequence level + // (use_gf_temporal_ref == 1), and inter_layer is disabled (on inter-frames), + // we can use golden as a second temporal reference + // (since the spatial/inter-layer reference is disabled). + // We check that the fb_idx for this reference (buffer_gf_temporal_ref.idx) is + // unused (slot 7 and 6 should be available for 3-3 layer system). + // For now usage of this second temporal reference will only be used for + // highest and next to highest spatial layer (i.e., top and middle layer for + // 3 spatial layers). 
+ svc->use_gf_temporal_ref_current_layer = 0; + if (svc->use_gf_temporal_ref && !svc->buffer_gf_temporal_ref[0].is_used && + !svc->buffer_gf_temporal_ref[1].is_used && + svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->disable_inter_layer_pred != INTER_LAYER_PRED_ON && + svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 && + svc->spatial_layer_id >= svc->number_spatial_layers - 2) { + // Enable the second (long-term) temporal reference at the frame-level. + svc->use_gf_temporal_ref_current_layer = 1; + } + + // Check if current superframe has any layer sync, only check once on + // base layer. + if (svc->spatial_layer_id == 0) { + int sl = 0; + // Default is no sync. + svc->superframe_has_layer_sync = 0; + for (sl = 0; sl < svc->number_spatial_layers; ++sl) { + if (cpi->svc.spatial_layer_sync[sl]) svc->superframe_has_layer_sync = 1; } } // Reset the drop flags for all spatial layers, on the base layer. - if (cpi->svc.spatial_layer_id == 0) { - vp9_zero(cpi->svc.drop_spatial_layer); - // TODO(jianj/marpan): Investigate why setting cpi->svc.lst/gld/alt_fb_idx + if (svc->spatial_layer_id == 0) { + vp9_zero(svc->drop_spatial_layer); + // TODO(jianj/marpan): Investigate why setting svc->lst/gld/alt_fb_idx // causes an issue with frame dropping and temporal layers, when the frame // flags are passed via the encode call (bypass mode). Issue is that we're // resetting ext_refresh_frame_flags_pending to 0 on frame drops. 
- if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { - memset(&cpi->svc.lst_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx)); - memset(&cpi->svc.gld_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx)); - memset(&cpi->svc.alt_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx)); + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + memset(&svc->lst_fb_idx, -1, sizeof(svc->lst_fb_idx)); + memset(&svc->gld_fb_idx, -1, sizeof(svc->lst_fb_idx)); + memset(&svc->alt_fb_idx, -1, sizeof(svc->lst_fb_idx)); } - vp9_zero(cpi->svc.update_last); - vp9_zero(cpi->svc.update_golden); - vp9_zero(cpi->svc.update_altref); - vp9_zero(cpi->svc.reference_last); - vp9_zero(cpi->svc.reference_golden); - vp9_zero(cpi->svc.reference_altref); + vp9_zero(svc->update_last); + vp9_zero(svc->update_golden); + vp9_zero(svc->update_altref); + vp9_zero(svc->reference_last); + vp9_zero(svc->reference_golden); + vp9_zero(svc->reference_altref); } - lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * - cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id]; + lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers + + svc->temporal_layer_id]; // Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS, // only for non-BYPASS mode for now. - if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { RATE_CONTROL *const lrc = &lc->rc; lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q); lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q); @@ -741,58 +798,58 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { // Use Eightap_smooth for low resolutions. if (width * height <= 320 * 240) - cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = - EIGHTTAP_SMOOTH; + svc->downsample_filter_type[svc->spatial_layer_id] = EIGHTTAP_SMOOTH; // For scale factors > 0.75, set the phase to 0 (aligns decimated pixel // to source pixel). 
- lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * - cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id]; + lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers + + svc->temporal_layer_id]; if (lc->scaling_factor_num > (3 * lc->scaling_factor_den) >> 2) - cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 0; + svc->downsample_filter_phase[svc->spatial_layer_id] = 0; // The usage of use_base_mv or partition_reuse assumes down-scale of 2x2. // For now, turn off use of base motion vectors and partition reuse if the // spatial scale factors for any layers are not 2, // keep the case of 3 spatial layers with scale factor of 4x4 for base layer. // TODO(marpan): Fix this to allow for use_base_mv for scale factors != 2. - if (cpi->svc.number_spatial_layers > 1) { + if (svc->number_spatial_layers > 1) { int sl; - for (sl = 0; sl < cpi->svc.number_spatial_layers - 1; ++sl) { - lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id]; + for (sl = 0; sl < svc->number_spatial_layers - 1; ++sl) { + lc = &svc->layer_context[sl * svc->number_temporal_layers + + svc->temporal_layer_id]; if ((lc->scaling_factor_num != lc->scaling_factor_den >> 1) && !(lc->scaling_factor_num == lc->scaling_factor_den >> 2 && sl == 0 && - cpi->svc.number_spatial_layers == 3)) { - cpi->svc.use_base_mv = 0; - cpi->svc.use_partition_reuse = 0; + svc->number_spatial_layers == 3)) { + svc->use_base_mv = 0; + svc->use_partition_reuse = 0; break; } } // For non-zero spatial layers: if the previous spatial layer was dropped // disable the base_mv and partition_reuse features. 
- if (cpi->svc.spatial_layer_id > 0 && - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) { - cpi->svc.use_base_mv = 0; - cpi->svc.use_partition_reuse = 0; + if (svc->spatial_layer_id > 0 && + svc->drop_spatial_layer[svc->spatial_layer_id - 1]) { + svc->use_base_mv = 0; + svc->use_partition_reuse = 0; } } - cpi->svc.non_reference_frame = 0; + svc->non_reference_frame = 0; if (cpi->common.frame_type != KEY_FRAME && !cpi->ext_refresh_last_frame && !cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame) { - cpi->svc.non_reference_frame = 1; + svc->non_reference_frame = 1; } - if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = 0; + if (svc->spatial_layer_id == 0) svc->high_source_sad_superframe = 0; - if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && - cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id]) { + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->last_layer_dropped[svc->spatial_layer_id] && + svc->fb_idx_upd_tl0[svc->spatial_layer_id] != -1 && + !svc->layer_context[svc->temporal_layer_id].is_key_frame) { // For fixed/non-flexible mode, if the previous frame (same spatial layer // from previous superframe) was dropped, make sure the lst_fb_idx // for this frame corresponds to the buffer index updated on (last) encoded // TL0 frame (with same spatial layer). - cpi->lst_fb_idx = cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id]; + cpi->lst_fb_idx = svc->fb_idx_upd_tl0[svc->spatial_layer_id]; } if (vp9_set_size_literal(cpi, width, height) != 0) @@ -886,8 +943,11 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) { // Check for disabling inter-layer (spatial) prediction, if // svc.disable_inter_layer_pred is set. If the previous spatial layer was // dropped then disable the prediction from this (scaled) reference. + // For INTER_LAYER_PRED_OFF_NONKEY: inter-layer prediction is disabled + // on key frames or if any spatial layer is a sync layer. 
if ((cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY && - !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) || + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && + !cpi->svc.superframe_has_layer_sync) || cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF || cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) { MV_REFERENCE_FRAME ref_frame; @@ -903,12 +963,11 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) { } } } - // Check for disabling inter-layer prediction if - // INTER_LAYER_PRED_ON_CONSTRAINED is enabled. - // If the reference for inter-layer prediction (the reference that is scaled) - // is not the previous spatial layer from the same superframe, then we - // disable inter-layer prediction. - if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_ON_CONSTRAINED) { + // Check for disabling inter-layer prediction if the reference for inter-layer + // prediction (the reference that is scaled) is not the previous spatial layer + // from the same superframe, then we disable inter-layer prediction. + // Only need to check when inter_layer prediction is not set to OFF mode. + if (cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_OFF) { // We only use LAST and GOLDEN for prediction in real-time mode, so we // check both here. MV_REFERENCE_FRAME ref_frame; @@ -940,3 +999,102 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) { } } } + +void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + // For fixed/non-flexible mode, the folllowing constraint are expected, + // when inter-layer prediciton is on (default). 
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON && + svc->framedrop_mode != LAYER_DROP) { + if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) { + // On non-key frames: LAST is always temporal reference, GOLDEN is + // spatial reference. + if (svc->temporal_layer_id == 0) + // Base temporal only predicts from base temporal. + assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0); + else + // Non-base temporal only predicts from lower temporal layer. + assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] < + svc->temporal_layer_id); + if (svc->spatial_layer_id > 0) { + // Non-base spatial only predicts from lower spatial layer with same + // temporal_id. + assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == + svc->spatial_layer_id - 1); + assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == + svc->temporal_layer_id); + } + } else if (svc->spatial_layer_id > 0) { + // Only 1 reference for frame whose base is key; reference may be LAST + // or GOLDEN, so we check both. + if (cpi->ref_frame_flags & VP9_LAST_FLAG) { + assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] == + svc->spatial_layer_id - 1); + assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == + svc->temporal_layer_id); + } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { + assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == + svc->spatial_layer_id - 1); + assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == + svc->temporal_layer_id); + } + } + } else if (svc->use_gf_temporal_ref_current_layer && + !svc->layer_context[svc->temporal_layer_id].is_key_frame) { + // If the usage of golden as second long term reference is enabled for this + // layer, then temporal_layer_id of that reference must be base temporal + // layer 0, and spatial_layer_id of that reference must be same as current + // spatial_layer_id. 
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == + svc->spatial_layer_id); + assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0); + } +} + +void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + // Only for superframes whose base is not key, as those are + // already sync frames. + if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) { + if (svc->spatial_layer_id == 0) { + // On base spatial layer: if the current superframe has a layer sync then + // reset the pattern counters and reset to base temporal layer. + if (svc->superframe_has_layer_sync) vp9_svc_reset_key_frame(cpi); + } + // If the layer sync is set for this current spatial layer then + // disable the temporal reference. + if (svc->spatial_layer_id > 0 && + svc->spatial_layer_sync[svc->spatial_layer_id]) { + cpi->ref_frame_flags &= (~VP9_LAST_FLAG); + if (svc->use_gf_temporal_ref_current_layer) { + int index = svc->spatial_layer_id; + // If golden is used as second reference: need to remove it from + // prediction, reset refresh period to 0, and update the reference. + svc->use_gf_temporal_ref_current_layer = 0; + cpi->rc.baseline_gf_interval = 0; + cpi->rc.frames_till_gf_update_due = 0; + // On layer sync frame we must update the buffer index used for long + // term reference. Use the alt_ref since it is not used or updated on + // sync frames. + if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1; + assert(index >= 0); + cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx; + cpi->ext_refresh_alt_ref_frame = 1; + } + } + } +} + +void vp9_svc_update_ref_frame_buffer_idx(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + // Update the usage of frame buffer index for base spatial layers. 
+ if (svc->spatial_layer_id == 0) { + if ((cpi->ref_frame_flags & VP9_LAST_FLAG) || cpi->refresh_last_frame) + svc->fb_idx_base[cpi->lst_fb_idx] = 1; + if ((cpi->ref_frame_flags & VP9_GOLD_FLAG) || cpi->refresh_golden_frame) + svc->fb_idx_base[cpi->gld_fb_idx] = 1; + if ((cpi->ref_frame_flags & VP9_ALT_FLAG) || cpi->refresh_alt_ref_frame) + svc->fb_idx_base[cpi->alt_fb_idx] = 1; + } +} diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 617717049..0ac1a7315 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -24,7 +24,7 @@ typedef enum { INTER_LAYER_PRED_ON, // Inter-layer prediction is off on all frames. INTER_LAYER_PRED_OFF, - // Inter-layer prediction is off on non-key frames. + // Inter-layer prediction is off on non-key frames and non-sync frames. INTER_LAYER_PRED_OFF_NONKEY, // Inter-layer prediction is on on all frames, but constrained such // that any layer S (> 0) can only predict from previous spatial @@ -32,6 +32,11 @@ typedef enum { INTER_LAYER_PRED_ON_CONSTRAINED } INTER_LAYER_PRED; +typedef struct BUFFER_LONGTERM_REF { + int idx; + int is_used; +} BUFFER_LONGTERM_REF; + typedef struct { RATE_CONTROL rc; int target_bandwidth; @@ -69,7 +74,6 @@ typedef struct SVC { int number_temporal_layers; int spatial_layer_to_encode; - int first_spatial_layer_to_encode; // Workaround for multiple frame contexts enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state; @@ -96,8 +100,13 @@ typedef struct SVC { int lst_fb_idx[VPX_MAX_LAYERS]; int gld_fb_idx[VPX_MAX_LAYERS]; int alt_fb_idx[VPX_MAX_LAYERS]; - int ref_frame_index[REF_FRAMES]; int force_zero_mode_spatial_ref; + // Sequence level flag to enable second (long term) temporal reference. + int use_gf_temporal_ref; + // Frame level flag to enable second (long term) temporal reference. + int use_gf_temporal_ref_current_layer; + // Allow second reference for at most 2 top highest resolution layers. 
+ BUFFER_LONGTERM_REF buffer_gf_temporal_ref[2]; int current_superframe; int non_reference_frame; int use_base_mv; @@ -122,6 +131,8 @@ typedef struct SVC { int last_layer_dropped[VPX_MAX_LAYERS]; int drop_spatial_layer[VPX_MAX_LAYERS]; int framedrop_thresh[VPX_MAX_LAYERS]; + int drop_count[VPX_MAX_LAYERS]; + int max_consec_drop; SVC_LAYER_DROP_MODE framedrop_mode; INTER_LAYER_PRED disable_inter_layer_pred; @@ -141,7 +152,19 @@ typedef struct SVC { // Keep track of the frame buffer index updated/refreshed on the base // temporal superframe. - uint8_t fb_idx_upd_tl0[VPX_SS_MAX_LAYERS]; + int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS]; + + // Keep track of the spatial and temporal layer id of the frame that last + // updated the frame buffer index. + uint8_t fb_idx_spatial_layer_id[REF_FRAMES]; + uint8_t fb_idx_temporal_layer_id[REF_FRAMES]; + + int spatial_layer_sync[VPX_SS_MAX_LAYERS]; + uint8_t set_intra_only_frame; + uint8_t previous_frame_is_intra_only; + uint8_t superframe_has_layer_sync; + + uint8_t fb_idx_base[REF_FRAMES]; } SVC; struct VP9_COMP; @@ -201,6 +224,12 @@ void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi); void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi); +void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi); + +void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi); + +void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 2758c42ae..4db3e6f8e 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -620,13 +620,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) { const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; int tile_row, tile_col; - MACROBLOCKD *mbd = &cpi->td.mb.e_mbd; - // Save input state - uint8_t *input_buffer[MAX_MB_PLANE]; - int i; - - for (i = 0; i < 
MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf; - vp9_init_tile_data(cpi); for (tile_row = 0; tile_row < tile_rows; ++tile_row) { @@ -634,9 +627,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) { temporal_filter_iterate_tile_c(cpi, tile_row, tile_col); } } - - // Restore input state - for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i]; } // Apply buffer limits and context specific adjustments to arnr filter. diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index d40d3c445..7ca4004b0 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -68,6 +68,7 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c +VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht16x16_add_neon.c diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 5eaa7a18a..13c42c75f 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -248,7 +248,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(extra_cfg, row_mt, 0, 1); RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2); RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2); - RANGE_CHECK(extra_cfg, cpu_used, -8, 8); + RANGE_CHECK(extra_cfg, cpu_used, -9, 9); RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); RANGE_CHECK(extra_cfg, tile_columns, 0, 6); RANGE_CHECK(extra_cfg, tile_rows, 0, 2); @@ -1436,7 +1436,6 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; - svc->first_spatial_layer_to_encode = data->spatial_layer_id; svc->spatial_layer_to_encode = 
data->spatial_layer_id; svc->temporal_layer_id = data->temporal_layer_id; // Checks on valid layer_id input. @@ -1444,10 +1443,7 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } - if (svc->first_spatial_layer_to_encode < 0 || - svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) { - return VPX_CODEC_INVALID_PARAM; - } + return VPX_CODEC_OK; } @@ -1536,6 +1532,28 @@ static vpx_codec_err_t ctrl_set_svc_frame_drop_layer(vpx_codec_alg_priv_t *ctx, cpi->svc.framedrop_mode = data->framedrop_mode; for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) cpi->svc.framedrop_thresh[sl] = data->framedrop_thresh[sl]; + // Don't allow max_consec_drop values below 1. + cpi->svc.max_consec_drop = VPXMAX(1, data->max_consec_drop); + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_svc_gf_temporal_ref(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + const unsigned int data = va_arg(args, unsigned int); + cpi->svc.use_gf_temporal_ref = data; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_svc_spatial_layer_sync( + vpx_codec_alg_priv_t *ctx, va_list args) { + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_spatial_layer_sync_t *data = + va_arg(args, vpx_svc_spatial_layer_sync_t *); + int sl; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) + cpi->svc.spatial_layer_sync[sl] = data->spatial_layer_sync[sl]; + cpi->svc.set_intra_only_frame = data->base_layer_intra_only; return VPX_CODEC_OK; } @@ -1624,6 +1642,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test }, { VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred }, { VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer }, + { VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref }, + { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, 
ctrl_set_svc_spatial_layer_sync }, // Getters { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer }, diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 657490f4b..7f45ab28f 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -97,7 +97,7 @@ static vpx_codec_err_t decoder_peek_si_internal( const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si, int *is_intra_only, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { int intra_only_flag = 0; - uint8_t clear_buffer[10]; + uint8_t clear_buffer[11]; if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; @@ -158,6 +158,9 @@ static vpx_codec_err_t decoder_peek_si_internal( if (profile > PROFILE_0) { if (!parse_bitdepth_colorspace_sampling(profile, &rb)) return VPX_CODEC_UNSUP_BITSTREAM; + // The colorspace info may cause vp9_read_frame_size() to need 11 + // bytes. + if (data_sz < 11) return VPX_CODEC_UNSUP_BITSTREAM; } rb.bit_offset += REF_FRAMES; // refresh_frame_flags vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); @@ -235,6 +238,19 @@ static void set_ppflags(const vpx_codec_alg_priv_t *ctx, vp9_ppflags_t *flags) { flags->noise_level = ctx->postproc_cfg.noise_level; } +#undef ERROR +#define ERROR(str) \ + do { \ + ctx->base.err_detail = str; \ + return VPX_CODEC_INVALID_PARAM; \ + } while (0) + +#define RANGE_CHECK(p, memb, lo, hi) \ + do { \ + if (!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \ + ERROR(#memb " out of range [" #lo ".." #hi "]"); \ + } while (0) + static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { ctx->last_show_frame = -1; ctx->need_resync = 1; @@ -251,6 +267,9 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { ctx->pbi->max_threads = ctx->cfg.threads; ctx->pbi->inv_tile_order = ctx->invert_tile_order; + RANGE_CHECK(ctx, row_mt, 0, 1); + ctx->pbi->row_mt = ctx->row_mt; + // If postprocessing was enabled by the application and a // configuration has not been provided, default it. 
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) @@ -632,6 +651,13 @@ static vpx_codec_err_t ctrl_set_spatial_layer_svc(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->row_mt = va_arg(args, int); + + return VPX_CODEC_OK; +} + static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { { VP8_COPY_REFERENCE, ctrl_copy_reference }, @@ -643,6 +669,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { { VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment }, { VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter }, { VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc }, + { VP9D_SET_ROW_MT, ctrl_set_row_mt }, // Getters { VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer }, diff --git a/vp9/vp9_dx_iface.h b/vp9/vp9_dx_iface.h index 18bc7ab0d..6a101b03d 100644 --- a/vp9/vp9_dx_iface.h +++ b/vp9/vp9_dx_iface.h @@ -45,6 +45,7 @@ struct vpx_codec_alg_priv { // Allow for decoding up to a given spatial layer for SVC stream. int svc_decoding; int svc_spatial_layer; + int row_mt; }; #endif // VP9_VP9_DX_IFACE_H_ diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 6186d4614..d5b167bf7 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -140,6 +140,8 @@ VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h +VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c + # Strip unnecessary files with CONFIG_REALTIME_ONLY VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_firstpass.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_mbgraph.c |