Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/ppc/vp9_idct_vsx.c         115
-rw-r--r--  vp9/common/vp9_blockd.h                 7
-rw-r--r--  vp9/common/vp9_rtcd_defs.pl            10
-rw-r--r--  vp9/decoder/vp9_decodeframe.c          26
-rw-r--r--  vp9/decoder/vp9_decoder.c               1
-rw-r--r--  vp9/decoder/vp9_decoder.h               2
-rw-r--r--  vp9/encoder/ppc/vp9_quantize_vsx.c    293
-rw-r--r--  vp9/encoder/vp9_aq_cyclicrefresh.c     45
-rw-r--r--  vp9/encoder/vp9_aq_cyclicrefresh.h      3
-rw-r--r--  vp9/encoder/vp9_aq_variance.c          35
-rw-r--r--  vp9/encoder/vp9_aq_variance.h           4
-rw-r--r--  vp9/encoder/vp9_block.h                 3
-rw-r--r--  vp9/encoder/vp9_context_tree.h          2
-rw-r--r--  vp9/encoder/vp9_denoiser.c             18
-rw-r--r--  vp9/encoder/vp9_denoiser.h              5
-rw-r--r--  vp9/encoder/vp9_encodeframe.c         973
-rw-r--r--  vp9/encoder/vp9_encoder.c            1045
-rw-r--r--  vp9/encoder/vp9_encoder.h              44
-rw-r--r--  vp9/encoder/vp9_firstpass.c           680
-rw-r--r--  vp9/encoder/vp9_firstpass.h            42
-rw-r--r--  vp9/encoder/vp9_pickmode.c            176
-rw-r--r--  vp9/encoder/vp9_ratectrl.c            526
-rw-r--r--  vp9/encoder/vp9_ratectrl.h             16
-rw-r--r--  vp9/encoder/vp9_rd.c                   32
-rw-r--r--  vp9/encoder/vp9_rd.h                    7
-rw-r--r--  vp9/encoder/vp9_rdopt.c                27
-rw-r--r--  vp9/encoder/vp9_speed_features.c       97
-rw-r--r--  vp9/encoder/vp9_speed_features.h       32
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.c    292
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.h     37
-rw-r--r--  vp9/encoder/vp9_temporal_filter.c      10
-rw-r--r--  vp9/vp9_common.mk                       1
-rw-r--r--  vp9/vp9_cx_iface.c                     32
-rw-r--r--  vp9/vp9_dx_iface.c                     29
-rw-r--r--  vp9/vp9_dx_iface.h                      1
-rw-r--r--  vp9/vp9cx.mk                            2
36 files changed, 4123 insertions(+), 547 deletions(-)
diff --git a/vp9/common/ppc/vp9_idct_vsx.c b/vp9/common/ppc/vp9_idct_vsx.c
new file mode 100644
index 000000000..1b2a93edb
--- /dev/null
+++ b/vp9/common/ppc/vp9_idct_vsx.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/ppc/inv_txfm_vsx.h"
+#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h"
+
+#include "vp9/common/vp9_enums.h"
+
+void vp9_iht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ int16x8_t in[2], out[2];
+
+ in[0] = load_tran_low(0, input);
+ in[1] = load_tran_low(8 * sizeof(*input), input);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_idct4_vsx(in, out);
+ vpx_idct4_vsx(out, in);
+ break;
+ case ADST_DCT:
+ vpx_idct4_vsx(in, out);
+ vp9_iadst4_vsx(out, in);
+ break;
+ case DCT_ADST:
+ vp9_iadst4_vsx(in, out);
+ vpx_idct4_vsx(out, in);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);
+ vp9_iadst4_vsx(in, out);
+ vp9_iadst4_vsx(out, in);
+ break;
+ }
+
+ vpx_round_store4x4_vsx(in, out, dest, stride);
+}
+
+void vp9_iht8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ int16x8_t in[8], out[8];
+
+ // load input data
+ in[0] = load_tran_low(0, input);
+ in[1] = load_tran_low(8 * sizeof(*input), input);
+ in[2] = load_tran_low(2 * 8 * sizeof(*input), input);
+ in[3] = load_tran_low(3 * 8 * sizeof(*input), input);
+ in[4] = load_tran_low(4 * 8 * sizeof(*input), input);
+ in[5] = load_tran_low(5 * 8 * sizeof(*input), input);
+ in[6] = load_tran_low(6 * 8 * sizeof(*input), input);
+ in[7] = load_tran_low(7 * 8 * sizeof(*input), input);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_idct8_vsx(in, out);
+ vpx_idct8_vsx(out, in);
+ break;
+ case ADST_DCT:
+ vpx_idct8_vsx(in, out);
+ vp9_iadst8_vsx(out, in);
+ break;
+ case DCT_ADST:
+ vp9_iadst8_vsx(in, out);
+ vpx_idct8_vsx(out, in);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);
+ vp9_iadst8_vsx(in, out);
+ vp9_iadst8_vsx(out, in);
+ break;
+ }
+
+ vpx_round_store8x8_vsx(in, dest, stride);
+}
+
+void vp9_iht16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest,
+ int stride, int tx_type) {
+ int16x8_t in0[16], in1[16];
+
+ LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), in0);
+ LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input),
+ 8 * sizeof(*input), in1);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_idct16_vsx(in0, in1);
+ vpx_idct16_vsx(in0, in1);
+ break;
+ case ADST_DCT:
+ vpx_idct16_vsx(in0, in1);
+ vpx_iadst16_vsx(in0, in1);
+ break;
+ case DCT_ADST:
+ vpx_iadst16_vsx(in0, in1);
+ vpx_idct16_vsx(in0, in1);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);
+ vpx_iadst16_vsx(in0, in1);
+ vpx_iadst16_vsx(in0, in1);
+ break;
+ }
+
+ vpx_round_store16x16_vsx(in0, in1, dest, stride);
+}
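
For context: each vp9_iht*_add function above implements the 2-D inverse hybrid transform as two separable 1-D passes, selecting IDCT or IADST for each pass from tx_type, then rounds and accumulates the result into dest. A scalar sketch of the same dispatch for the 4x4 case, assuming the TX_TYPE values from vp9_enums.h; idct4_1d/iadst4_1d are hypothetical 1-D helpers, not libvpx APIs:

/* Sketch only: mirrors the tx_type switch above for the 4x4 case. */
typedef void (*tx_1d_fn)(const int16_t *in, int16_t *out);

static void iht4_dispatch_sketch(const int16_t *in, int16_t *tmp,
                                 int16_t *out, int tx_type,
                                 tx_1d_fn idct4_1d, tx_1d_fn iadst4_1d) {
  /* Pass 1 uses ADST for DCT_ADST/ADST_ADST, pass 2 for
   * ADST_DCT/ADST_ADST; DCT_DCT is IDCT in both passes. */
  const tx_1d_fn pass1 =
      (tx_type == DCT_ADST || tx_type == ADST_ADST) ? iadst4_1d : idct4_1d;
  const tx_1d_fn pass2 =
      (tx_type == ADST_DCT || tx_type == ADST_ADST) ? iadst4_1d : idct4_1d;
  pass1(in, tmp);
  pass2(tmp, out);
  /* The VSX version then calls vpx_round_store4x4_vsx() to round and
   * add the result to the prediction in dest. */
}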
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 780b29208..147380650 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -60,6 +60,13 @@ typedef struct {
#define GOLDEN_FRAME 2
#define ALTREF_FRAME 3
#define MAX_REF_FRAMES 4
+
+#define LAST2_FRAME 4
+#define LAST3_FRAME 5
+#define BWDREF_FRAME 6
+#define ALTREF2_FRAME 7
+#define LAST_REF_FRAMES 3
+
typedef int8_t MV_REFERENCE_FRAME;
// This structure now relates to 8x8 block regions.
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 20c9ed641..6d7f95260 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -67,9 +67,9 @@ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *outp
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off.
- specialize qw/vp9_iht4x4_16_add neon sse2/;
- specialize qw/vp9_iht8x8_64_add neon sse2/;
- specialize qw/vp9_iht16x16_256_add neon sse2/;
+ specialize qw/vp9_iht4x4_16_add neon sse2 vsx/;
+ specialize qw/vp9_iht8x8_64_add neon sse2 vsx/;
+ specialize qw/vp9_iht16x16_256_add neon sse2 vsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
specialize qw/vp9_iht4x4_16_add dspr2 msa/;
@@ -129,10 +129,10 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_
add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp neon sse2 avx2/, "$ssse3_x86_64";
+specialize qw/vp9_quantize_fp neon sse2 avx2 vsx/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";
+specialize qw/vp9_quantize_fp_32x32 neon vsx/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index d0e896c13..9c793f710 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1148,9 +1148,15 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
// Allocations in vp9_alloc_context_buffers() depend on individual
// dimensions as well as the overall size.
if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) {
- if (vp9_alloc_context_buffers(cm, width, height))
+ if (vp9_alloc_context_buffers(cm, width, height)) {
+ // The cm->mi_* values have been cleared and any existing context
+ // buffers have been freed. Clear cm->width and cm->height to be
+ // consistent and to force a realloc next time.
+ cm->width = 0;
+ cm->height = 0;
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
+ }
} else {
vp9_set_mb_mi(cm, width, height);
}
@@ -1528,7 +1534,7 @@ static int tile_worker_hook(void *arg1, void *arg2) {
static int compare_tile_buffers(const void *a, const void *b) {
const TileBuffer *const buf1 = (const TileBuffer *)a;
const TileBuffer *const buf2 = (const TileBuffer *)b;
- return (int)(buf2->size - buf1->size);
+ return (int)((int64_t)buf2->size - buf1->size);
}
static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
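
The compare_tile_buffers() change above fixes an unsigned-arithmetic hazard in a qsort comparator: tile sizes are size_t, so their difference can never be negative; it wraps, and truncating the wrapped value to int can report the wrong ordering. Widening to int64_t first preserves the sign for realistic tile sizes (a comparator of the form (a < b) - (a > b) would avoid overflow for all inputs). A standalone illustration, using a stand-in struct rather than the libvpx one, assuming 64-bit size_t:

#include <stddef.h>
#include <stdio.h>

typedef struct { size_t size; } BufDemo;

/* Buggy pattern: size_t subtraction wraps, then (int) truncation can
 * flip the sign for large sizes. */
static int cmp_wrapped(const BufDemo *a, const BufDemo *b) {
  return (int)(b->size - a->size); /* descending order intended */
}

/* Overflow-proof comparison form. */
static int cmp_robust(const BufDemo *a, const BufDemo *b) {
  return (a->size < b->size) - (a->size > b->size);
}

int main(void) {
  BufDemo small = { 100 }, big = { (size_t)1 << 32 }; /* 4 GiB */
  /* big - small = 0xFFFFFF9C; truncated to int this is -100 on typical
   * systems, so `small` would wrongly sort before `big`. */
  printf("wrapped: %d\n", cmp_wrapped(&small, &big));
  printf("robust:  %d\n", cmp_robust(&small, &big)); /* 1: correct */
  return 0;
}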
@@ -1724,6 +1730,21 @@ static void read_bitdepth_colorspace_sampling(VP9_COMMON *cm,
}
}
+static INLINE void flush_all_fb_on_key(VP9_COMMON *cm) {
+ if (cm->frame_type == KEY_FRAME && cm->current_video_frame > 0) {
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ BufferPool *const pool = cm->buffer_pool;
+ int i;
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
+ frame_bufs[i].ref_count = 0;
+ if (!frame_bufs[i].released) {
+ pool->release_fb_cb(pool->cb_priv, &frame_bufs[i].raw_frame_buffer);
+ frame_bufs[i].released = 1;
+ }
+ }
+ }
+}
+
static size_t read_uncompressed_header(VP9Decoder *pbi,
struct vpx_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
@@ -1788,6 +1809,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
setup_frame_size(cm, rb);
if (pbi->need_resync) {
memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
+ flush_all_fb_on_key(cm);
pbi->need_resync = 0;
}
} else {
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index a913fa560..d6eacaf44 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -69,6 +69,7 @@ static void vp9_dec_free_mi(VP9_COMMON *cm) {
cm->mip = NULL;
vpx_free(cm->mi_grid_base);
cm->mi_grid_base = NULL;
+ cm->mi_alloc_size = 0;
}
VP9Decoder *vp9_decoder_create(BufferPool *const pool) {
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 4b26c314d..5f22c00cb 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -72,6 +72,8 @@ typedef struct VP9Decoder {
int inv_tile_order;
int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // hold the reference buffer.
+
+ int row_mt;
} VP9Decoder;
int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size,
diff --git a/vp9/encoder/ppc/vp9_quantize_vsx.c b/vp9/encoder/ppc/vp9_quantize_vsx.c
new file mode 100644
index 000000000..3720b0876
--- /dev/null
+++ b/vp9/encoder/ppc/vp9_quantize_vsx.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+
+#include "./vp9_rtcd.h"
+#include "vpx_dsp/ppc/types_vsx.h"
+
+// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit
+// integers, and return the high 16 bits of the intermediate integers.
+// (a * b) >> 16
+// Note: Because this is done in 2 operations, a and b cannot both be INT16_MIN
+static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) {
+ // madds does ((A * B) >> 15) + C, we need >> 16, so we perform an extra right
+ // shift.
+ return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16);
+}
+
+// Negate 16-bit integers in a when the corresponding signed 16-bit
+// integer in b is negative.
+static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
+ const int16x8_t mask = vec_sra(b, vec_shift_sign_s16);
+ return vec_xor(vec_add(a, mask), mask);
+}
+
+// Compare packed 16-bit integers across a, and return the maximum value in
+// every element. Returns a vector containing the biggest value across vector a.
+static INLINE int16x8_t vec_max_across(int16x8_t a) {
+ a = vec_max(a, vec_perm(a, a, vec_perm64));
+ a = vec_max(a, vec_perm(a, a, vec_perm32));
+ return vec_max(a, vec_perm(a, a, vec_perm16));
+}
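
vec_max_across() above is a standard logarithmic horizontal reduction: three permute-and-max steps fold the 8 lanes so that every lane ends up holding the maximum. A plain-C equivalent of the same idea:

#include <stdint.h>

/* Scalar equivalent of vec_max_across(): halve the active range three
 * times (matching the 64/32/16-bit permutes above) until index 0 holds
 * the maximum of all 8 lanes. */
static int16_t max_across_8_sketch(const int16_t v[8]) {
  int16_t tmp[8];
  int step, i;
  for (i = 0; i < 8; ++i) tmp[i] = v[i];
  for (step = 4; step >= 1; step >>= 1)
    for (i = 0; i < step; ++i)
      if (tmp[i + step] > tmp[i]) tmp[i] = tmp[i + step];
  return tmp[0];
}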
+
+void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan_ptr,
+ const int16_t *iscan_ptr) {
+ int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
+ bool16x8_t zero_coeff0, zero_coeff1;
+
+ int16x8_t round = vec_vsx_ld(0, round_ptr);
+ int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+ int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr);
+
+ (void)scan_ptr;
+ (void)skip_block;
+ assert(!skip_block);
+
+ // First set of 8 coeff starts with DC + 7 AC
+ qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+
+ // Remove DC value from round and quant
+ round = vec_splat(round, 1);
+ quant = vec_splat(quant, 1);
+
+ // Remove DC value from dequant
+ dequant = vec_splat(dequant, 1);
+
+ // Second set of 8 coeffs (all AC)
+ qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+
+ eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1));
+
+ // We quantize 16 coeff up front (enough for a 4x4) and process 24 coeff per
+ // loop iteration.
+ // for 8x8: 16 + 2 x 24 = 64
+ // for 16x16: 16 + 10 x 24 = 256
+ if (n_coeffs > 16) {
+ int16x8_t coeff2, qcoeff2, dqcoeff2, eob2, scan2;
+ bool16x8_t zero_coeff2;
+
+ int index = 16;
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+
+ do {
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+ scan0 = vec_vsx_ld(off0, iscan_ptr);
+ scan1 = vec_vsx_ld(off1, iscan_ptr);
+ scan2 = vec_vsx_ld(off2, iscan_ptr);
+
+ qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
+
+ qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
+
+ qcoeff2 = vec_mulhi(vec_vaddshs(vec_abs(coeff2), round), quant);
+ zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16);
+ qcoeff2 = vec_sign(qcoeff2, coeff2);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+ dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+
+ eob = vec_max(eob, vec_or(scan0, zero_coeff0));
+ eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2));
+ eob = vec_max(eob, eob2);
+
+ index += 24;
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ } while (index < n_coeffs);
+ }
+
+ eob = vec_max_across(eob);
+ *eob_ptr = eob[0] + 1;
+}
+
+// Sets each 32-bit element to 1 when the corresponding value in a is
+// negative.
+static INLINE int32x4_t vec_is_neg(int32x4_t a) {
+ return vec_sr(a, vec_shift_sign_s32);
+}
+
+// Dequantization function used for 32x32 blocks. Quantized coeffs of 32x32
+// blocks are twice as big as for other block sizes. As such, using
+// vec_mladd results in overflow.
+static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
+ int16x8_t dequant) {
+ int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
+ int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
+ // Add 1 if negative to round towards zero, because C division truncates
+ // toward zero while the arithmetic shift rounds toward negative infinity.
+ dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe));
+ dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
+ dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
+ dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
+ return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
+}
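
The bias step in dequantize_coeff_32() above exists because the reference C code divides by two, and C division truncates toward zero, while an arithmetic right shift rounds toward negative infinity; the two differ exactly for odd negative values. A quick worked check, assuming the usual two's-complement arithmetic shift:

#include <assert.h>

/* (x + (x < 0)) >> 1 reproduces truncating division by 2. */
static int div2_trunc_sketch(int x) { return (x + (x < 0)) >> 1; }

int main(void) {
  assert(-7 / 2 == -3);                /* C division truncates */
  assert((-7) >> 1 == -4);             /* shift rounds toward -infinity */
  assert(div2_trunc_sketch(-7) == -3); /* bias restores truncation */
  assert(div2_trunc_sketch(-8) == -4);
  assert(div2_trunc_sketch(7) == 3);
  return 0;
}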
+
+void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr,
+ const int16_t *iscan_ptr) {
+ // In stage 1, we quantize 16 coeffs (DC + 15 AC)
+ // In stage 2, we loop 42 times and quantize 24 coeffs per iteration
+ // (32 * 32 - 16) / 24 = 42
+ int num_itr = 42;
+ // Offsets are in bytes, 16 coeffs = 32 bytes
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+
+ int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
+ bool16x8_t mask0, mask1, zero_coeff0, zero_coeff1;
+
+ int16x8_t round = vec_vsx_ld(0, round_ptr);
+ int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+ int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr);
+ int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2));
+ int16x8_t abs_coeff0 = vec_abs(coeff0);
+ int16x8_t abs_coeff1 = vec_abs(coeff1);
+
+ (void)scan_ptr;
+ (void)skip_block;
+ (void)n_coeffs;
+ assert(!skip_block);
+
+ mask0 = vec_cmpge(abs_coeff0, thres);
+ round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16);
+ // First set of 8 coeff starts with DC + 7 AC
+ qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16);
+ qcoeff0 = vec_and(qcoeff0, mask0);
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+
+ dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant);
+ vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+
+ // Remove DC value from thres, round, quant and dequant
+ thres = vec_splat(thres, 1);
+ round = vec_splat(round, 1);
+ quant = vec_splat(quant, 1);
+ dequant = vec_splat(dequant, 1);
+
+ mask1 = vec_cmpge(abs_coeff1, thres);
+
+ // Second set of 8 coeffs (all AC)
+ qcoeff1 =
+ vec_madds(vec_vaddshs(vec_abs(coeff1), round), quant, vec_zeros_s16);
+ qcoeff1 = vec_and(qcoeff1, mask1);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant);
+ vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+
+ eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1));
+
+ do {
+ int16x8_t coeff2, abs_coeff2, qcoeff2, dqcoeff2, eob2, scan2;
+ bool16x8_t zero_coeff2, mask2;
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+ scan0 = vec_vsx_ld(off0, iscan_ptr);
+ scan1 = vec_vsx_ld(off1, iscan_ptr);
+ scan2 = vec_vsx_ld(off2, iscan_ptr);
+
+ abs_coeff0 = vec_abs(coeff0);
+ abs_coeff1 = vec_abs(coeff1);
+ abs_coeff2 = vec_abs(coeff2);
+
+ qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16);
+ qcoeff1 = vec_madds(vec_vaddshs(abs_coeff1, round), quant, vec_zeros_s16);
+ qcoeff2 = vec_madds(vec_vaddshs(abs_coeff2, round), quant, vec_zeros_s16);
+
+ mask0 = vec_cmpge(abs_coeff0, thres);
+ mask1 = vec_cmpge(abs_coeff1, thres);
+ mask2 = vec_cmpge(abs_coeff2, thres);
+
+ qcoeff0 = vec_and(qcoeff0, mask0);
+ qcoeff1 = vec_and(qcoeff1, mask1);
+ qcoeff2 = vec_and(qcoeff2, mask2);
+
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16);
+
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ qcoeff2 = vec_sign(qcoeff2, coeff2);
+
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+
+ dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant);
+ dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant);
+ dqcoeff2 = dequantize_coeff_32(qcoeff2, dequant);
+
+ vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+
+ eob = vec_max(eob, vec_or(scan0, zero_coeff0));
+ eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2));
+ eob = vec_max(eob, eob2);
+
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ num_itr--;
+ } while (num_itr != 0);
+
+ eob = vec_max_across(eob);
+ *eob_ptr = eob[0] + 1;
+}
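
For reference, per coefficient the "fp" quantizer above computes qcoeff = sign(coeff) * ((|coeff| + round) * quant >> 16) and dqcoeff = qcoeff * dequant, where table entry 0 applies to the DC coefficient and entry 1 to every AC coefficient (hence the vec_splat(x, 1) calls after the first vector of 8). eob is one past the highest scan position holding a nonzero qcoeff, which the vector code tracks by OR-ing the iscan values with the all-ones zero-coefficient masks and taking a running max. A simplified scalar sketch, with saturating adds and the 32x32 variant's threshold and rounding details omitted:

#include <stdint.h>
#include <stdlib.h>

/* Simplified sketch, not a drop-in vp9_quantize_fp replacement:
 * assumes coefficients fit in int16_t and skips saturation. */
static void quantize_fp_sketch(const int16_t *coeff, intptr_t n_coeffs,
                               const int16_t *round, const int16_t *quant,
                               const int16_t *dequant, const int16_t *iscan,
                               int16_t *qcoeff, int16_t *dqcoeff,
                               uint16_t *eob_ptr) {
  int eob = -1;
  intptr_t i;
  for (i = 0; i < n_coeffs; ++i) {
    const int ac = (i != 0); /* table entry 0 is DC, entry 1 is AC */
    int tmp = ((abs(coeff[i]) + round[ac]) * quant[ac]) >> 16;
    if (coeff[i] < 0) tmp = -tmp; /* restore the sign */
    qcoeff[i] = (int16_t)tmp;
    dqcoeff[i] = (int16_t)(tmp * dequant[ac]);
    if (tmp != 0 && iscan[i] > eob) eob = iscan[i];
  }
  *eob_ptr = (uint16_t)(eob + 1);
}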
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index b47840795..9f7be4f13 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -39,6 +39,7 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
}
assert(MAXQ <= 255);
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
+ cr->counter_encode_maxq_scene_change = 0;
return cr;
}
@@ -427,8 +428,11 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
double weight_segment_target = 0;
double weight_segment = 0;
int thresh_low_motion = (cm->width < 720) ? 55 : 20;
+ int qp_thresh = VPXMIN(20, rc->best_quality << 1);
cr->apply_cyclic_refresh = 1;
- if (cm->frame_type == KEY_FRAME || cpi->svc.temporal_layer_id > 0 ||
+ if (frame_is_intra_only(cm) || cpi->svc.temporal_layer_id > 0 ||
+ is_lossless_requested(&cpi->oxcf) ||
+ rc->avg_frame_qindex[INTER_FRAME] < qp_thresh ||
(cpi->use_svc &&
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
(!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion &&
@@ -457,6 +461,18 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_boost_fac = 13;
}
}
+ // For screen content: keep rate_ratio_qdelta at 2.0 (segment#1 boost) and
+ // percent_refresh (refresh rate) at 10, but reduce the rate boost for
+ // segment#2 (rate_boost_fac = 10 disables segment#2).
+ // TODO(marpan): Consider increasing refresh rate after slide change.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
+ cr->percent_refresh = 10;
+ // Increase the amount of refresh on scene change that is encoded at max Q,
+ // increase for a few cycles of the refresh period (~30 frames).
+ if (cr->counter_encode_maxq_scene_change < 30) cr->percent_refresh = 15;
+ cr->rate_ratio_qdelta = 2.0;
+ cr->rate_boost_fac = 10;
+ }
// Adjust some parameters for low resolutions.
if (cm->width <= 352 && cm->height <= 288) {
if (rc->avg_frame_bandwidth < 3000) {
@@ -491,6 +507,13 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
num8x8bl;
if (weight_segment_target < 7 * weight_segment / 8)
weight_segment = weight_segment_target;
+ // For screen content: don't include the target in the segment weight,
+ // since the segment is reset for all flat areas, so it is more accurate
+ // to just use the previous actual number of seg blocks for the weight.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ weight_segment =
+ (double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) /
+ num8x8bl;
cr->weight_segment = weight_segment;
}
@@ -501,6 +524,8 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
struct segmentation *const seg = &cm->seg;
if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
+ // Reset if a resolution change has occurred.
+ if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi);
if (!cr->apply_cyclic_refresh || (cpi->force_update_segmentation)) {
// Set segmentation map to 0 and disable.
unsigned char *const seg_map = cpi->segmentation_map;
@@ -511,12 +536,14 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
cr->reduce_refresh = 0;
+ cr->counter_encode_maxq_scene_change = 0;
}
return;
} else {
int qindex_delta = 0;
int qindex2;
const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
+ cr->counter_encode_maxq_scene_change++;
vpx_clear_system_state();
// Set rate threshold to some multiple (set to 2 for now) of the target
// rate (target is given by sb64_target_rate and scaled by 256).
@@ -566,9 +593,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
cr->qindex_delta[2] = qindex_delta;
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
- // Reset if resoluton change has occurred.
- if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi);
-
// Update the segmentation and refresh map.
cyclic_refresh_update_map(cpi);
}
@@ -582,8 +606,19 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
- memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
+ memset(cr->last_coded_q_map, MAXQ,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
+ cr->counter_encode_maxq_scene_change = 0;
+}
+
+void vp9_cyclic_refresh_limit_q(const VP9_COMP *cpi, int *q) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ // For now, apply a hard limit to the frame-level decrease in q when
+ // cyclic refresh is active (percent_refresh > 0).
+ if (cr->percent_refresh > 0 && cpi->rc.q_1_frame - *q > 8) {
+ *q = cpi->rc.q_1_frame - 8;
+ }
}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 77fa67c9e..50789e87c 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -68,6 +68,7 @@ struct CYCLIC_REFRESH {
int reduce_refresh;
double weight_segment;
int apply_cyclic_refresh;
+ int counter_encode_maxq_scene_change;
};
struct VP9_COMP;
@@ -139,6 +140,8 @@ static INLINE int cyclic_refresh_segment_id(int segment_id) {
return CR_SEGMENT_ID_BASE;
}
+void vp9_cyclic_refresh_limit_q(const struct VP9_COMP *cpi, int *q);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c
index 477f62ba5..9cd8819c3 100644
--- a/vp9/encoder/vp9_aq_variance.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -19,6 +19,7 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
+#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_segmentation.h"
#define ENERGY_MIN (-4)
@@ -192,6 +193,40 @@ double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
return log(var + 1.0);
}
+// Get the range of sub-block energy values.
+void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int *min_e,
+ int *max_e) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ int x, y;
+
+ if (xmis < bw || ymis < bh) {
+ vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
+ *min_e = vp9_block_energy(cpi, mb, bsize);
+ *max_e = *min_e;
+ } else {
+ int energy;
+ *min_e = ENERGY_MAX;
+ *max_e = ENERGY_MIN;
+
+ for (y = 0; y < ymis; ++y) {
+ for (x = 0; x < xmis; ++x) {
+ vp9_setup_src_planes(mb, cpi->Source, mi_row + y, mi_col + x);
+ energy = vp9_block_energy(cpi, mb, BLOCK_8X8);
+ *min_e = VPXMIN(*min_e, energy);
+ *max_e = VPXMAX(*max_e, energy);
+ }
+ }
+ }
+
+ // Restore the source pointers to what they were on entry.
+ vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
+}
+
#define DEFAULT_E_MIDPOINT 10.0
int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
double energy;
diff --git a/vp9/encoder/vp9_aq_variance.h b/vp9/encoder/vp9_aq_variance.h
index 211a69f39..12848f74c 100644
--- a/vp9/encoder/vp9_aq_variance.h
+++ b/vp9/encoder/vp9_aq_variance.h
@@ -20,7 +20,11 @@ extern "C" {
unsigned int vp9_vaq_segment_id(int energy);
void vp9_vaq_frame_setup(VP9_COMP *cpi);
+void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int *min_e,
+ int *max_e);
int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+
double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 724205dd5..36cf227cb 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -92,6 +92,7 @@ struct macroblock {
int sadperbit4;
int rddiv;
int rdmult;
+ int cb_rdmult;
int mb_energy;
// These are set to their default values at the beginning, and then adjusted
@@ -180,6 +181,8 @@ struct macroblock {
int sb_pickmode_part;
+ int zero_temp_sad_source;
+
// For each superblock: saves the content value (e.g., low/high sad/sumdiff)
// based on source sad, prior to encoding the frame.
uint8_t content_state_sb;
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index 73423c075..2bcc26e94 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -75,6 +75,8 @@ typedef struct {
// Used for the machine learning-based early termination
int32_t sum_y_eobs;
+ // Skip certain ref frames during RD search of rectangular partitions.
+ uint8_t skip_ref_frame_mask;
} PICK_MODE_CONTEXT;
typedef struct PC_TREE {
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 8ec5dd91d..6abb082cd 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -189,7 +189,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx,
- int use_svc, int spatial_layer) {
+ int use_svc, int spatial_layer, int use_gf_temporal_ref) {
const int sse_diff = (ctx->newmv_sse == UINT_MAX)
? 0
: ((int)ctx->zeromv_sse - (int)ctx->newmv_sse);
@@ -220,7 +220,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
if (frame != INTRA_FRAME && frame != ALTREF_FRAME &&
- (frame != GOLDEN_FRAME || num_spatial_layers == 1) &&
+ (frame != GOLDEN_FRAME || num_spatial_layers == 1 ||
+ use_gf_temporal_ref) &&
sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
mi->ref_frame[0] = ctx->best_reference_frame;
mi->mode = ctx->best_sse_inter_mode;
@@ -230,7 +231,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
frame = ctx->best_zeromv_reference_frame;
ctx->newmv_sse = ctx->zeromv_sse;
// Bias to last reference.
- if (num_spatial_layers > 1 || frame == ALTREF_FRAME ||
+ if ((num_spatial_layers > 1 && !use_gf_temporal_ref) ||
+ frame == ALTREF_FRAME ||
(frame != LAST_FRAME &&
((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) ||
denoiser->denoising_level >= kDenHigh))) {
@@ -326,7 +328,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
- VP9_DENOISER_DECISION *denoiser_decision) {
+ VP9_DENOISER_DECISION *denoiser_decision,
+ int use_gf_temporal_ref) {
int mv_col, mv_row;
int motion_magnitude = 0;
int zeromv_filter = 0;
@@ -397,7 +400,8 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx,
- cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id);
+ cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id,
+ use_gf_temporal_ref);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,
@@ -448,13 +452,13 @@ void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
- int svc_base_is_key, int second_spatial_layer) {
+ int svc_refresh_denoiser_buffers, int second_spatial_layer) {
const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;
// Copy source into denoised reference buffers on KEY_FRAME or
// if the just encoded frame was resized. For SVC, copy source if the base
// spatial layer was key frame.
if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset ||
- svc_base_is_key) {
+ svc_refresh_denoiser_buffers) {
int i;
// Start at 1 so as not to overwrite the INTRA_FRAME
for (i = 1; i < denoiser->num_ref_frames; ++i) {
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index f4da24cbf..d25fe7edc 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -73,11 +73,12 @@ void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
- int svc_base_is_key, int second_spatial_layer);
+ int svc_refresh_denoiser_buffers, int second_spatial_layer);
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
- VP9_DENOISER_DECISION *denoiser_decision);
+ VP9_DENOISER_DECISION *denoiser_decision,
+ int use_gf_temporal_ref);
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 091992dbd..e1207fea7 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -52,33 +52,6 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int output_enabled, int mi_row, int mi_col,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
-// Machine learning-based early termination parameters.
-static const double train_mean[24] = {
- 303501.697372, 3042630.372158, 24.694696, 1.392182,
- 689.413511, 162.027012, 1.478213, 0.0,
- 135382.260230, 912738.513263, 28.845217, 1.515230,
- 544.158492, 131.807995, 1.436863, 0.0,
- 43682.377587, 208131.711766, 28.084737, 1.356677,
- 138.254122, 119.522553, 1.252322, 0.0
-};
-
-static const double train_stdm[24] = {
- 673689.212982, 5996652.516628, 0.024449, 1.989792,
- 985.880847, 0.014638, 2.001898, 0.0,
- 208798.775332, 1812548.443284, 0.018693, 1.838009,
- 396.986910, 0.015657, 1.332541, 0.0,
- 55888.847031, 448587.962714, 0.017900, 1.904776,
- 98.652832, 0.016598, 1.320992, 0.0
-};
-
-// Error tolerance: 0.01%-0.0.05%-0.1%
-static const double classifiers[24] = {
- 0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863,
- 0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134,
- 0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700,
- 0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211
-};
-
// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
@@ -405,7 +378,8 @@ static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
static void get_variance(var *v) {
v->variance =
(int)(256 * (v->sum_square_error -
- ((v->sum_error * v->sum_error) >> v->log2_count)) >>
+ (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
+ v->log2_count)) >>
v->log2_count);
}
@@ -447,7 +421,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
// No check for vert/horiz split as too few samples for variance.
if (bsize == bsize_min) {
// Variance already computed to set the force_split.
- if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
+ if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows &&
vt.part_variances->none.variance < threshold) {
@@ -457,9 +431,9 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
return 0;
} else if (bsize > bsize_min) {
// Variance already computed to set the force_split.
- if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
+ if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
// For key frame: take split for bsize above 32X32 or very high variance.
- if (cm->frame_type == KEY_FRAME &&
+ if (frame_is_intra_only(cm) &&
(bsize > BLOCK_32X32 ||
vt.part_variances->none.variance > (threshold << 4))) {
return 0;
@@ -531,7 +505,7 @@ static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
int content_state) {
VP9_COMMON *const cm = &cpi->common;
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int is_key_frame = frame_is_intra_only(cm);
const int threshold_multiplier = is_key_frame ? 20 : 1;
int64_t threshold_base =
(int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
@@ -583,6 +557,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
} else {
thresholds[1] = (5 * threshold_base) >> 1;
}
+ if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
}
}
@@ -590,7 +565,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
int content_state) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int is_key_frame = frame_is_intra_only(cm);
if (sf->partition_search_type != VAR_BASED_PARTITION &&
sf->partition_search_type != REFERENCE_PARTITION) {
return;
@@ -617,6 +592,11 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
? (cpi->y_dequant[q][1] << 3)
: 8000;
+ if (cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe)) {
+ cpi->vbp_threshold_sad = 0;
+ cpi->vbp_threshold_copy = 0;
+ }
}
cpi->vbp_threshold_minmax = 15 + (q >> 3);
}
@@ -1203,6 +1183,7 @@ static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
cpi->content_state_sb_fd[sb_offset] = 0;
}
}
+ if (tmp_sad == 0) x->zero_temp_sad_source = 1;
return tmp_sad;
}
@@ -1238,15 +1219,18 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
int pixels_wide = 64, pixels_high = 64;
int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
+ int scene_change_detected =
+ cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe);
// For the variance computation under SVC mode, we treat the frame as key if
// the reference (base layer frame) is key frame (i.e., is_key_frame == 1).
int is_key_frame =
- (cm->frame_type == KEY_FRAME ||
+ (frame_is_intra_only(cm) ||
(is_one_pass_cbr_svc(cpi) &&
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
// Always use 4x4 partition for key frame.
- const int use_4x4_partition = cm->frame_type == KEY_FRAME;
+ const int use_4x4_partition = frame_is_intra_only(cm);
const int low_res = (cm->width <= 352 && cm->height <= 288);
int variance4x4downsample[16];
int segment_id;
@@ -1299,6 +1283,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
// If source_sad is low copy the partition without computing the y_sad.
if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
+ !scene_change_detected &&
copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
x->sb_use_mv_part = 1;
if (cpi->sf.svc_use_lowres_part &&
@@ -1327,7 +1312,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
// Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
// 5-20 for the 16x16 blocks.
- force_split[0] = 0;
+ force_split[0] = scene_change_detected;
if (!is_key_frame) {
// In the case of spatial/temporal scalable coding, the assumption here is
@@ -1343,7 +1328,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
assert(yv12 != NULL);
- if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
+ if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
+ cpi->svc.use_gf_temporal_ref_current_layer) {
// For now, GOLDEN will not be used for non-zero spatial layers, since
// it may not be a temporal reference.
yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
@@ -1660,11 +1646,11 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
- if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) {
+ if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) {
update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
}
- if (cm->frame_type != KEY_FRAME && cpi->sf.svc_use_lowres_part &&
+ if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
@@ -1927,13 +1913,22 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
}
if (aq_mode == VARIANCE_AQ) {
- const int energy =
- bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);
-
if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
cpi->force_update_segmentation ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
- mi->segment_id = vp9_vaq_segment_id(energy);
+ int min_energy;
+ int max_energy;
+
+ // Get sub block energy range
+ if (bsize >= BLOCK_32X32) {
+ vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
+ &max_energy);
+ } else {
+ min_energy = bsize <= BLOCK_16X16 ? x->mb_energy
+ : vp9_block_energy(cpi, x, bsize);
+ }
+
+ mi->segment_id = vp9_vaq_segment_id(min_energy);
} else {
const uint8_t *const map =
cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
@@ -1963,6 +1958,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
if (cyclic_refresh_segment_id_boosted(
get_segment_id(cm, map, bsize, mi_row, mi_col)))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
+ } else {
+ if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
}
// Find best coding mode & reconstruct the MB so it is available
@@ -1991,11 +1988,14 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
}
- x->rdmult = orig_rdmult;
-
// TODO(jingning) The rate-distortion optimization flow needs to be
// refactored to provide proper exit/return handle.
- if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
+ if (rd_cost->rate == INT_MAX)
+ rd_cost->rdcost = INT64_MAX;
+ else
+ rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+
+ x->rdmult = orig_rdmult;
ctx->rate = rd_cost->rate;
ctx->dist = rd_cost->dist;
@@ -2122,6 +2122,10 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
PICK_MODE_CONTEXT *ctx) {
MACROBLOCK *const x = &td->mb;
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+
+ if (cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ)
+ x->rdmult = x->cb_rdmult;
+
update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
@@ -2453,7 +2457,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
}
x->skip = ctx->skip;
- x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0];
+ x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0];
}
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
@@ -2629,6 +2633,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
ctx, INT64_MAX);
break;
case PARTITION_HORZ:
+ pc_tree->horizontal[0].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->horizontal[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2638,6 +2643,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ pc_tree->horizontal[1].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
&tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
@@ -2650,6 +2656,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
}
break;
case PARTITION_VERT:
+ pc_tree->vertical[0].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->vertical[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2659,6 +2666,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
&tmp_rdc, subsize,
&pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
@@ -3030,14 +3038,232 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
}
#endif
-// Calculate the score used in machine-learning based partition search early
-// termination.
-static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- const double *clf;
- const double *mean;
- const double *sd;
+#define NN_MAX_HIDDEN_LAYERS 10
+#define NN_MAX_NODES_PER_LAYER 128
+
+// Neural net model config.
+typedef struct {
+ int num_inputs; // Number of input nodes, i.e. features.
+ int num_outputs; // Number of output nodes.
+ int num_hidden_layers; // Number of hidden layers, maximum 10.
+ // Number of nodes for each hidden layer.
+ int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS];
+ // Weight parameters, indexed by layer.
+ const float *weights[NN_MAX_HIDDEN_LAYERS + 1];
+ // Bias parameters, indexed by layer.
+ const float *bias[NN_MAX_HIDDEN_LAYERS + 1];
+} NN_CONFIG;
+
+// Calculate prediction based on the given input features and neural net config.
+// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
+// layer.
+static void nn_predict(const float *features, const NN_CONFIG *nn_config,
+ float *output) {
+ int num_input_nodes = nn_config->num_inputs;
+ int buf_index = 0;
+ float buf[2][NN_MAX_NODES_PER_LAYER];
+ const float *input_nodes = features;
+
+ // Propagate hidden layers.
+ const int num_layers = nn_config->num_hidden_layers;
+ int layer, node, i;
+ assert(num_layers <= NN_MAX_HIDDEN_LAYERS);
+ for (layer = 0; layer < num_layers; ++layer) {
+ const float *weights = nn_config->weights[layer];
+ const float *bias = nn_config->bias[layer];
+ float *output_nodes = buf[buf_index];
+ const int num_output_nodes = nn_config->num_hidden_nodes[layer];
+ assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
+ for (node = 0; node < num_output_nodes; ++node) {
+ float val = 0.0f;
+ for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
+ val += bias[node];
+ // ReLU as activation function.
+ val = VPXMAX(val, 0.0f);
+ output_nodes[node] = val;
+ weights += num_input_nodes;
+ }
+ num_input_nodes = num_output_nodes;
+ input_nodes = output_nodes;
+ buf_index = 1 - buf_index;
+ }
+
+ // Final output layer.
+ {
+ const float *weights = nn_config->weights[num_layers];
+ for (node = 0; node < nn_config->num_outputs; ++node) {
+ const float *bias = nn_config->bias[num_layers];
+ float val = 0.0f;
+ for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
+ output[node] = val + bias[node];
+ weights += num_input_nodes;
+ }
+ }
+}
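
A minimal usage sketch of nn_predict() with the 64x64 config defined below; the feature values here are placeholders, whereas the real caller (ml_pruning_partition()) passes z-scored rate/distortion/context features:

/* Hypothetical call site for illustration; feature values are made up. */
float features[7] = { 0.1f, -0.5f, 0.3f, 0.0f, 1.2f, -0.7f, 0.4f };
float nn_score;
nn_predict(features, &partition_nnconfig_64x64, &nn_score);
/* One hidden layer of 8 ReLU nodes feeding a single linear output; the
 * caller thresholds nn_score together with a linear model's score
 * before terminating the partition search early. */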
+
+static const float partition_nn_weights_64x64_layer0[7 * 8] = {
+ -3.571348f, 0.014835f, -3.255393f, -0.098090f, -0.013120f, 0.000221f,
+ 0.056273f, 0.190179f, -0.268130f, -1.828242f, -0.010655f, 0.937244f,
+ -0.435120f, 0.512125f, 1.610679f, 0.190816f, -0.799075f, -0.377348f,
+ -0.144232f, 0.614383f, -0.980388f, 1.754150f, -0.185603f, -0.061854f,
+ -0.807172f, 1.240177f, 1.419531f, -0.438544f, -5.980774f, 0.139045f,
+ -0.032359f, -0.068887f, -1.237918f, 0.115706f, 0.003164f, 2.924212f,
+ 1.246838f, -0.035833f, 0.810011f, -0.805894f, 0.010966f, 0.076463f,
+ -4.226380f, -2.437764f, -0.010619f, -0.020935f, -0.451494f, 0.300079f,
+ -0.168961f, -3.326450f, -2.731094f, 0.002518f, 0.018840f, -1.656815f,
+ 0.068039f, 0.010586f,
+};
+
+static const float partition_nn_bias_64x64_layer0[8] = {
+ -3.469882f, 0.683989f, 0.194010f, 0.313782f,
+ -3.153335f, 2.245849f, -1.946190f, -3.740020f,
+};
+
+static const float partition_nn_weights_64x64_layer1[8] = {
+ -8.058566f, 0.108306f, -0.280620f, -0.818823f,
+ -6.445117f, 0.865364f, -1.127127f, -8.808660f,
+};
+
+static const float partition_nn_bias_64x64_layer1[1] = {
+ 6.46909416f,
+};
+
+static const NN_CONFIG partition_nnconfig_64x64 = {
+ 7, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ partition_nn_weights_64x64_layer0,
+ partition_nn_weights_64x64_layer1,
+ },
+ {
+ partition_nn_bias_64x64_layer0,
+ partition_nn_bias_64x64_layer1,
+ },
+};
+
+static const float partition_nn_weights_32x32_layer0[7 * 8] = {
+ -0.295437f, -4.002648f, -0.205399f, -0.060919f, 0.708037f, 0.027221f,
+ -0.039137f, -0.907724f, -3.151662f, 0.007106f, 0.018726f, -0.534928f,
+ 0.022744f, 0.000159f, -1.717189f, -3.229031f, -0.027311f, 0.269863f,
+ -0.400747f, -0.394366f, -0.108878f, 0.603027f, 0.455369f, -0.197170f,
+ 1.241746f, -1.347820f, -0.575636f, -0.462879f, -2.296426f, 0.196696f,
+ -0.138347f, -0.030754f, -0.200774f, 0.453795f, 0.055625f, -3.163116f,
+ -0.091003f, -0.027028f, -0.042984f, -0.605185f, 0.143240f, -0.036439f,
+ -0.801228f, 0.313409f, -0.159942f, 0.031267f, 0.886454f, -1.531644f,
+ -0.089655f, 0.037683f, -0.163441f, -0.130454f, -0.058344f, 0.060011f,
+ 0.275387f, 1.552226f,
+};
+
+static const float partition_nn_bias_32x32_layer0[8] = {
+ -0.838372f, -2.609089f, -0.055763f, 1.329485f,
+ -1.297638f, -2.636622f, -0.826909f, 1.012644f,
+};
+
+static const float partition_nn_weights_32x32_layer1[8] = {
+ -1.792632f, -7.322353f, -0.683386f, 0.676564f,
+ -1.488118f, -7.527719f, 1.240163f, 0.614309f,
+};
+
+static const float partition_nn_bias_32x32_layer1[1] = {
+ 4.97422546f,
+};
+
+static const NN_CONFIG partition_nnconfig_32x32 = {
+ 7, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ partition_nn_weights_32x32_layer0,
+ partition_nn_weights_32x32_layer1,
+ },
+ {
+ partition_nn_bias_32x32_layer0,
+ partition_nn_bias_32x32_layer1,
+ },
+};
+
+static const float partition_nn_weights_16x16_layer0[7 * 8] = {
+ -1.717673f, -4.718130f, -0.125725f, -0.183427f, -0.511764f, 0.035328f,
+ 0.130891f, -3.096753f, 0.174968f, -0.188769f, -0.640796f, 1.305661f,
+ 1.700638f, -0.073806f, -4.006781f, -1.630999f, -0.064863f, -0.086410f,
+ -0.148617f, 0.172733f, -0.018619f, 2.152595f, 0.778405f, -0.156455f,
+ 0.612995f, -0.467878f, 0.152022f, -0.236183f, 0.339635f, -0.087119f,
+ -3.196610f, -1.080401f, -0.637704f, -0.059974f, 1.706298f, -0.793705f,
+ -6.399260f, 0.010624f, -0.064199f, -0.650621f, 0.338087f, -0.001531f,
+ 1.023655f, -3.700272f, -0.055281f, -0.386884f, 0.375504f, -0.898678f,
+ 0.281156f, -0.314611f, 0.863354f, -0.040582f, -0.145019f, 0.029329f,
+ -2.197880f, -0.108733f,
+};
+
+static const float partition_nn_bias_16x16_layer0[8] = {
+ 0.411516f, -2.143737f, -3.693192f, 2.123142f,
+ -1.356910f, -3.561016f, -0.765045f, -2.417082f,
+};
+
+static const float partition_nn_weights_16x16_layer1[8] = {
+ -0.619755f, -2.202391f, -4.337171f, 0.611319f,
+ 0.377677f, -4.998723f, -1.052235f, 1.949922f,
+};
+
+static const float partition_nn_bias_16x16_layer1[1] = {
+ 3.20981717f,
+};
+
+static const NN_CONFIG partition_nnconfig_16x16 = {
+ 7, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ partition_nn_weights_16x16_layer0,
+ partition_nn_weights_16x16_layer1,
+ },
+ {
+ partition_nn_bias_16x16_layer0,
+ partition_nn_bias_16x16_layer1,
+ },
+};
+
+static const float partition_feature_mean[24] = {
+ 303501.697372f, 3042630.372158f, 24.694696f, 1.392182f,
+ 689.413511f, 162.027012f, 1.478213f, 0.0,
+ 135382.260230f, 912738.513263f, 28.845217f, 1.515230f,
+ 544.158492f, 131.807995f, 1.436863f, 0.0f,
+ 43682.377587f, 208131.711766f, 28.084737f, 1.356677f,
+ 138.254122f, 119.522553f, 1.252322f, 0.0f,
+};
+
+static const float partition_feature_std[24] = {
+ 673689.212982f, 5996652.516628f, 0.024449f, 1.989792f,
+ 985.880847f, 0.014638f, 2.001898f, 0.0f,
+ 208798.775332f, 1812548.443284f, 0.018693f, 1.838009f,
+ 396.986910f, 0.015657f, 1.332541f, 0.0f,
+ 55888.847031f, 448587.962714f, 0.017900f, 1.904776f,
+ 98.652832f, 0.016598f, 1.320992f, 0.0f,
+};
+
+// Error tolerance: 0.01%-0.05%-0.1%
+static const float partition_linear_weights[24] = {
+ 0.111736f, 0.289977f, 0.042219f, 0.204765f, 0.120410f, -0.143863f,
+ 0.282376f, 0.847811f, 0.637161f, 0.131570f, 0.018636f, 0.202134f,
+ 0.112797f, 0.028162f, 0.182450f, 1.124367f, 0.386133f, 0.083700f,
+ 0.050028f, 0.150873f, 0.061119f, 0.109318f, 0.127255f, 0.625211f,
+};
+
+// Machine-learning based partition search early termination.
+// Return 1 to skip split and rect partitions.
+static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+ PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
const int mag_mv =
abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
const int left_in_image = !!xd->left_mi;
@@ -3047,11 +3273,32 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int above_par = 0; // above_partitioning
int left_par = 0; // left_partitioning
int last_par = 0; // last_partitioning
- BLOCK_SIZE context_size;
- double score;
int offset = 0;
+ int i;
+ BLOCK_SIZE context_size;
+ const NN_CONFIG *nn_config = NULL;
+ const float *mean, *sd, *linear_weights;
+ float nn_score, linear_score;
+ float features[7];
assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
+ vpx_clear_system_state();
+
+ switch (bsize) {
+ case BLOCK_64X64:
+ offset = 0;
+ nn_config = &partition_nnconfig_64x64;
+ break;
+ case BLOCK_32X32:
+ offset = 8;
+ nn_config = &partition_nnconfig_32x32;
+ break;
+ case BLOCK_16X16:
+ offset = 16;
+ nn_config = &partition_nnconfig_16x16;
+ break;
+ default: assert(0 && "Unexpected block size."); return 0;
+ }
if (above_in_image) {
context_size = xd->above_mi->sb_type;
@@ -3077,25 +3324,348 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
last_par = 1;
}
- if (bsize == BLOCK_64X64)
- offset = 0;
- else if (bsize == BLOCK_32X32)
- offset = 8;
- else if (bsize == BLOCK_16X16)
- offset = 16;
-
- // early termination score calculation
- clf = &classifiers[offset];
- mean = &train_mean[offset];
- sd = &train_stdm[offset];
- score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) +
- clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) +
- clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
- clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * sd[3]) +
- clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) +
- clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
- clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
- return score;
+ mean = &partition_feature_mean[offset];
+ sd = &partition_feature_std[offset];
+ features[0] = ((float)ctx->rate - mean[0]) / sd[0];
+ features[1] = ((float)ctx->dist - mean[1]) / sd[1];
+ features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2];
+ features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3];
+ features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4];
+ features[5] = ((float)cm->base_qindex - mean[5]) * sd[5];
+ features[6] = ((float)last_par - mean[6]) * sd[6];
+
+ // Predict using linear model.
+ linear_weights = &partition_linear_weights[offset];
+ linear_score = linear_weights[7];
+ for (i = 0; i < 7; ++i) linear_score += linear_weights[i] * features[i];
+ if (linear_score > 0.1f) return 0;
+
+ // Predict using neural net model.
+ nn_predict(features, nn_config, &nn_score);
+
+ if (linear_score < -0.0f && nn_score < 0.1f) return 1;
+ if (nn_score < -0.0f && linear_score < 0.1f) return 1;
+ return 0;
+}
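// Editor's sketch (illustrative, not part of the patch): the gate above
// keeps the full partition search whenever the linear model is clearly
// positive, and prunes only when both models lean negative. Assuming the
// layout of partition_linear_weights (7 per-feature weights, then a bias):
static int prune_gate_sketch(const float *w, const float *features,
                             float nn_score) {
  float linear_score = w[7];
  int i;
  for (i = 0; i < 7; ++i) linear_score += w[i] * features[i];
  if (linear_score > 0.1f) return 0;  // Linear model is confident: keep.
  if (linear_score < 0.0f && nn_score < 0.1f) return 1;  // Prune.
  if (nn_score < 0.0f && linear_score < 0.1f) return 1;  // Prune.
  return 0;
}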
+
+#define FEATURES 4
+#define Q_CTX 3
+#define RESOLUTION_CTX 2
+static const float partition_breakout_weights_64[RESOLUTION_CTX][Q_CTX]
+ [FEATURES + 1] = {
+ {
+ {
+ -0.016673f,
+ -0.001025f,
+ -0.000032f,
+ 0.000833f,
+ 1.94261885f - 2.1f,
+ },
+ {
+ -0.160867f,
+ -0.002101f,
+ 0.000011f,
+ 0.002448f,
+ 1.65738142f - 2.5f,
+ },
+ {
+ -0.628934f,
+ -0.011459f,
+ -0.000009f,
+ 0.013833f,
+ 1.47982645f - 1.6f,
+ },
+ },
+ {
+ {
+ -0.064309f,
+ -0.006121f,
+ 0.000232f,
+ 0.005778f,
+ 0.7989465f - 5.0f,
+ },
+ {
+ -0.314957f,
+ -0.009346f,
+ -0.000225f,
+ 0.010072f,
+ 2.80695581f - 5.5f,
+ },
+ {
+ -0.635535f,
+ -0.015135f,
+ 0.000091f,
+ 0.015247f,
+ 2.90381241f - 5.0f,
+ },
+ },
+ };
+
+static const float partition_breakout_weights_32[RESOLUTION_CTX][Q_CTX]
+ [FEATURES + 1] = {
+ {
+ {
+ -0.010554f,
+ -0.003081f,
+ -0.000134f,
+ 0.004491f,
+ 1.68445992f - 3.5f,
+ },
+ {
+ -0.051489f,
+ -0.007609f,
+ 0.000016f,
+ 0.009792f,
+ 1.28089404f - 2.5f,
+ },
+ {
+ -0.163097f,
+ -0.013081f,
+ 0.000022f,
+ 0.019006f,
+ 1.36129403f - 3.2f,
+ },
+ },
+ {
+ {
+ -0.024629f,
+ -0.006492f,
+ -0.000254f,
+ 0.004895f,
+ 1.27919173f - 4.5f,
+ },
+ {
+ -0.083936f,
+ -0.009827f,
+ -0.000200f,
+ 0.010399f,
+ 2.73731065f - 4.5f,
+ },
+ {
+ -0.279052f,
+ -0.013334f,
+ 0.000289f,
+ 0.023203f,
+ 2.43595719f - 3.5f,
+ },
+ },
+ };
+
+static const float partition_breakout_weights_16[RESOLUTION_CTX][Q_CTX]
+ [FEATURES + 1] = {
+ {
+ {
+ -0.013154f,
+ -0.002404f,
+ -0.000977f,
+ 0.008450f,
+ 2.57404566f - 5.5f,
+ },
+ {
+ -0.019146f,
+ -0.004018f,
+ 0.000064f,
+ 0.008187f,
+ 2.15043926f - 2.5f,
+ },
+ {
+ -0.075755f,
+ -0.010858f,
+ 0.000030f,
+ 0.024505f,
+ 2.06848121f - 2.5f,
+ },
+ },
+ {
+ {
+ -0.007636f,
+ -0.002751f,
+ -0.000682f,
+ 0.005968f,
+ 0.19225763f - 4.5f,
+ },
+ {
+ -0.047306f,
+ -0.009113f,
+ -0.000518f,
+ 0.016007f,
+ 2.61068869f - 4.0f,
+ },
+ {
+ -0.069336f,
+ -0.010448f,
+ -0.001120f,
+ 0.023083f,
+ 1.47591054f - 5.5f,
+ },
+ },
+ };
+
+static const float partition_breakout_weights_8[RESOLUTION_CTX][Q_CTX]
+ [FEATURES + 1] = {
+ {
+ {
+ -0.011807f,
+ -0.009873f,
+ -0.000931f,
+ 0.034768f,
+ 1.32254851f - 2.0f,
+ },
+ {
+ -0.003861f,
+ -0.002701f,
+ 0.000100f,
+ 0.013876f,
+ 1.96755111f - 1.5f,
+ },
+ {
+ -0.013522f,
+ -0.008677f,
+ -0.000562f,
+ 0.034468f,
+ 1.53440356f - 1.5f,
+ },
+ },
+ {
+ {
+ -0.003221f,
+ -0.002125f,
+ 0.000993f,
+ 0.012768f,
+ 0.03541421f - 2.0f,
+ },
+ {
+ -0.006069f,
+ -0.007335f,
+ 0.000229f,
+ 0.026104f,
+ 0.17135315f - 1.5f,
+ },
+ {
+ -0.039894f,
+ -0.011419f,
+ 0.000070f,
+ 0.061817f,
+ 0.6739977f - 1.5f,
+ },
+ },
+ };
+
+// ML-based partition search breakout.
+static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
+ const MACROBLOCK *const x,
+ const RD_COST *const rd_cost) {
+ DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
+ const VP9_COMMON *const cm = &cpi->common;
+ float features[FEATURES];
+ const float *linear_weights = NULL; // Linear model weights.
+ float linear_score = 0.0f;
+ const int qindex = cm->base_qindex;
+ const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2);
+ const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720;
+ const int resolution_ctx = is_720p_or_larger ? 1 : 0;
+
+ switch (bsize) {
+ case BLOCK_64X64:
+ linear_weights = partition_breakout_weights_64[resolution_ctx][q_ctx];
+ break;
+ case BLOCK_32X32:
+ linear_weights = partition_breakout_weights_32[resolution_ctx][q_ctx];
+ break;
+ case BLOCK_16X16:
+ linear_weights = partition_breakout_weights_16[resolution_ctx][q_ctx];
+ break;
+ case BLOCK_8X8:
+ linear_weights = partition_breakout_weights_8[resolution_ctx][q_ctx];
+ break;
+ default: assert(0 && "Unexpected block size."); return 0;
+ }
+ if (!linear_weights) return 0;
+
+ { // Generate feature values.
+ const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
+ const int num_pels_log2 = num_pels_log2_lookup[bsize];
+ int feature_index = 0;
+ unsigned int var, sse;
+ float rate_f, dist_f;
+
+ var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, &sse);
+ var = var >> num_pels_log2;
+
+ vpx_clear_system_state();
+
+ rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX);
+ dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2);
+ rate_f =
+ ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
+ rate_f;
+
+ features[feature_index++] = rate_f;
+ features[feature_index++] = dist_f;
+ features[feature_index++] = (float)var;
+ features[feature_index++] = (float)ac_q;
+ assert(feature_index == FEATURES);
+ }
+
+ { // Calculate the output score.
+ int i;
+ linear_score = linear_weights[FEATURES];
+ for (i = 0; i < FEATURES; ++i)
+ linear_score += linear_weights[i] * features[i];
+ }
+
+ return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx];
+}
+#undef FEATURES
+#undef Q_CTX
+#undef RESOLUTION_CTX
+
+int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int orig_rdmult) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost = 0;
+ int64_t mc_dep_cost = 0;
+ int mi_wide = num_8x8_blocks_wide_lookup[bsize];
+ int mi_high = num_8x8_blocks_high_lookup[bsize];
+ int row, col;
+
+ int dr = 0;
+ int count = 0;
+ double r0, rk, beta;
+
+ if (tpl_frame->is_valid == 0) return orig_rdmult;
+
+ if (cpi->common.show_frame) return orig_rdmult;
+
+ for (row = mi_row; row < mi_row + mi_high; ++row) {
+ for (col = mi_col; col < mi_col + mi_wide; ++col) {
+ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+
+ if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
+
+ intra_cost += this_stats->intra_cost;
+ mc_dep_cost += this_stats->mc_dep_cost;
+
+ ++count;
+ }
+ }
+
+ vpx_clear_system_state();
+
+ r0 = cpi->rd.r0;
+ rk = (double)intra_cost / mc_dep_cost;
+ beta = r0 / rk;
+ dr = vp9_get_adaptive_rdmult(cpi, beta);
+
+ dr = VPXMIN(dr, orig_rdmult * 3 / 2);
+ dr = VPXMAX(dr, orig_rdmult * 1 / 2);
+
+ dr = VPXMAX(1, dr);
+
+ return dr;
}
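// Editor's note (illustrative): in get_rdmult_delta(), r0 is the frame-level
// intra_cost / mc_dep_cost ratio precomputed into cpi->rd.r0, rk is the same
// ratio over this block, and beta = r0 / rk feeds vp9_get_adaptive_rdmult().
// The adapted value is then clamped to [orig_rdmult / 2, orig_rdmult * 3 / 2]
// with a final floor of 1, roughly:
static int clamp_adapted_rdmult_sketch(int adapted, int orig_rdmult) {
  if (adapted > orig_rdmult * 3 / 2) adapted = orig_rdmult * 3 / 2;
  if (adapted < orig_rdmult / 2) adapted = orig_rdmult / 2;
  return adapted < 1 ? 1 : adapted;
}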
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
@@ -3145,15 +3715,22 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
+ int must_split = 0;
+ int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ
+ ? x->cb_rdmult
+ : cpi->rd.RDMULT;
+ // Ref frames picked in the i-th quarter subblock during the square
+ // partition RD search; they may be used to prune the ref frame selection
+ // of rect partitions.
+ uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };
(void)*tp_orig;
assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]);
- // Adjust dist breakout threshold according to the partition size.
dist_breakout_thr >>=
8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+
rate_breakout_thr *= num_pels_log2_lookup[bsize];
vp9_rd_cost_init(&this_rdc);
@@ -3177,10 +3754,18 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
}
+ // Get the sub-block energy range.
+ if (bsize >= BLOCK_16X16) {
+ int min_energy, max_energy;
+ vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
+ &max_energy);
+ must_split = (min_energy < -3) && (max_energy - min_energy > 2);
+ }
+
// Determine partition types in search according to the speed features.
// The threshold set here has to be of square block size.
if (cpi->sf.auto_min_max_partition_size) {
- partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
+ partition_none_allowed &= (bsize <= max_size);
partition_horz_allowed &=
((bsize <= max_size && bsize > min_size) || force_horz_split);
partition_vert_allowed &=
@@ -3267,10 +3852,18 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int ref1 = ctx->mic.ref_frame[0];
+ const int ref2 = ctx->mic.ref_frame[1];
+ for (i = 0; i < 4; ++i) {
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
+ }
if (bsize >= BLOCK_8X8) {
+ this_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
+ cpi->partition_cost[pl][PARTITION_NONE], 0);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
- this_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
}
if (this_rdc.rdcost < best_rdc.rdcost) {
@@ -3279,28 +3872,42 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
- if (!cpi->sf.ml_partition_search_early_termination) {
- // If all y, u, v transform blocks in this partition are skippable,
- // and the dist & rate are within the thresholds, the partition search
- // is terminated for current branch of the partition search tree.
- if (!x->e_mbd.lossless && ctx->skippable &&
- ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
- (best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr))) {
- do_split = 0;
- do_rect = 0;
- }
- } else {
+ if (cpi->sf.ml_partition_search_early_termination) {
// Currently, the machine-learning based partition search early
// termination is only used when bsize is 16x16, 32x32 or 64x64,
// VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
if (!x->e_mbd.lossless &&
!segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
- if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) {
+ if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) {
+ do_split = 0;
+ do_rect = 0;
+ }
+ }
+ }
+
+ if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) {
+ int use_ml_based_breakout =
+ cpi->sf.use_ml_partition_search_breakout &&
+ cm->base_qindex >= 100;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ use_ml_based_breakout = 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (use_ml_based_breakout) {
+ if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
do_split = 0;
do_rect = 0;
}
+ } else {
+ if (!cpi->sf.ml_partition_search_early_termination) {
+ if ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
+ (best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr)) {
+ do_split = 0;
+ do_rect = 0;
+ }
+ }
}
}
@@ -3369,7 +3976,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// PARTITION_SPLIT
// TODO(jingning): use the motion vectors given by the above search as
// the starting point of motion search in the following partition type check.
- if (do_split) {
+ if (do_split || must_split) {
subsize = get_subsize(bsize, PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
i = 4;
@@ -3377,10 +3984,21 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
pc_tree->leaf_split[0], best_rdc.rdcost);
-
- if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX;
+ if (sum_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ } else {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0];
+ const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1];
+ for (i = 0; i < 4; ++i) {
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
+ }
+ }
} else {
- for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
+ for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split);
+ ++i) {
const int x_idx = (i & 1) * mi_step;
const int y_idx = (i >> 1) * mi_step;
@@ -3390,14 +4008,30 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
pc_tree->split[i]->index = i;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions)
+ pc_tree->split[i]->none.rate = INT_MAX;
rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
mi_col + x_idx, subsize, &this_rdc,
+ // A must-split test here increases the number of
+ // sub-partitions but hurts metrics results quite a
+ // bit, so this extra test is commented out pending
+ // further tests on whether it adds much in terms of
+ // visual quality.
+ // (must_split) ? best_rdc.rdcost
+ // : best_rdc.rdcost - sum_rdc.rdcost,
best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
break;
} else {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions &&
+ pc_tree->split[i]->none.rate != INT_MAX) {
+ const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0];
+ const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1];
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
@@ -3405,11 +4039,13 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
}
- if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
+ if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) {
+ sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
+ cpi->partition_cost[pl][PARTITION_SPLIT], 0);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
+ if ((sum_rdc.rdcost < best_rdc.rdcost) ||
+ (must_split && (sum_rdc.dist < best_rdc.dist))) {
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_SPLIT;
@@ -3433,6 +4069,22 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
}
+ pc_tree->horizontal[0].skip_ref_frame_mask = 0;
+ pc_tree->horizontal[1].skip_ref_frame_mask = 0;
+ pc_tree->vertical[0].skip_ref_frame_mask = 0;
+ pc_tree->vertical[1].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ uint8_t used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[1];
+ if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[2] | ref_frames_used[3];
+ if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[2];
+ if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[1] | ref_frames_used[3];
+ if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
+ }
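// Editor's note (illustrative): a set bit r in skip_ref_frame_mask means
// reference frame r is skipped during the rect partition search. E.g. if
// the two left quarters of the split search only ever picked LAST_FRAME,
// then used_frames == (1 << LAST_FRAME) and vertical[0] prunes the rest.
#include <stdint.h>
static int ref_is_pruned_sketch(uint8_t skip_ref_frame_mask, int ref_frame) {
  return (skip_ref_frame_mask >> ref_frame) & 1;
}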
+
// PARTITION_HORZ
if (partition_horz_allowed &&
(do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
@@ -3467,8 +4119,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
+ cpi->partition_cost[pl][PARTITION_HORZ], 0);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_HORZ;
@@ -3515,8 +4168,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
+ cpi->partition_cost[pl][PARTITION_VERT], 0);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_VERT;
@@ -3626,6 +4280,14 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, td->pc_root);
} else {
+ int orig_rdmult = cpi->rd.RDMULT;
+ x->cb_rdmult = orig_rdmult;
+ if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
+ int dr =
+ get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
+ x->cb_rdmult = dr;
+ }
+
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
@@ -3734,6 +4396,18 @@ static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x,
}
}
+static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x,
+ RD_COST *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ TileDataEnc *tile_data, int mi_row,
+ int mi_col) {
+ if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
+ vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
+ } else {
+ vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
+ }
+}
+
static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_COST *rd_cost, BLOCK_SIZE bsize,
@@ -3764,17 +4438,23 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
if (cyclic_refresh_segment_id_boosted(mi->segment_id))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
- if (cm->frame_type == KEY_FRAME)
+ if (frame_is_intra_only(cm))
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)
hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
mi_col);
else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
- else if (bsize >= BLOCK_8X8)
- vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
- else
+ else if (bsize >= BLOCK_8X8) {
+ if (cpi->rc.hybrid_intra_scene_change)
+ hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
+ mi_col);
+ else
+ vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
+ ctx);
+ } else {
vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
+ }
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
@@ -4401,6 +5081,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
x->skip_low_source_sad = 0;
x->lowvar_highsumdiff = 0;
x->content_state_sb = 0;
+ x->zero_temp_sad_source = 0;
x->sb_use_mv_part = 0;
x->sb_mvcol_part = 0;
x->sb_mvrow_part = 0;
@@ -4459,7 +5140,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
// nonrd_pick_partition does not support 4x4 partition, so avoid it
// on key frame for now.
if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
- cpi->oxcf.speed < 6 && cm->frame_type != KEY_FRAME &&
+ cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
// Use lower max_partition_size for low resolutions.
if (cm->width <= 352 && cm->height <= 288)
@@ -4475,7 +5156,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
// TODO(marpan): Seems like nonrd_select_partition does not support
// 4x4 partition. Since 4x4 is used on key frame, use this switch
// for now.
- if (cm->frame_type == KEY_FRAME)
+ if (frame_is_intra_only(cm))
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
else
@@ -4663,6 +5344,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
+#if CONFIG_CONSISTENT_RECODE
+ tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
+#endif
tile_data->mode_map[i][j] = j;
}
}
@@ -4787,7 +5471,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif // CONFIG_VP9_HIGHBITDEPTH
x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
-
+#if CONFIG_CONSISTENT_RECODE
+ x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1;
+#endif
if (xd->lossless) x->optimize = 0;
cm->tx_mode = select_tx_mode(cpi, xd);
@@ -4830,6 +5516,27 @@ static void encode_frame_internal(VP9_COMP *cpi) {
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
source_var_based_partition_search_method(cpi);
+ } else if (cpi->twopass.gf_group.index && cpi->sf.enable_tpl_model) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost_base = 0;
+ int64_t mc_dep_cost_base = 0;
+ int row, col;
+
+ for (row = 0; row < cm->mi_rows; ++row) {
+ for (col = 0; col < cm->mi_cols; ++col) {
+ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+ intra_cost_base += this_stats->intra_cost;
+ mc_dep_cost_base += this_stats->mc_dep_cost;
+ }
+ }
+
+ vpx_clear_system_state();
+
+ if (tpl_frame->is_valid)
+ cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
}
{
@@ -4912,9 +5619,48 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
return sum_delta / (cm->mi_rows * cm->mi_cols);
}
+#if CONFIG_CONSISTENT_RECODE
+static void restore_encode_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes[i][j] =
+ rd_opt->prediction_type_threshes_prev[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact[i][j] =
+ tile_data->thresh_freq_fact_prev[i][j];
+ }
+ }
+ }
+ }
+
+ cm->interp_filter = cpi->sf.default_interp_filter;
+}
+#endif
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+#if CONFIG_CONSISTENT_RECODE
+ restore_encode_params(cpi);
+#endif
+
// In the longer term the encoder should be generalized to match the
// decoder such that we allow compound where one of the 3 buffers has a
// different sign bias and that buffer is then the fixed ref. However, this
@@ -5095,7 +5841,8 @@ static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
for (y = 0; y < ymis; y++)
for (x = 0; x < xmis; x++) {
int map_offset = block_index + y * cm->mi_cols + x;
- if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) &&
+ mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
if (abs(mv.row) < 8 && abs(mv.col) < 8) {
if (cpi->consec_zero_mv[map_offset] < 255)
cpi->consec_zero_mv[map_offset]++;
@@ -5190,7 +5937,11 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
- if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
+ if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
+ (!cpi->use_svc ||
+ (cpi->use_svc &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
}
}
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 3384de7ea..05128eb1f 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -35,6 +35,7 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_tile_common.h"
+#include "vp9/common/vp9_scan.h"
#include "vp9/encoder/vp9_alt_ref_aq.h"
#include "vp9/encoder/vp9_aq_360.h"
@@ -44,10 +45,11 @@
#include "vp9/encoder/vp9_bitstream.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_encodeframe.h"
+#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
-#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_ethread.h"
+#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_multi_thread.h"
@@ -84,6 +86,9 @@ static FILE *yuv_skinmap_file = NULL;
#ifdef OUTPUT_YUV_REC
FILE *yuv_rec_file;
#endif
+#ifdef OUTPUT_YUV_SVC_SRC
+FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
+#endif
#if 0
FILE *framepsnr;
@@ -839,6 +844,7 @@ static void vp9_enc_free_mi(VP9_COMMON *cm) {
cm->mi_grid_base = NULL;
vpx_free(cm->prev_mi_grid_base);
cm->prev_mi_grid_base = NULL;
+ cm->mi_alloc_size = 0;
}
static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
@@ -1371,9 +1377,14 @@ static void update_frame_size(VP9_COMP *cpi) {
}
static void init_buffer_indices(VP9_COMP *cpi) {
- cpi->lst_fb_idx = 0;
- cpi->gld_fb_idx = 1;
- cpi->alt_fb_idx = 2;
+ int ref_frame;
+
+ for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
+ cpi->ref_fb_idx[ref_frame] = ref_frame;
+
+ cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
+ cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
+ cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
}
static void init_level_constraint(LevelConstraint *lc) {
@@ -2082,7 +2093,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
BufferPool *const pool) {
- unsigned int i;
+ unsigned int i, frame;
VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
@@ -2250,6 +2261,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
#ifdef OUTPUT_YUV_REC
yuv_rec_file = fopen("rec.yuv", "wb");
#endif
+#ifdef OUTPUT_YUV_SVC_SRC
+ yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
+ yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
+ yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
+#endif
#if 0
framepsnr = fopen("framepsnr.stt", "a");
@@ -2328,6 +2344,23 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_set_speed_features_framesize_independent(cpi);
vp9_set_speed_features_framesize_dependent(cpi);
+ if (cpi->sf.enable_tpl_model) {
+ for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
+ int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+
+ CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
+ vpx_calloc(mi_rows * mi_cols,
+ sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
+ cpi->tpl_stats[frame].is_valid = 0;
+ cpi->tpl_stats[frame].width = mi_cols;
+ cpi->tpl_stats[frame].height = mi_rows;
+ cpi->tpl_stats[frame].stride = mi_cols;
+ cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
+ cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
+ }
+ }
+
// Allocate memory to store variances for a frame.
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
cpi->source_var_thresh = 0;
@@ -2416,7 +2449,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
void vp9_remove_compressor(VP9_COMP *cpi) {
VP9_COMMON *cm;
- unsigned int i;
+ unsigned int i, frame;
int t;
if (!cpi) return;
@@ -2511,6 +2544,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vp9_denoiser_free(&(cpi->denoiser));
#endif
+ for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
+ vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+ cpi->tpl_stats[frame].is_valid = 0;
+ }
+
for (t = 0; t < cpi->num_workers; ++t) {
VPxWorker *const worker = &cpi->workers[t];
EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
@@ -2568,6 +2606,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
#ifdef OUTPUT_YUV_REC
fclose(yuv_rec_file);
#endif
+#ifdef OUTPUT_YUV_SVC_SRC
+ fclose(yuv_svc_src[0]);
+ fclose(yuv_svc_src[1]);
+ fclose(yuv_svc_src[2]);
+#endif
#if 0
@@ -2917,7 +2960,208 @@ static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
return force_recode;
}
-void vp9_update_reference_frames(VP9_COMP *cpi) {
+// This function shifts the virtual indices of the LAST reference frames
+// as follows:
+// LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
+// when LAST_FRAME is updated.
+static INLINE void shift_last_ref_frames(VP9_COMP *cpi) {
+ int ref_frame;
+ for (ref_frame = LAST_REF_FRAMES - 1; ref_frame > 0; --ref_frame) {
+ cpi->ref_fb_idx[ref_frame] = cpi->ref_fb_idx[ref_frame - 1];
+
+ // [0] is allocated to the current coded frame. The statistics for the
+ // reference frames start at [LAST_FRAME], i.e. [1].
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ memcpy(cpi->interp_filter_selected[ref_frame + LAST_FRAME],
+ cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME],
+ sizeof(cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME]));
+ }
+ }
+}
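// Editor's trace (illustrative, assuming LAST_REF_FRAMES == 3): starting
// from ref_fb_idx = {4, 5, 6}, shift_last_ref_frames() yields {4, 4, 5};
// the caller then writes the retired LAST3 index into slot 0, giving
// {6, 4, 5}: the new frame lands in virtual index 6 as LAST_FRAME, the old
// LAST becomes LAST2, and the old LAST2 becomes LAST3.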
+
+void update_multi_arf_ref_frames(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ BufferPool *const pool = cm->buffer_pool;
+
+ // NOTE: Save the new show frame buffer index for --test-code=warn, i.e.,
+ // to verify that there is no mismatch between encoder and decoder.
+ if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx;
+
+ // At this point the new frame has been encoded.
+ // If any buffer copy / swapping is signaled it should be done here.
+
+ if (cm->frame_type == KEY_FRAME) {
+ int ref_frame;
+ for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->ref_fb_idx[ref_frame]],
+ cm->new_fb_idx);
+ }
+ return;
+ }
+
+ if (vp9_preserve_existing_gf(cpi)) {
+ // We have decided to preserve the previously existing golden frame as our
+ // new ARF frame. However, in the short term, in function
+ // vp9_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
+ // we're updating the GF with the current decoded frame, we save it to the
+ // ARF slot instead.
+ // We now have to update the ARF with the current frame and swap gld_fb_idx
+ // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
+ // slot and, if we're updating the GF, the current frame becomes the new GF.
+ int tmp;
+
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF_FRAME - 1]],
+ cm->new_fb_idx);
+ tmp = cpi->ref_fb_idx[ALTREF_FRAME - 1];
+ cpi->ref_fb_idx[ALTREF_FRAME - 1] = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
+ cpi->ref_fb_idx[GOLDEN_FRAME - 1] = tmp;
+
+ // We need to modify the mapping accordingly
+ cpi->arf_map[0] = cpi->ref_fb_idx[ALTREF_FRAME - 1];
+ } else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) {
+ // Deal with the special case of showing an existing internal ALTREF_FRAME:
+ // Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME
+ // by updating the virtual indices.
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ const int which_arf = gf_group->arf_ref_idx[gf_group->index];
+ int tmp;
+ assert(gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE);
+
+ tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
+ shift_last_ref_frames(cpi);
+
+ cpi->ref_fb_idx[LAST_FRAME - 1] = cpi->ref_fb_idx[ALTREF2_FRAME - 1];
+ cpi->ref_fb_idx[ALTREF2_FRAME - 1] = tmp;
+
+ // We need to modify the mapping accordingly
+ cpi->arf_map[which_arf] = cpi->ref_fb_idx[ALTREF2_FRAME - 1];
+
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[ALTREF2_FRAME],
+ sizeof(cpi->interp_filter_selected[ALTREF2_FRAME]));
+ } else { /* For non key/golden frames */
+ // === ALTREF_FRAME ===
+ if (cpi->refresh_alt_ref_frame) {
+ int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
+ int which_arf = 0;
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
+
+ memcpy(cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+
+ // === GOLDEN_FRAME ===
+ if (cpi->refresh_golden_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]],
+ cm->new_fb_idx);
+
+ memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+
+ // === BWDREF_FRAME ===
+ if (cpi->refresh_bwd_ref_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->ref_fb_idx[BWDREF_FRAME - 1]],
+ cm->new_fb_idx);
+
+ memcpy(cpi->interp_filter_selected[BWDREF_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+
+ // === ALTREF2_FRAME ===
+ if (cpi->refresh_alt2_ref_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]],
+ cm->new_fb_idx);
+
+ memcpy(cpi->interp_filter_selected[ALTREF2_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+ }
+
+ if (cpi->refresh_last_frame) {
+ // NOTE(zoeliu): We have two layers of mapping (1) from the per-frame
+ // reference to the reference frame buffer virtual index; and then (2) from
+ // the virtual index to the reference frame buffer physical index:
+ //
+ // LAST_FRAME, ..., LAST3_FRAME, ..., ALTREF_FRAME
+ // | | |
+ // v v v
+ // ref_fb_idx[0], ..., ref_fb_idx[2], ..., ref_fb_idx[ALTREF_FRAME-1]
+ // | | |
+ // v v v
+ // ref_frame_map[], ..., ref_frame_map[], ..., ref_frame_map[]
+ //
+ // When refresh_last_frame is set, the intent is to retire LAST3_FRAME,
+ // shift the other 2 LAST reference frames as follows:
+ // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
+ // and then have LAST_FRAME refreshed by the newly coded frame.
+ //
+ // To fulfill this, the decoder will be notified to execute the following
+ // 2 steps:
+ //
+ // (a) To change ref_frame_map[] and have the virtual index of LAST3_FRAME
+ // point to the newly coded frame, i.e.
+ // ref_frame_map[ref_fb_idx[2]] => new_fb_idx;
+ //
+ // (b) To change the 1st layer mapping to have LAST_FRAME mapped to the
+ // original virtual index of LAST3_FRAME and have the other mappings
+ // shifted as follows:
+ // LAST_FRAME, LAST2_FRAME, LAST3_FRAME
+ // | | |
+ // v v v
+ // ref_fb_idx[2], ref_fb_idx[0], ref_fb_idx[1]
+ int tmp;
+
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->ref_fb_idx[LAST_REF_FRAMES - 1]],
+ cm->new_fb_idx);
+
+ tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
+
+ shift_last_ref_frames(cpi);
+ cpi->ref_fb_idx[0] = tmp;
+
+ assert(cm->show_existing_frame == 0);
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+
+ if (cpi->rc.is_last_bipred_frame) {
+ // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the
+ // LAST3_FRAME by updating the virtual indices.
+ //
+ // NOTE: The source frame for BWDREF does not have a holding position, as
+ // the OVERLAY frame does for ALTREF. Hence, to resolve the reference
+ // virtual index reshuffling for BWDREF, the encoder always
+ // specifies a LAST_BIPRED right before BWDREF and completes the
+ // reshuffling job accordingly.
+ tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
+
+ shift_last_ref_frames(cpi);
+ cpi->ref_fb_idx[0] = cpi->ref_fb_idx[BWDREF_FRAME - 1];
+ cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp;
+
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[BWDREF_FRAME],
+ sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
+ }
+ }
+
+ // Assign virtual indices for LAST_FRAME, GOLDEN_FRAME, and ALTREF_FRAME
+ cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
+ cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
+ cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
+}
+
+void update_ref_frames(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
@@ -2981,25 +3225,38 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
}
+}
+
+void vp9_update_reference_frames(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ BufferPool *const pool = cm->buffer_pool;
+ SVC *const svc = &cpi->svc;
+
+ if (cpi->extra_arf_allowed)
+ update_multi_arf_ref_frames(cpi);
+ else
+ update_ref_frames(cpi);
+
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->denoiser.denoising_level > kDenLowLow) {
- int svc_base_is_key = 0;
+ int svc_refresh_denoiser_buffers = 0;
int denoise_svc_second_layer = 0;
+ FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type;
if (cpi->use_svc) {
int realloc_fail = 0;
const int svc_buf_shift =
- cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
+ svc->number_spatial_layers - svc->spatial_layer_id == 2
? cpi->denoiser.num_ref_frames
: 0;
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- svc_base_is_key = lc->is_key_frame;
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ svc_refresh_denoiser_buffers =
+ lc->is_key_frame || svc->spatial_layer_sync[svc->spatial_layer_id];
denoise_svc_second_layer =
- cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1
- : 0;
+ svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 1 : 0;
// Check if we need to allocate extra buffers in the denoiser for
// refreshed frames.
@@ -3012,38 +3269,43 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
"Failed to re-allocate denoiser for SVC");
}
vp9_denoiser_update_frame_info(
- &cpi->denoiser, *cpi->Source, cpi->common.frame_type,
- cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,
- cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,
- cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key,
- denoise_svc_second_layer);
+ &cpi->denoiser, *cpi->Source, frame_type, cpi->refresh_alt_ref_frame,
+ cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx,
+ cpi->gld_fb_idx, cpi->lst_fb_idx, cpi->resize_pending,
+ svc_refresh_denoiser_buffers, denoise_svc_second_layer);
}
#endif
+
if (is_one_pass_cbr_svc(cpi)) {
// Keep track of frame index for each reference frame.
- SVC *const svc = &cpi->svc;
if (cm->frame_type == KEY_FRAME) {
int i;
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
// On key frame update all reference frame slots.
for (i = 0; i < REF_FRAMES; i++) {
+ svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id;
// LAST/GOLDEN/ALTREF is already updated above.
if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx &&
i != cpi->alt_fb_idx)
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
}
} else {
- if (cpi->refresh_last_frame)
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- if (cpi->refresh_golden_frame)
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- if (cpi->refresh_alt_ref_frame)
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ if (cpi->refresh_last_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_golden_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_alt_ref_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id;
+ }
}
// Copy flags from encoder to SVC struct.
vp9_copy_flags_ref_update_idx(cpi);
+ vp9_svc_update_ref_frame_buffer_idx(cpi);
}
}
@@ -3574,10 +3836,43 @@ static void set_frame_size(VP9_COMP *cpi) {
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
-static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
- uint8_t *dest) {
+#if CONFIG_CONSISTENT_RECODE
+static void save_encode_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
- int q = 0, bottom_index = 0, top_index = 0; // Dummy variables.
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes_prev[i][j] =
+ rd_opt->prediction_type_threshes[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact_prev[i][j] =
+ tile_data->thresh_freq_fact[i][j];
+ }
+ }
+ }
+ }
+}
+#endif
+
+static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
+ uint8_t *dest) {
+ VP9_COMMON *const cm = &cpi->common;
+ int q = 0, bottom_index = 0, top_index = 0;
const INTERP_FILTER filter_scaler =
(is_one_pass_cbr_svc(cpi))
? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id]
@@ -3623,6 +3918,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
filter_scaler, phase_scaler);
}
+#ifdef OUTPUT_YUV_SVC_SRC
+ // Write out at most 3 spatial layers.
+ if (is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id < 3) {
+ vpx_write_yuv_frame(yuv_svc_src[cpi->svc.spatial_layer_id], cpi->Source);
+ }
+#endif
// Unfiltered raw source used in metrics calculation if the source
// has been filtered.
if (is_psnr_calc_enabled(cpi)) {
@@ -3672,7 +3973,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
cpi->Last_Source->y_height != cpi->Source->y_height)
cpi->compute_source_sad_onepass = 0;
- if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) {
+ if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
memset(cpi->consec_zero_mv, 0,
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
}
@@ -3683,15 +3984,29 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
// (need to check encoding time cost for doing this for speed 8).
cpi->rc.high_source_sad = 0;
+ cpi->rc.hybrid_intra_scene_change = 0;
+ cpi->rc.re_encode_maxq_scene_change = 0;
if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
(cpi->oxcf.rc_mode == VPX_VBR ||
cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
- (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc)))
+ (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
vp9_scene_detection_onepass(cpi);
if (cpi->svc.spatial_layer_id == 0)
cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad;
+ // For 1 pass CBR, check if we are dropping this frame.
+ // Never drop on a key frame, when the base layer is key for svc,
+ // on a scene change, or when the superframe has layer sync.
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
+ !frame_is_intra_only(cm) && !cpi->rc.high_source_sad &&
+ !cpi->svc.high_source_sad_superframe &&
+ !cpi->svc.superframe_has_layer_sync &&
+ (!cpi->use_svc ||
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ if (vp9_rc_drop_frame(cpi)) return 0;
+ }
+
// For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
// when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
// avoid this frame-level upsampling (for non intra_only frames).
@@ -3715,7 +4030,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
}
}
- if (cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
+ // TODO(jianj): Look into issue of skin detection with high bitdepth.
+ if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
cpi->oxcf.rc_mode == VPX_CBR &&
cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
@@ -3729,10 +4045,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
suppress_active_map(cpi);
- // For SVC on non-zero spatial layer: check for disabling inter-layer
- // prediction.
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0)
- vp9_svc_constrain_inter_layer_pred(cpi);
+ if (cpi->use_svc) {
+ // On non-zero spatial layer, check for disabling inter-layer
+ // prediction.
+ if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
+ vp9_svc_assert_constraints_pattern(cpi);
+ }
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
@@ -3748,7 +4066,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// it may be pretty bad for rate-control,
// and I should handle it somehow
vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
- } else if (cpi->roi.enabled && cm->frame_type != KEY_FRAME) {
+ } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
apply_roi_map(cpi);
}
@@ -3777,8 +4095,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
suppress_active_map(cpi);
// Turn-off cyclic refresh for re-encoded frame.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
unsigned char *const seg_map = cpi->segmentation_map;
memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
+ memset(cr->last_coded_q_map, MAXQ,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
+ cr->sb_index = 0;
vp9_disable_segmentation(&cm->seg);
}
apply_active_map(cpi);
@@ -3788,13 +4110,14 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// Update some stats from cyclic refresh, and check for golden frame update.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
- cm->frame_type != KEY_FRAME)
+ !frame_is_intra_only(cm))
vp9_cyclic_refresh_postencode(cpi);
// Update the skip mb flag probabilities based on the distribution
// seen in the last encoder iteration.
// update_base_skip_probs(cpi);
vpx_clear_system_state();
+ return 1;
}
#define MAX_QSTEP_ADJ 4
@@ -4148,12 +4471,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
vp9_encode_frame(cpi);
vpx_clear_system_state();
restore_coding_context(cpi);
- vp9_pack_bitstream(cpi, dest, size);
-
- vp9_encode_frame(cpi);
- vpx_clear_system_state();
-
- restore_coding_context(cpi);
}
}
@@ -4485,11 +4802,21 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cpi->oxcf.target_bandwidth == 0) {
cpi->svc.skip_enhancement_layer = 1;
vp9_rc_postencode_update_drop_frame(cpi);
- vp9_inc_frame_in_layer(cpi);
cpi->ext_refresh_frame_flags_pending = 0;
cpi->last_frame_dropped = 1;
cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
+ if (cpi->svc.framedrop_mode == LAYER_DROP ||
+ cpi->svc.drop_spatial_layer[0] == 0) {
+ // For the case of constrained drop mode where the base is dropped
+ // (drop_spatial_layer[0] == 1), meaning the full superframe is dropped,
+ // we don't increment the svc frame counters. In particular the temporal
+ // layer counter (which is incremented in vp9_inc_frame_in_layer())
+ // won't be incremented, so on a dropped frame we try the same
+ // temporal_layer_id on the next incoming frame. This avoids an
+ // issue with temporal alignment when the full superframe is dropped.
+ vp9_inc_frame_in_layer(cpi);
+ }
return;
}
@@ -4538,55 +4865,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
}
}
- // For 1 pass CBR, check if we are dropping this frame.
- // Never drop on key frame, or if base layer is key for svc.
- if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
- cm->frame_type != KEY_FRAME &&
- (!cpi->use_svc ||
- !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
- int svc_prev_layer_dropped = 0;
- // In the contrained framedrop mode for svc (framedrop_mode =
- // CONSTRAINED_LAYER_DROP), if the previous spatial layer was dropped, drop
- // the current spatial layer.
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
- cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1])
- svc_prev_layer_dropped = 1;
- if ((svc_prev_layer_dropped &&
- cpi->svc.framedrop_mode == CONSTRAINED_LAYER_DROP) ||
- vp9_rc_drop_frame(cpi)) {
- vp9_rc_postencode_update_drop_frame(cpi);
- cpi->ext_refresh_frame_flags_pending = 0;
- cpi->last_frame_dropped = 1;
- if (cpi->use_svc) {
- cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
- cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
- vp9_inc_frame_in_layer(cpi);
- cpi->svc.skip_enhancement_layer = 1;
- if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
- int i;
- int all_layers_drop = 1;
- for (i = 0; i < cpi->svc.spatial_layer_id; i++) {
- if (cpi->svc.drop_spatial_layer[i] == 0) {
- all_layers_drop = 0;
- break;
- }
- }
- if (all_layers_drop == 1) cpi->svc.skip_enhancement_layer = 0;
- }
- }
- return;
- }
- }
-
vpx_clear_system_state();
#if CONFIG_INTERNAL_STATS
memset(cpi->mode_chosen_counts, 0,
MAX_MODES * sizeof(*cpi->mode_chosen_counts));
#endif
+#if CONFIG_CONSISTENT_RECODE
+ // Back up encode parameters to ensure consistency between recodes.
+ save_encode_params(cpi);
+#endif
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
- encode_without_recode_loop(cpi, size, dest);
+ if (!encode_without_recode_loop(cpi, size, dest)) return;
} else {
encode_with_recode_loop(cpi, size, dest);
}
@@ -4725,17 +5016,23 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
}
cm->prev_frame = cm->cur_frame;
- if (cpi->use_svc)
+ if (cpi->use_svc) {
cpi->svc
.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id]
.last_frame_type = cm->frame_type;
+ // Reset layer_sync back to 0 for next frame.
+ cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
+ }
cpi->force_update_segmentation = 0;
if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
+
+ cpi->svc.previous_frame_is_intra_only = cm->intra_only;
+ cpi->svc.set_intra_only_frame = 0;
}
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
@@ -4823,6 +5120,12 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
check_initial_width(cpi, subsampling_x, subsampling_y);
#endif // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP9_HIGHBITDEPTH
+ // Disable denoiser for high bitdepth since vp9_denoiser_filter only works for
+ // 8 bits.
+ if (cm->bit_depth > 8) cpi->oxcf.noise_sensitivity = 0;
+#endif
+
#if CONFIG_VP9_TEMPORAL_DENOISING
setup_denoiser_buffer(cpi);
#endif
@@ -5186,6 +5489,553 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
}
}
+typedef struct GF_PICTURE {
+ YV12_BUFFER_CONFIG *frame;
+ int ref_frame[3];
+} GF_PICTURE;
+
+void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
+ const GF_GROUP *gf_group, int *tpl_group_frames) {
+ int frame_idx, i;
+ int gld_index = -1;
+ int alt_index = -1;
+ int lst_index = -1;
+ int extend_frame_count = 0;
+ int pframe_qindex = cpi->tpl_stats[2].base_qindex;
+
+ *tpl_group_frames = 0;
+
+ // Initialize Golden reference frame.
+ gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
+ gld_index = 0;
+ ++*tpl_group_frames;
+
+ // Initialize ARF frame
+ gf_picture[1].frame = cpi->Source;
+ gf_picture[1].ref_frame[0] = gld_index;
+ gf_picture[1].ref_frame[1] = lst_index;
+ gf_picture[1].ref_frame[2] = alt_index;
+ alt_index = 1;
+ ++*tpl_group_frames;
+
+ // Initialize P frames
+ for (frame_idx = 2; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
+ struct lookahead_entry *buf =
+ vp9_lookahead_peek(cpi->lookahead, frame_idx - 2);
+
+ if (buf == NULL) break;
+
+ gf_picture[frame_idx].frame = &buf->img;
+ gf_picture[frame_idx].ref_frame[0] = gld_index;
+ gf_picture[frame_idx].ref_frame[1] = lst_index;
+ gf_picture[frame_idx].ref_frame[2] = alt_index;
+
+ ++*tpl_group_frames;
+ lst_index = frame_idx;
+ if (gf_group->update_type[frame_idx] == OVERLAY_UPDATE) break;
+ }
+
+ gld_index = frame_idx;
+ lst_index = VPXMAX(0, frame_idx - 1);
+ alt_index = -1;
+ ++frame_idx;
+
+ // Extend two frames outside the current gf group.
+ for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
+ struct lookahead_entry *buf =
+ vp9_lookahead_peek(cpi->lookahead, frame_idx - 2);
+
+ if (buf == NULL) break;
+
+ cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
+
+ gf_picture[frame_idx].frame = &buf->img;
+ gf_picture[frame_idx].ref_frame[0] = gld_index;
+ gf_picture[frame_idx].ref_frame[1] = lst_index;
+ gf_picture[frame_idx].ref_frame[2] = alt_index;
+ lst_index = frame_idx;
+ ++*tpl_group_frames;
+ ++extend_frame_count;
+ }
+}
+
+void init_tpl_stats(VP9_COMP *cpi) {
+ int frame_idx;
+ for (frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ memset(tpl_frame->tpl_stats_ptr, 0,
+ tpl_frame->height * tpl_frame->width *
+ sizeof(*tpl_frame->tpl_stats_ptr));
+ tpl_frame->is_valid = 0;
+ }
+}
+
+uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
+ uint8_t *cur_frame_buf,
+ uint8_t *ref_frame_buf, int stride,
+ MV *mv, BLOCK_SIZE bsize) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ const SEARCH_METHODS search_method = HEX;
+ int step_param;
+ int sadpb = x->sadperbit16;
+ uint32_t bestsme = UINT_MAX;
+ uint32_t distortion;
+ uint32_t sse;
+ int cost_list[5];
+ const MvLimits tmp_mv_limits = x->mv_limits;
+
+ MV best_ref_mv1 = { 0, 0 };
+ MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+
+ best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+ best_ref_mv1_full.row = best_ref_mv1.row >> 3;
+
+ // Set up frame pointers.
+ x->plane[0].src.buf = cur_frame_buf;
+ x->plane[0].src.stride = stride;
+ xd->plane[0].pre[0].buf = ref_frame_buf;
+ xd->plane[0].pre[0].stride = stride;
+
+ step_param = mv_sf->reduce_first_step_size;
+ step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
+
+ vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
+
+ vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
+ search_method, sadpb, cond_cost_list(cpi, cost_list),
+ &best_ref_mv1, mv, 0, 0);
+
+ /* restore UMV window */
+ x->mv_limits = tmp_mv_limits;
+
+ // Ignore mv costing by sending NULL pointer instead of cost array
+ bestsme = cpi->find_fractional_mv_step(
+ x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], 0, mv_sf->subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
+ 0);
+
+ return bestsme;
+}
+
+int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
+ int ref_pos_col, int block, BLOCK_SIZE bsize) {
+ int width = 0, height = 0;
+ int bw = 4 << b_width_log2_lookup[bsize];
+ int bh = 4 << b_height_log2_lookup[bsize];
+
+ switch (block) {
+ case 0:
+ width = grid_pos_col + bw - ref_pos_col;
+ height = grid_pos_row + bh - ref_pos_row;
+ break;
+ case 1:
+ width = ref_pos_col + bw - grid_pos_col;
+ height = grid_pos_row + bh - ref_pos_row;
+ break;
+ case 2:
+ width = grid_pos_col + bw - ref_pos_col;
+ height = ref_pos_row + bh - grid_pos_row;
+ break;
+ case 3:
+ width = ref_pos_col + bw - grid_pos_col;
+ height = ref_pos_row + bh - grid_pos_row;
+ break;
+ default: assert(0);
+ }
+
+ return width * height;
+}
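// Editor's worked example (illustrative): an 8x8 MC block whose top-left
// lands at pixel (row 3, col 5) overlaps four 8x8 grid blocks with top-left
// corners (0,0), (0,8), (8,0) and (8,8). The four areas computed above are:
//   block 0: (0 + 8 - 5) * (0 + 8 - 3) = 3 * 5 = 15
//   block 1: (5 + 8 - 8) * (0 + 8 - 3) = 5 * 5 = 25
//   block 2: (0 + 8 - 5) * (3 + 8 - 8) = 3 * 3 =  9
//   block 3: (5 + 8 - 8) * (3 + 8 - 8) = 5 * 3 = 15
// which sum to 64 = bw * bh, i.e. the overlaps exactly tile the MC block.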
+
+int round_floor(int ref_pos, int bsize_pix) {
+ int round;
+ if (ref_pos < 0)
+ round = -(1 + (-ref_pos - 1) / bsize_pix);
+ else
+ round = ref_pos / bsize_pix;
+
+ return round;
+}
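// Editor's note (illustrative): round_floor() is floor division, which
// differs from C's truncating division for negative positions (a motion
// vector may point above or to the left of the frame):
//   round_floor(-5, 8) == -1   (floor division)
//   -5 / 8             ==  0   (C truncates toward zero)
//   round_floor(13, 8) ==  1   (matches 13 / 8 for non-negative input)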
+
+void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int stride, int64_t intra_cost,
+ int64_t inter_cost, int ref_frame_idx, int_mv mv) {
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int idx, idy;
+
+ intra_cost = intra_cost / (mi_height * mi_width);
+ inter_cost = inter_cost / (mi_height * mi_width);
+
+ intra_cost = VPXMAX(1, intra_cost);
+ inter_cost = VPXMAX(1, inter_cost);
+
+ for (idy = 0; idy < mi_height; ++idy) {
+ for (idx = 0; idx < mi_width; ++idx) {
+ TplDepStats *tpl_ptr =
+ &tpl_stats[(mi_row + idy) * stride + (mi_col + idx)];
+ tpl_ptr->intra_cost = intra_cost;
+ tpl_ptr->inter_cost = inter_cost;
+ tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
+ tpl_ptr->ref_frame_index = ref_frame_idx;
+ tpl_ptr->mv.as_int = mv.as_int;
+ }
+ }
+}
+
+void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
+ int mi_row, int mi_col, const BLOCK_SIZE bsize) {
+ TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
+ TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
+ MV mv = tpl_stats->mv.as_mv;
+ int mv_row = mv.row >> 3;
+ int mv_col = mv.col >> 3;
+
+ int ref_pos_row = mi_row * MI_SIZE + mv_row;
+ int ref_pos_col = mi_col * MI_SIZE + mv_col;
+
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int pix_num = bw * bh;
+
+  // Top-left grid block location, in pixels
+ int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
+ int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
+ int block;
+
+ for (block = 0; block < 4; ++block) {
+ int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
+ int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
+
+ if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
+ grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
+ int overlap_area = get_overlap_area(
+ grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
+ int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
+ int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
+
+ int64_t mc_flow = tpl_stats->mc_dep_cost -
+ (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
+ tpl_stats->intra_cost;
+
+ int idx, idy;
+
+ for (idy = 0; idy < mi_height; ++idy) {
+ for (idx = 0; idx < mi_width; ++idx) {
+ TplDepStats *des_stats =
+ &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
+ (ref_mi_col + idx)];
+
+ des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
+ des_stats->mc_ref_cost +=
+ ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
+ pix_num;
+ assert(overlap_area >= 0);
+ }
+ }
+ }
+ }
+}
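
The quantity propagated above is the fraction of this block's dependent cost not already explained by inter prediction, mc_dep_cost * (1 - inter_cost / intra_cost), apportioned to each reference block by its pixel overlap. A numeric sketch with hypothetical costs:

#include <assert.h>
#include <stdint.h>

int main(void) {
  /* Hypothetical TPL statistics for one 32x32 block. */
  const int64_t intra_cost = 1000, inter_cost = 250, mc_dep_cost = 4000;
  const int overlap_area = 256, pix_num = 32 * 32;
  /* Same integer arithmetic as tpl_model_update_b(): 75% of the
   * dependent cost flows back to the references. */
  const int64_t mc_flow =
      mc_dep_cost - (mc_dep_cost * inter_cost) / intra_cost;
  assert(mc_flow == 3000);
  /* A reference block overlapping one quarter of the pixels is credited
   * with one quarter of that flow. */
  assert((mc_flow * overlap_area) / pix_num == 750);
  return 0;
}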
+
+void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
+ int mi_row, int mi_col, const BLOCK_SIZE bsize) {
+ int idx, idy;
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+
+ for (idy = 0; idy < mi_height; ++idy) {
+ for (idx = 0; idx < mi_width; ++idx) {
+ TplDepStats *tpl_ptr =
+ &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
+ tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
+ BLOCK_8X8);
+ }
+ }
+}
+
+void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff,
+ TX_SIZE tx_size, int64_t *recon_error, int64_t *sse) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+ uint16_t eob;
+ int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
+ const int shift = tx_size == TX_32X32 ? 0 : 2;
+
+ vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
+ qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
+ scan_order->iscan);
+
+ *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
+ *recon_error = VPXMAX(*recon_error, 1);
+
+ *sse = (*sse) >> shift;
+ *sse = VPXMAX(*sse, 1);
+}
+
+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+ TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
+ case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
+ case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
+ default: assert(0);
+ }
+}
+
+void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
+ struct scale_factors *sf, GF_PICTURE *gf_picture,
+ int frame_idx, int16_t *src_diff, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
+ YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
+ int64_t *recon_error, int64_t *sse,
+ TplDepStats *tpl_stats) {
+ VP9_COMMON *cm = &cpi->common;
+ ThreadData *td = &cpi->td;
+
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int pix_num = bw * bh;
+ int best_rf_idx = -1;
+ int_mv best_mv;
+ int64_t best_inter_cost = INT64_MAX;
+ int64_t inter_cost;
+ int rf_idx;
+ const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
+
+ int64_t best_intra_cost = INT64_MAX;
+ int64_t intra_cost;
+ PREDICTION_MODE mode;
+ int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ MODE_INFO mi_above, mi_left;
+
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+ xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+ xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
+ xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
+ xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
+
+ // Intra prediction search
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ uint8_t *src, *dst;
+ int src_stride, dst_stride;
+
+ src = xd->cur_buf->y_buffer + mb_y_offset;
+ src_stride = xd->cur_buf->y_stride;
+
+ dst = &predictor[0];
+ dst_stride = bw;
+
+ xd->mi[0]->sb_type = bsize;
+ xd->mi[0]->ref_frame[0] = INTRA_FRAME;
+
+ vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
+ src_stride, dst, dst_stride, 0, 0, 0);
+
+ vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
+
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+
+ intra_cost = vpx_satd(coeff, pix_num);
+
+ if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
+ }
+
+ // Motion compensated prediction
+ best_mv.as_int = 0;
+
+ (void)mb_y_offset;
+ // Motion estimation column boundary
+ x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
+ x->mv_limits.col_max =
+ ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
+
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ int_mv mv;
+ if (ref_frame[rf_idx] == NULL) continue;
+
+ motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, &mv.as_mv, bsize);
+
+    // TODO(jingning): High bit-depth is not yet supported in the next three
+    // steps.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_build_inter_predictor(
+ CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
+ ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
+ &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
+ mi_row * MI_SIZE, xd->bd);
+ vpx_highbd_subtract_block(
+ bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
+ } else {
+ vp9_build_inter_predictor(
+ ref_frame[rf_idx]->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
+ 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
+ vpx_subtract_block(bh, bw, src_diff, bw,
+ xd->cur_buf->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, &predictor[0], bw);
+ }
+#else
+ vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_stride, &predictor[0], bw,
+ &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
+ mi_col * MI_SIZE, mi_row * MI_SIZE);
+ vpx_subtract_block(bh, bw, src_diff, bw,
+ xd->cur_buf->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, &predictor[0], bw);
+#endif
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+
+ inter_cost = vpx_satd(coeff, pix_num);
+
+ if (inter_cost < best_inter_cost) {
+ best_rf_idx = rf_idx;
+ best_inter_cost = inter_cost;
+ best_mv.as_int = mv.as_int;
+ get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
+ sse);
+ }
+ }
+ best_intra_cost = VPXMAX(best_intra_cost, 1);
+ best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
+ tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->mc_dep_cost = tpl_stats->intra_cost + tpl_stats->mc_flow;
+ tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
+ tpl_stats->mv.as_int = best_mv.as_int;
+}
+
+void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
+ YV12_BUFFER_CONFIG *ref_frame[3] = { NULL, NULL, NULL };
+
+ VP9_COMMON *cm = &cpi->common;
+ struct scale_factors sf;
+ int rdmult, idx;
+ ThreadData *td = &cpi->td;
+ MACROBLOCK *x = &td->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int mi_row, mi_col;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
+ uint8_t *predictor;
+#else
+ DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
+#endif
+ DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+
+ const BLOCK_SIZE bsize = BLOCK_32X32;
+ const TX_SIZE tx_size = max_txsize_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int64_t recon_error, sse;
+
+  // Set up the scaling factors
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp9_setup_scale_factors_for_frame(
+ &sf, this_frame->y_crop_width, this_frame->y_crop_height,
+ this_frame->y_crop_width, this_frame->y_crop_height,
+ cpi->common.use_highbitdepth);
+
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ predictor = CONVERT_TO_BYTEPTR(predictor16);
+ else
+ predictor = predictor8;
+#else
+ vp9_setup_scale_factors_for_frame(
+ &sf, this_frame->y_crop_width, this_frame->y_crop_height,
+ this_frame->y_crop_width, this_frame->y_crop_height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+  // Prepare reference frame pointers. If any reference frame slot is
+  // unavailable, the pointer will be set to NULL.
+ for (idx = 0; idx < 3; ++idx) {
+ int rf_idx = gf_picture[frame_idx].ref_frame[idx];
+ if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
+ }
+
+ xd->mi = cm->mi_grid_visible;
+ xd->mi[0] = cm->mi;
+
+ // Get rd multiplier set up.
+ rdmult =
+ (int)vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
+ if (rdmult < 1) rdmult = 1;
+ set_error_per_bit(&cpi->td.mb, rdmult);
+ vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
+
+ tpl_frame->is_valid = 1;
+
+ cm->base_qindex = tpl_frame->base_qindex;
+ vp9_frame_init_quantizer(cpi);
+
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ // Motion estimation row boundary
+ x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
+ x->mv_limits.row_max =
+ (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ TplDepStats tpl_stats;
+ xd->cur_buf = this_frame;
+ mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, src_diff, coeff,
+ qcoeff, dqcoeff, mi_row, mi_col, bsize, tx_size,
+ ref_frame, predictor, &recon_error, &sse, &tpl_stats);
+
+ // Motion flow dependency dispenser.
+ tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
+ tpl_frame->stride, tpl_stats.intra_cost,
+ tpl_stats.inter_cost, tpl_stats.ref_frame_index,
+ tpl_stats.mv);
+
+ tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
+ bsize);
+ }
+ }
+}
+
+void setup_tpl_stats(VP9_COMP *cpi) {
+ GF_PICTURE gf_picture[MAX_LAG_BUFFERS];
+ const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+ int tpl_group_frames = 0;
+ int frame_idx;
+
+  // TODO(jingning): Make the model support the high bit-depth path.
+#if CONFIG_VP9_HIGHBITDEPTH
+ (void)gf_picture;
+ (void)gf_group;
+ (void)tpl_group_frames;
+ (void)frame_idx;
+ return;
+#endif
+
+ init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
+
+ init_tpl_stats(cpi);
+
+ // Backward propagation from tpl_group_frames to 1.
+ for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx)
+ mc_flow_dispenser(cpi, gf_picture, frame_idx);
+}
+
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
int64_t *time_end, int flush) {
@@ -5289,7 +6139,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
// Read in the source frame.
- if (cpi->use_svc)
+ if (cpi->use_svc || cpi->svc.set_intra_only_frame)
source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
else
source = vp9_lookahead_pop(cpi->lookahead, flush);
@@ -5299,8 +6149,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cm->intra_only = 0;
// if the flags indicate intra frame, but if the current picture is for
// non-zero spatial layer, it should not be an intra picture.
- if ((source->flags & VPX_EFLAG_FORCE_KF) &&
- cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
+ if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
+ cpi->svc.spatial_layer_id > 0) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
}
@@ -5395,6 +6245,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
for (i = 0; i < MAX_REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
}
+ if (arf_src_index && cpi->sf.enable_tpl_model) {
+ vp9_estimate_qp_gop(cpi);
+ setup_tpl_stats(cpi);
+ }
+
cpi->td.mb.fp_src_pred = 0;
#if CONFIG_REALTIME_ONLY
if (cpi->use_svc) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 1e0ed70fb..0c6375c65 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -278,10 +278,37 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
}
+typedef struct TplDepStats {
+ int64_t intra_cost;
+ int64_t inter_cost;
+ int64_t mc_flow;
+ int64_t mc_dep_cost;
+ int64_t mc_ref_cost;
+
+ int ref_frame_index;
+ int_mv mv;
+} TplDepStats;
+
+typedef struct TplDepFrame {
+ uint8_t is_valid;
+ TplDepStats *tpl_stats_ptr;
+ int stride;
+ int width;
+ int height;
+ int mi_rows;
+ int mi_cols;
+ int base_qindex;
+} TplDepFrame;
+
+#define TPL_DEP_COST_SCALE_LOG2 4
+
// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
typedef struct TileDataEnc {
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+#if CONFIG_CONSISTENT_RECODE
+ int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES];
+#endif
int8_t mode_map[BLOCK_SIZES][MAX_MODES];
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
@@ -473,6 +500,8 @@ typedef struct VP9_COMP {
#endif
YV12_BUFFER_CONFIG *raw_source_frame;
+ TplDepFrame tpl_stats[MAX_LAG_BUFFERS];
+
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
@@ -484,8 +513,13 @@ typedef struct VP9_COMP {
int gld_fb_idx;
int alt_fb_idx;
+ int ref_fb_idx[REF_FRAMES];
+ int last_show_frame_buf_idx; // last show frame buffer index
+
int refresh_last_frame;
int refresh_golden_frame;
+ int refresh_bwd_ref_frame;
+ int refresh_alt2_ref_frame;
int refresh_alt_ref_frame;
int ext_refresh_frame_flags_pending;
@@ -499,7 +533,6 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tile_tok[4][1 << 6];
- uint32_t tok_count[4][1 << 6];
TOKENLIST *tplist[4][1 << 6];
// Ambient reconstruction err target for force key frames
@@ -521,7 +554,7 @@ typedef struct VP9_COMP {
RATE_CONTROL rc;
double framerate;
- int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE];
+ int interp_filter_selected[REF_FRAMES][SWITCHABLE];
struct vpx_codec_pkt_list *output_pkt_list;
@@ -726,6 +759,13 @@ typedef struct VP9_COMP {
uint8_t *count_arf_frame_usage;
uint8_t *count_lastgolden_frame_usage;
+ // Parameters on multi-layer ALTREFs
+ int num_extra_arfs;
+ int arf_map[MAX_EXT_ARFS + 1];
+ int arf_pos_in_gf[MAX_EXT_ARFS + 1];
+ int arf_pos_for_ovrly[MAX_EXT_ARFS + 1];
+ int extra_arf_allowed;
+
vpx_roi_map_t roi;
} VP9_COMP;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 453879fb8..fc1ecd6ce 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -44,7 +44,6 @@
#define COMPLEXITY_STATS_OUTPUT 0
#define FIRST_PASS_Q 10.0
-#define INTRA_MODE_PENALTY 1024
#define MIN_ARF_GF_BOOST 240
#define MIN_DECAY_FACTOR 0.01
#define NEW_MV_MODE_PENALTY 32
@@ -812,6 +811,8 @@ static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile,
fp_acc_data->image_data_start_row);
}
+#define NZ_MOTION_PENALTY 128
+#define INTRA_MODE_PENALTY 1024
void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
FIRSTPASS_DATA *fp_acc_data,
TileDataEnc *tile_data, MV *best_ref_mv,
@@ -1059,7 +1060,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
// Compute the motion error of the 0,0 motion using the last source
// frame as the reference. Skip the further motion search on
- // reconstructed frame if this error is small.
+ // reconstructed frame if this error is very small.
unscaled_last_source_buf_2d.buf =
cpi->unscaled_last_source->y_buffer + recon_yoffset;
unscaled_last_source_buf_2d.stride = cpi->unscaled_last_source->y_stride;
@@ -1076,8 +1077,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
&unscaled_last_source_buf_2d);
#endif // CONFIG_VP9_HIGHBITDEPTH
- // TODO(pengchong): Replace the hard-coded threshold
- if (raw_motion_error > 25) {
+ if (raw_motion_error > NZ_MOTION_PENALTY) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error);
@@ -2115,34 +2115,234 @@ static double calculate_group_score(VP9_COMP *cpi, double av_score,
++s;
++i;
}
- assert(i == frame_count);
return score_total;
}
-static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
- int gf_arf_bits) {
- VP9EncoderConfig *const oxcf = &cpi->oxcf;
+static void define_gf_multi_arf_structure(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
- FIRSTPASS_STATS frame_stats;
int i;
- int frame_index = 1;
- int target_frame_size;
+ int frame_index = 0;
+ const int key_frame = cpi->common.frame_type == KEY_FRAME;
+
+  // The use of bi-predictive frames is only enabled when the following 3
+  // conditions are met:
+ // (1) ALTREF is enabled;
+ // (2) The bi-predictive group interval is at least 2; and
+ // (3) The bi-predictive group interval is strictly smaller than the
+ // golden group interval.
+ const int is_bipred_enabled =
+ cpi->extra_arf_allowed && rc->source_alt_ref_pending &&
+ rc->bipred_group_interval &&
+ rc->bipred_group_interval <=
+ (rc->baseline_gf_interval - rc->source_alt_ref_pending);
+ int bipred_group_end = 0;
+ int bipred_frame_index = 0;
+
+ const unsigned char ext_arf_interval =
+ (unsigned char)(rc->baseline_gf_interval / (cpi->num_extra_arfs + 1) - 1);
+ int which_arf = cpi->num_extra_arfs;
+ int subgroup_interval[MAX_EXT_ARFS + 1];
+ int is_sg_bipred_enabled = is_bipred_enabled;
+ int accumulative_subgroup_interval = 0;
+
+ // For key frames the frame target rate is already set and it
+ // is also the golden frame.
+ // === [frame_index == 0] ===
+ if (!key_frame) {
+ if (rc->source_alt_ref_active) {
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ } else {
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ }
+ gf_group->arf_update_idx[frame_index] = 0;
+ gf_group->arf_ref_idx[frame_index] = 0;
+ }
+
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+
+ frame_index++;
+
+ bipred_frame_index++;
+
+ // === [frame_index == 1] ===
+ if (rc->source_alt_ref_pending) {
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->arf_src_offset[frame_index] =
+ (unsigned char)(rc->baseline_gf_interval - 1);
+
+ gf_group->arf_update_idx[frame_index] = 0;
+ gf_group->arf_ref_idx[frame_index] = 0;
+
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+ // NOTE: "bidir_pred_frame_index" stays unchanged for ARF_UPDATE frames.
+
+ // Work out the ARFs' positions in this gf group
+    // NOTE: ALT_REFs are indexed inversely, but coded in display order
+    // (except for the original ARF). In the example of three ALT_REFs,
+    // we index ALTREFs as: KEY ----- ALT2 ----- ALT1 ----- ALT0
+ // but code them in the following order:
+ // KEY-ALT0-ALT2 ----- OVERLAY2-ALT1 ----- OVERLAY1 ----- OVERLAY0
+ //
+ // arf_pos_for_ovrly[]: Position for OVERLAY
+ // arf_pos_in_gf[]: Position for ALTREF
+ cpi->arf_pos_for_ovrly[0] = frame_index + cpi->num_extra_arfs +
+ gf_group->arf_src_offset[frame_index] + 1;
+ for (i = 0; i < cpi->num_extra_arfs; ++i) {
+ cpi->arf_pos_for_ovrly[i + 1] =
+ frame_index + (cpi->num_extra_arfs - i) * (ext_arf_interval + 2);
+ subgroup_interval[i] = cpi->arf_pos_for_ovrly[i] -
+ cpi->arf_pos_for_ovrly[i + 1] - (i == 0 ? 1 : 2);
+ }
+ subgroup_interval[cpi->num_extra_arfs] =
+ cpi->arf_pos_for_ovrly[cpi->num_extra_arfs] - frame_index -
+ (cpi->num_extra_arfs == 0 ? 1 : 2);
+
+ ++frame_index;
+
+ // Insert an extra ARF
+ // === [frame_index == 2] ===
+ if (cpi->num_extra_arfs) {
+ gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->arf_src_offset[frame_index] = ext_arf_interval;
+
+ gf_group->arf_update_idx[frame_index] = which_arf;
+ gf_group->arf_ref_idx[frame_index] = 0;
+ ++frame_index;
+ }
+ accumulative_subgroup_interval += subgroup_interval[cpi->num_extra_arfs];
+ }
+
+ for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
+ gf_group->arf_update_idx[frame_index] = which_arf;
+ gf_group->arf_ref_idx[frame_index] = which_arf;
+
+ // If we are going to have ARFs, check whether we can have BWDREF in this
+    // subgroup, and further, whether we can have an ARF subgroup that contains
+    // the BWDREF subgroup but is itself contained within the GF group:
+ //
+ // GF group --> ARF subgroup --> BWDREF subgroup
+ if (rc->source_alt_ref_pending) {
+ is_sg_bipred_enabled =
+ is_bipred_enabled &&
+ (subgroup_interval[which_arf] > rc->bipred_group_interval);
+ }
+
+ // NOTE: 1. BIDIR_PRED is only enabled when the length of the bi-predictive
+ // frame group interval is strictly smaller than that of the GOLDEN
+ // FRAME group interval.
+ // 2. Currently BIDIR_PRED is only enabled when alt-ref is on.
+ if (is_sg_bipred_enabled && !bipred_group_end) {
+ const int cur_brf_src_offset = rc->bipred_group_interval - 1;
+
+ if (bipred_frame_index == 1) {
+ // --- BRF_UPDATE ---
+ gf_group->update_type[frame_index] = BRF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->brf_src_offset[frame_index] = cur_brf_src_offset;
+ } else if (bipred_frame_index == rc->bipred_group_interval) {
+ // --- LAST_BIPRED_UPDATE ---
+ gf_group->update_type[frame_index] = LAST_BIPRED_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->brf_src_offset[frame_index] = 0;
+
+ // Reset the bi-predictive frame index.
+ bipred_frame_index = 0;
+ } else {
+ // --- BIPRED_UPDATE ---
+ gf_group->update_type[frame_index] = BIPRED_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->brf_src_offset[frame_index] = 0;
+ }
+ gf_group->bidir_pred_enabled[frame_index] = 1;
+
+ bipred_frame_index++;
+ // Check whether the next bi-predictive frame group would entirely be
+ // included within the current golden frame group.
+ // In addition, we need to avoid coding a BRF right before an ARF.
+ if (bipred_frame_index == 1 &&
+ (i + 2 + cur_brf_src_offset) >= accumulative_subgroup_interval) {
+ bipred_group_end = 1;
+ }
+ } else {
+ gf_group->update_type[frame_index] = LF_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+ }
+
+ ++frame_index;
+
+ // Check if we need to update the ARF.
+ if (is_sg_bipred_enabled && cpi->num_extra_arfs && which_arf > 0 &&
+ frame_index > cpi->arf_pos_for_ovrly[which_arf]) {
+ --which_arf;
+ accumulative_subgroup_interval += subgroup_interval[which_arf] + 1;
+
+      // Entering a new subgroup; reset the bipred_group_end flag.
+ bipred_group_end = 0;
+ // Insert another extra ARF after the overlay frame
+ if (which_arf) {
+ gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->arf_src_offset[frame_index] = ext_arf_interval;
+
+ gf_group->arf_update_idx[frame_index] = which_arf;
+ gf_group->arf_ref_idx[frame_index] = 0;
+ ++frame_index;
+ }
+ }
+ }
+
+  // NOTE: We need to configure the frame at the end of the sequence + 1 that
+  // is the start frame for the next group. Otherwise, prior to the call to
+  // vp9_rc_get_second_pass_params(), the data will be undefined.
+ gf_group->arf_update_idx[frame_index] = 0;
+ gf_group->arf_ref_idx[frame_index] = 0;
+
+ if (rc->source_alt_ref_pending) {
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+
+ cpi->arf_pos_in_gf[0] = 1;
+ if (cpi->num_extra_arfs) {
+      // Overwrite the update_type for the extra-ARFs' corresponding internal
+      // OVERLAYs: change from LF_UPDATE to INTNL_OVERLAY_UPDATE.
+ for (i = cpi->num_extra_arfs; i > 0; --i) {
+ cpi->arf_pos_in_gf[i] =
+ (i == cpi->num_extra_arfs ? 2 : cpi->arf_pos_for_ovrly[i + 1] + 1);
+
+ gf_group->update_type[cpi->arf_pos_for_ovrly[i]] = INTNL_OVERLAY_UPDATE;
+ gf_group->rf_level[cpi->arf_pos_for_ovrly[i]] = INTER_NORMAL;
+ }
+ }
+ } else {
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ }
+
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+}
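
For concreteness, the ext_arf_interval computed at the top of this function spaces the internal ALTREFs evenly across the GF group. A hypothetical example of the arithmetic:

#include <assert.h>

int main(void) {
  /* Hypothetical GF group: baseline interval 12 with 2 extra ARFs. */
  const int baseline_gf_interval = 12, num_extra_arfs = 2;
  const int ext_arf_interval =
      baseline_gf_interval / (num_extra_arfs + 1) - 1;
  /* Three frames between successive internal ALTREFs. */
  assert(ext_arf_interval == 3);
  return 0;
}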
+
+static void define_gf_group_structure(VP9_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ int i;
+ int frame_index = 0;
int key_frame;
- const int max_bits = frame_max_bits(&cpi->rc, oxcf);
- int64_t total_group_bits = gf_group_bits;
- int mid_boost_bits = 0;
int mid_frame_idx;
unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
- int alt_frame_index = frame_index;
int normal_frames;
- int normal_frame_bits;
- int last_frame_reduction = 0;
- double av_score = 1.0;
- double tot_norm_frame_score = 1.0;
- double this_frame_score = 1.0;
key_frame = cpi->common.frame_type == KEY_FRAME;
@@ -2150,35 +2350,31 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// For key frames the frame target rate is already set and it
// is also the golden frame.
+ // === [frame_index == 0] ===
if (!key_frame) {
if (rc->source_alt_ref_active) {
- gf_group->update_type[0] = OVERLAY_UPDATE;
- gf_group->rf_level[0] = INTER_NORMAL;
- gf_group->bit_allocation[0] = 0;
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
} else {
- gf_group->update_type[0] = GF_UPDATE;
- gf_group->rf_level[0] = GF_ARF_STD;
- gf_group->bit_allocation[0] = gf_arf_bits;
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
}
- gf_group->arf_update_idx[0] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
}
- // Deduct the boost bits for arf (or gf if it is not a key frame)
- // from the group total.
- if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
+ ++frame_index;
- // Store the bits to spend on the ARF if there is one.
+ // === [frame_index == 1] ===
if (rc->source_alt_ref_pending) {
- gf_group->update_type[alt_frame_index] = ARF_UPDATE;
- gf_group->rf_level[alt_frame_index] = GF_ARF_STD;
- gf_group->bit_allocation[alt_frame_index] = gf_arf_bits;
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
- gf_group->arf_src_offset[alt_frame_index] =
+ gf_group->arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
- gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[alt_frame_index] =
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] =
arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
rc->source_alt_ref_active];
++frame_index;
@@ -2203,6 +2399,103 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
normal_frames =
rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
+ for (i = 0; i < normal_frames; ++i) {
+ int arf_idx = 0;
+ if (twopass->stats_in >= twopass->stats_in_end) break;
+
+ if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
+ if (frame_index <= mid_frame_idx) arf_idx = 1;
+ }
+
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
+
+ gf_group->update_type[frame_index] = LF_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+
+ ++frame_index;
+ }
+
+ // Note:
+ // We need to configure the frame at the end of the sequence + 1 that will be
+ // the start frame for the next group. Otherwise prior to the call to
+ // vp9_rc_get_second_pass_params() the data will be undefined.
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
+
+ if (rc->source_alt_ref_pending) {
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+
+ // Final setup for second arf and its overlay.
+ if (cpi->multi_arf_enabled)
+ gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE;
+ } else {
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ }
+
+ // Note whether multi-arf was enabled this group for next time.
+ cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
+}
+
+static void allocate_gf_multi_arf_bits(VP9_COMP *cpi, int64_t gf_group_bits,
+ int gf_arf_bits) {
+ VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ RATE_CONTROL *const rc = &cpi->rc;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ FIRSTPASS_STATS frame_stats;
+ int i;
+ int frame_index = 0;
+ int target_frame_size;
+ int key_frame;
+ const int max_bits = frame_max_bits(&cpi->rc, oxcf);
+ int64_t total_group_bits = gf_group_bits;
+ int normal_frames;
+ int normal_frame_bits;
+ int last_frame_reduction = 0;
+ double av_score = 1.0;
+ double tot_norm_frame_score = 1.0;
+ double this_frame_score = 1.0;
+
+  // Define the GF group structure (frame update types and ordering).
+ define_gf_multi_arf_structure(cpi);
+
+ //========================================
+
+ key_frame = cpi->common.frame_type == KEY_FRAME;
+
+ // For key frames the frame target rate is already set and it
+ // is also the golden frame.
+ // === [frame_index == 0] ===
+ if (!key_frame) {
+ gf_group->bit_allocation[frame_index] =
+ rc->source_alt_ref_active ? 0 : gf_arf_bits;
+ }
+
+ // Deduct the boost bits for arf (or gf if it is not a key frame)
+ // from the group total.
+ if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
+
+ ++frame_index;
+
+ // === [frame_index == 1] ===
+ // Store the bits to spend on the ARF if there is one.
+ if (rc->source_alt_ref_pending) {
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+
+ ++frame_index;
+
+    // Skip all the extra-ARFs right after the ARF at the start of the
+    // current GF group.
+ if (cpi->num_extra_arfs) {
+ while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
+ ++frame_index;
+ }
+ }
+
+ normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
if (normal_frames > 1)
normal_frame_bits = (int)(total_group_bits / normal_frames);
else
@@ -2215,8 +2508,8 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// Allocate bits to the other frames in the group.
for (i = 0; i < normal_frames; ++i) {
- int arf_idx = 0;
if (EOF == input_stats(twopass, &frame_stats)) break;
+
if (oxcf->vbr_corpus_complexity) {
this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf,
&frame_stats, av_score);
@@ -2230,20 +2523,156 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
target_frame_size -= last_frame_reduction;
}
+    // TODO(zoeliu): Further check whether the following is needed for the
+    // hierarchical GF group structure.
if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
- mid_boost_bits += (target_frame_size >> 4);
target_frame_size -= (target_frame_size >> 4);
-
- if (frame_index <= mid_frame_idx) arf_idx = 1;
}
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
target_frame_size =
clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));
- gf_group->update_type[frame_index] = LF_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
+ if (gf_group->update_type[frame_index] == BRF_UPDATE) {
+ // Boost up the allocated bits on BWDREF_FRAME
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size + (target_frame_size >> 2);
+ } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) {
+ // Press down the allocated bits on LAST_BIPRED_UPDATE frames
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size - (target_frame_size >> 1);
+ } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) {
+ // TODO(zoeliu): Investigate whether the allocated bits on BIPRED_UPDATE
+ // frames need to be further adjusted.
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+ } else {
+ assert(gf_group->update_type[frame_index] == LF_UPDATE ||
+ gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE);
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+ }
+
+ ++frame_index;
+
+    // Skip all the extra-ARFs.
+ if (cpi->num_extra_arfs) {
+ while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
+ ++frame_index;
+ }
+ }
+
+ // NOTE: We need to configure the frame at the end of the sequence + 1 that
+  //       will be the start frame for the next group. Otherwise, prior to the
+  //       call to vp9_rc_get_second_pass_params(), the data will be undefined.
+ if (rc->source_alt_ref_pending) {
+ if (cpi->num_extra_arfs) {
+ // NOTE: For bit allocation, move the allocated bits associated with
+ // INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE.
+      // i > 0 for extra-ARFs and i == 0 for the ARF:
+ // arf_pos_for_ovrly[i]: Position for INTNL_OVERLAY_UPDATE
+ // arf_pos_in_gf[i]: Position for INTNL_ARF_UPDATE
+ for (i = cpi->num_extra_arfs; i > 0; --i) {
+ assert(gf_group->update_type[cpi->arf_pos_for_ovrly[i]] ==
+ INTNL_OVERLAY_UPDATE);
+
+ // Encoder's choice:
+        // Set show_existing_frame == 1 for all extra-ARFs, and hence
+        // allocate zero bits for all internal OVERLAY frames.
+ gf_group->bit_allocation[cpi->arf_pos_in_gf[i]] =
+ gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]];
+ gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]] = 0;
+ }
+ }
+ }
+}
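
The per-update-type adjustments above amount to a +25% boost for BRF_UPDATE frames and a -50% cut for LAST_BIPRED_UPDATE frames relative to the nominal per-frame budget. A quick numeric check (the 1000-bit target is hypothetical):

#include <assert.h>

int main(void) {
  const int target_frame_size = 1000;
  /* BRF_UPDATE: target + (target >> 2). */
  assert(target_frame_size + (target_frame_size >> 2) == 1250);
  /* LAST_BIPRED_UPDATE: target - (target >> 1). */
  assert(target_frame_size - (target_frame_size >> 1) == 500);
  return 0;
}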
+
+static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
+ int gf_arf_bits) {
+ VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ RATE_CONTROL *const rc = &cpi->rc;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ FIRSTPASS_STATS frame_stats;
+ int i;
+ int frame_index = 0;
+ int target_frame_size;
+ int key_frame;
+ const int max_bits = frame_max_bits(&cpi->rc, oxcf);
+ int64_t total_group_bits = gf_group_bits;
+ int mid_boost_bits = 0;
+ int mid_frame_idx;
+ int normal_frames;
+ int normal_frame_bits;
+ int last_frame_reduction = 0;
+ double av_score = 1.0;
+ double tot_norm_frame_score = 1.0;
+ double this_frame_score = 1.0;
+
+  // Define the GF group structure (frame update types and ordering).
+ define_gf_group_structure(cpi);
+
+ key_frame = cpi->common.frame_type == KEY_FRAME;
+
+ // For key frames the frame target rate is already set and it
+ // is also the golden frame.
+ // === [frame_index == 0] ===
+ if (!key_frame) {
+ gf_group->bit_allocation[frame_index] =
+ rc->source_alt_ref_active ? 0 : gf_arf_bits;
+ }
+
+ // Deduct the boost bits for arf (or gf if it is not a key frame)
+ // from the group total.
+ if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
+
+ ++frame_index;
+
+ // === [frame_index == 1] ===
+ // Store the bits to spend on the ARF if there is one.
+ if (rc->source_alt_ref_pending) {
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+
+ ++frame_index;
+
+ // Set aside a slot for a level 1 arf.
+ if (cpi->multi_arf_enabled) ++frame_index;
+ }
+
+ // Define middle frame
+ mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
+
+ normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
+ if (normal_frames > 1)
+ normal_frame_bits = (int)(total_group_bits / normal_frames);
+ else
+ normal_frame_bits = (int)total_group_bits;
+
+ if (oxcf->vbr_corpus_complexity) {
+ av_score = get_distribution_av_err(cpi, twopass);
+ tot_norm_frame_score = calculate_group_score(cpi, av_score, normal_frames);
+ }
+
+ // Allocate bits to the other frames in the group.
+ for (i = 0; i < normal_frames; ++i) {
+ if (EOF == input_stats(twopass, &frame_stats)) break;
+ if (oxcf->vbr_corpus_complexity) {
+ this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf,
+ &frame_stats, av_score);
+ normal_frame_bits = (int)((double)total_group_bits *
+ (this_frame_score / tot_norm_frame_score));
+ }
+
+ target_frame_size = normal_frame_bits;
+ if ((i == (normal_frames - 1)) && (i >= 1)) {
+ last_frame_reduction = normal_frame_bits / 16;
+ target_frame_size -= last_frame_reduction;
+ }
+
+ if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
+ mid_boost_bits += (target_frame_size >> 4);
+ target_frame_size -= (target_frame_size >> 4);
+ }
+
+ target_frame_size =
+ clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));
gf_group->bit_allocation[frame_index] = target_frame_size;
++frame_index;
@@ -2256,27 +2685,15 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// We need to configure the frame at the end of the sequence + 1 that will be
// the start frame for the next group. Otherwise prior to the call to
// vp9_rc_get_second_pass_params() the data will be undefined.
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
if (rc->source_alt_ref_pending) {
- gf_group->update_type[frame_index] = OVERLAY_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
-
// Final setup for second arf and its overlay.
if (cpi->multi_arf_enabled) {
gf_group->bit_allocation[2] =
gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits;
- gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE;
gf_group->bit_allocation[mid_frame_idx] = 0;
}
- } else {
- gf_group->update_type[frame_index] = GF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
}
-
- // Note whether multi-arf was enabled this group for next time.
- cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
}
// Adjusts the ARNF filter for a GF group.
@@ -2297,6 +2714,7 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
#define ARF_DECAY_BREAKOUT 0.10
#define ARF_ABS_ZOOM_THRESH 4.0
+#define MAX_GF_BOOST 5400
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2338,6 +2756,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const int is_key_frame = frame_is_intra_only(cm);
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+ int disable_bwd_extarf;
+
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (is_key_frame == 0) {
@@ -2442,7 +2862,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Monitor for static sections.
if ((rc->frames_since_key + i - 1) > 1) {
- zero_motion_accumulator *= get_zero_motion_factor(cpi, &next_frame);
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
}
// Break clause to detect very still sections after motion. For example,
@@ -2496,6 +2917,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Should we use the alternate reference frame.
if ((zero_motion_accumulator < 0.995) && allow_alt_ref &&
+ (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
(i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
const int forward_frames = (rc->frames_to_key - i >= i - 1)
? i - 1
@@ -2512,7 +2934,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
? 1
: 0;
} else {
- rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1));
+ rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, 0, (i - 1)));
rc->source_alt_ref_pending = 0;
}
@@ -2531,6 +2953,39 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ // TODO(zoeliu): Turn on the option to disable extra ALTREFs for still GF
+ // groups.
+ // Disable extra altrefs for "still" gf group:
+ // zero_motion_accumulator: minimum percentage of (0,0) motion;
+ // avg_sr_coded_error: average of the SSE per pixel of each frame;
+ // avg_raw_err_stdev: average of the standard deviation of (0,0)
+ // motion error per block of each frame.
+#if 0
+ assert(num_mbs > 0);
+ disable_bwd_extarf =
+ (zero_motion_accumulator > MIN_ZERO_MOTION &&
+ avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR &&
+ avg_raw_err_stdev < MAX_RAW_ERR_VAR);
+#else
+ disable_bwd_extarf = 0;
+#endif // 0
+
+ if (disable_bwd_extarf) cpi->extra_arf_allowed = 0;
+
+ if (!cpi->extra_arf_allowed) {
+ cpi->num_extra_arfs = 0;
+ } else {
+ // Compute how many extra alt_refs we can have
+ cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
+ rc->source_alt_ref_pending);
+ }
+  // Currently at most two extra ARFs are allowed.
+ assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
+
+ rc->bipred_group_interval = BFG_INTERVAL;
+ // The minimum bi-predictive frame group interval is 2.
+ if (rc->bipred_group_interval < 2) rc->bipred_group_interval = 0;
+
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -2582,7 +3037,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_group_error_left -= gf_group_err;
// Allocate bits to each of the frames in the GF group.
- allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);
+ if (cpi->extra_arf_allowed) {
+ allocate_gf_multi_arf_bits(cpi, gf_group_bits, gf_arf_bits);
+ } else {
+ allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);
+ }
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -2974,7 +3433,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Special case for static / slide show content but don't apply
// if the kf group is very short.
if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
- rc->kf_boost = VPXMAX((rc->frames_to_key * 100), MAX_KF_TOT_BOOST);
+ rc->kf_boost = MAX_KF_TOT_BOOST;
} else {
// Apply various clamps for min and max boost
rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
@@ -3008,39 +3467,109 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Define the reference buffers that will be updated post encode.
-static void configure_buffer_updates(VP9_COMP *cpi) {
+static void configure_multi_arf_buffer_updates(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
cpi->rc.is_src_frame_alt_ref = 0;
+ cpi->rc.is_bwd_ref_frame = 0;
+ cpi->rc.is_last_bipred_frame = 0;
+ cpi->rc.is_bipred_frame = 0;
+ cpi->rc.is_src_frame_ext_arf = 0;
+
switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
case KF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 1;
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt2_ref_frame = 1;
cpi->refresh_alt_ref_frame = 1;
break;
+
case LF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
break;
+
case GF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 1;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
break;
+
case OVERLAY_UPDATE:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 1;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
+
cpi->rc.is_src_frame_alt_ref = 1;
break;
- default:
- assert(twopass->gf_group.update_type[twopass->gf_group.index] ==
- ARF_UPDATE);
+
+ case ARF_UPDATE:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
+ // NOTE: BWDREF does not get updated along with ALTREF_FRAME.
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 1;
break;
+
+ case BRF_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ cpi->rc.is_bwd_ref_frame = 1;
+ break;
+
+ case LAST_BIPRED_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ cpi->rc.is_last_bipred_frame = 1;
+ break;
+
+ case BIPRED_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ cpi->rc.is_bipred_frame = 1;
+ break;
+
+ case INTNL_OVERLAY_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ cpi->rc.is_src_frame_alt_ref = 1;
+ cpi->rc.is_src_frame_ext_arf = 1;
+ break;
+
+ case INTNL_ARF_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+
+ default: assert(0); break;
}
}
@@ -3070,15 +3599,19 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
GF_GROUP *const gf_group = &twopass->gf_group;
FIRSTPASS_STATS this_frame;
- int target_rate;
-
if (!twopass->stats_in) return;
// If this is an arf frame then we dont want to read the stats file or
// advance the input pointer as we already have what we need.
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
int target_rate;
- configure_buffer_updates(cpi);
+
+ if (cpi->extra_arf_allowed) {
+ configure_multi_arf_buffer_updates(cpi);
+ } else {
+ vp9_configure_buffer_updates(cpi, gf_group->index);
+ }
+
target_rate = gf_group->bit_allocation[gf_group->index];
target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
rc->base_frame_target = target_rate;
@@ -3170,7 +3703,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
#endif
}
- configure_buffer_updates(cpi);
+ if (cpi->extra_arf_allowed) {
+ configure_multi_arf_buffer_updates(cpi);
+ } else {
+ vp9_configure_buffer_updates(cpi, gf_group->index);
+ }
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
@@ -3179,8 +3716,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
- target_rate = gf_group->bit_allocation[gf_group->index];
- rc->base_frame_target = target_rate;
+ rc->base_frame_target = gf_group->bit_allocation[gf_group->index];
// The multiplication by 256 reverses a scaling factor of (>> 8)
// applied when combining MB error values for the frame.
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index aa497e3da..271791324 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -11,6 +11,8 @@
#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
#define VP9_ENCODER_VP9_FIRSTPASS_H_
+#include <assert.h>
+
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_ratectrl.h"
@@ -41,6 +43,13 @@ typedef struct {
#define INVALID_ROW -1
+// Length of the bi-predictive frame group (BFG)
+// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
+// number of bi-predictive frames.
+#define BFG_INTERVAL 2
+#define MAX_EXT_ARFS 2
+#define MIN_EXT_ARF_INTERVAL 4
+
typedef struct {
double frame_mb_intra_factor;
double frame_mb_brightness_factor;
@@ -107,7 +116,12 @@ typedef enum {
GF_UPDATE = 2,
ARF_UPDATE = 3,
OVERLAY_UPDATE = 4,
- FRAME_UPDATE_TYPES = 5
+ BRF_UPDATE = 5, // Backward Reference Frame
+ LAST_BIPRED_UPDATE = 6, // Last Bi-predictive Frame
+ BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one
+ INTNL_OVERLAY_UPDATE = 8, // Internal Overlay Frame
+ INTNL_ARF_UPDATE = 9, // Internal Altref Frame (candidate for ALTREF2)
+ FRAME_UPDATE_TYPES = 10
} FRAME_UPDATE_TYPE;
#define FC_ANIMATION_THRESH 0.15
@@ -120,12 +134,14 @@ typedef enum {
typedef struct {
unsigned char index;
unsigned char first_inter_index;
- RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 1];
- FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 1];
- unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
- unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
- unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
- int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1];
+ RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char brf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char bidir_pred_enabled[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2];
} GF_GROUP;
typedef struct {
@@ -194,7 +210,6 @@ void vp9_first_pass_encode_tile_mb_row(struct VP9_COMP *cpi,
void vp9_init_second_pass(struct VP9_COMP *cpi);
void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
-void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
// Post encode update of the rate control parameters for 2-pass
void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
@@ -202,6 +217,17 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
int *scaled_frame_height);
+static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
+ assert(MAX_EXT_ARFS > 0);
+ if (arf_pending) {
+ if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1))
+ return MAX_EXT_ARFS;
+ else if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS)
+ return MAX_EXT_ARFS - 1;
+ }
+ return 0;
+}
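
With MIN_EXT_ARF_INTERVAL = 4 and MAX_EXT_ARFS = 2 as defined above, this helper grants two extra ARFs for GF intervals of 12 or more, one for intervals of 8 to 11, and none otherwise. A standalone sketch replicating the thresholds:

#include <assert.h>

#define MIN_EXT_ARF_INTERVAL 4
#define MAX_EXT_ARFS 2

static int extra_arfs(int interval, int arf_pending) {
  if (arf_pending) {
    if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1))
      return MAX_EXT_ARFS;
    if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS)
      return MAX_EXT_ARFS - 1;
  }
  return 0;
}

int main(void) {
  assert(extra_arfs(12, 1) == 2); /* long group: two extra ARFs */
  assert(extra_arfs(8, 1) == 1);  /* medium group: one */
  assert(extra_arfs(7, 1) == 0);  /* short group: none */
  assert(extra_arfs(16, 0) == 0); /* no ARF pending: none */
  return 0;
}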
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 1ba518af8..50a2c9057 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -224,6 +224,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (rv && search_subpel) {
int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
+ if (cpi->sf.mv.enable_adaptive_subpel_force_stop) {
+ int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
+ if (abs(tmp_mv->as_mv.row) >= mv_thresh ||
+ abs(tmp_mv->as_mv.col) >= mv_thresh)
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above;
+ else
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below;
+ }
cpi->find_fractional_mv_step(
x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
@@ -342,7 +350,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
struct macroblockd_plane *const pd = &xd->plane[0];
const uint32_t dc_quant = pd->dequant[0];
const uint32_t ac_quant = pd->dequant[1];
- const int64_t dc_thr = dc_quant * dc_quant >> 6;
+ int64_t dc_thr = dc_quant * dc_quant >> 6;
int64_t ac_thr = ac_quant * ac_quant >> 6;
unsigned int var;
int sum;
@@ -398,6 +406,11 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
tx_size = TX_8X8;
else if (tx_size > TX_16X16)
tx_size = TX_16X16;
+
+    // For screen content, force 4X4 tx_size over 8X8 when variance is large.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && tx_size == TX_8X8 &&
+ bsize <= BLOCK_16X16 && var > (ac_thr << 6))
+ tx_size = TX_4X4;
} else {
tx_size = VPXMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
@@ -406,6 +419,10 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
assert(tx_size >= TX_8X8);
xd->mi[0]->tx_size = tx_size;
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->zero_temp_sad_source &&
+ x->source_variance == 0)
+ dc_thr = dc_thr << 1;
+
// Evaluate if the partition block is a skippable block in Y plane.
{
unsigned int sse16x16[16] = { 0 };
@@ -576,6 +593,12 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
xd->mi[0]->tx_size = TX_8X8;
else if (xd->mi[0]->tx_size > TX_16X16)
xd->mi[0]->tx_size = TX_16X16;
+
+    // For screen content, force 4X4 tx_size over 8X8 when variance is large.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+ xd->mi[0]->tx_size == TX_8X8 && bsize <= BLOCK_16X16 &&
+ var > (ac_thr << 6))
+ xd->mi[0]->tx_size = TX_4X4;
} else {
xd->mi[0]->tx_size =
VPXMIN(max_txsize_lookup[bsize],
@@ -1332,6 +1355,7 @@ static void recheck_zeromv_after_denoising(
mi->ref_frame[1] = NONE;
mi->mv[0].as_int = 0;
mi->interp_filter = EIGHTTAP;
+ if (cpi->sf.default_interp_filter == BILINEAR) mi->interp_filter = BILINEAR;
xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
@@ -1421,7 +1445,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
- const SVC *const svc = &cpi->svc;
+ SVC *const svc = &cpi->svc;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -1488,6 +1512,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int skip_ref_find_pred[4] = { 0 };
unsigned int sse_zeromv_normalized = UINT_MAX;
unsigned int best_sse_sofar = UINT_MAX;
+ int gf_temporal_ref = 0;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
@@ -1495,7 +1520,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif
INTERP_FILTER filter_gf_svc = EIGHTTAP;
MV_REFERENCE_FRAME best_second_ref_frame = NONE;
- MV_REFERENCE_FRAME spatial_ref = GOLDEN_FRAME;
+ MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME;
const struct segmentation *const seg = &cm->seg;
int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
@@ -1504,27 +1529,45 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int svc_mv_row = 0;
int no_scaling = 0;
unsigned int thresh_svc_skip_golden = 500;
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) {
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id - 1,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ int scene_change_detected =
+ cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe);
+ x->source_variance = UINT_MAX;
+ if (cpi->sf.default_interp_filter == BILINEAR) {
+ best_pred_filter = BILINEAR;
+ filter_gf_svc = BILINEAR;
+ }
+ if (cpi->use_svc && svc->spatial_layer_id > 0) {
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1;
}
- if (cpi->svc.spatial_layer_id > 0 &&
- (cpi->svc.high_source_sad_superframe || no_scaling))
+ if (svc->spatial_layer_id > 0 &&
+ (svc->high_source_sad_superframe || no_scaling))
thresh_svc_skip_golden = 0;
// Lower the skip threshold if lower spatial layer is better quality relative
// to current layer.
- else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&
- cm->base_qindex > cpi->svc.lower_layer_qindex + 15)
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 &&
+ cm->base_qindex > svc->lower_layer_qindex + 15)
thresh_svc_skip_golden = 100;
// Increase the skip threshold if the lower spatial layer is lower quality
// relative to the current layer.
- else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 &&
- cm->base_qindex < cpi->svc.lower_layer_qindex - 20)
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 &&
+ cm->base_qindex < svc->lower_layer_qindex - 20)
thresh_svc_skip_golden = 1000;
+ if (!cpi->use_svc ||
+ (svc->use_gf_temporal_ref_current_layer &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
+ gf_temporal_ref = 1;
+ if (cpi->rc.avg_frame_low_motion > 70)
+ thresh_svc_skip_golden = 500;
+ else
+ thresh_svc_skip_golden = 0;
+ }
+
init_ref_frame_cost(cm, xd, ref_frame_cost);
memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);
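The LAYER_IDS_TO_IDX() calls above flatten a (spatial, temporal) layer id pair into a single index into svc->layer_context. A minimal sketch of the assumed convention (matching its row-major definition in vp9_svc_layercontext.h):

#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))

/* Example: with 3 temporal layers, spatial layer 1 / temporal layer 2
   maps to layer_context[1 * 3 + 2] == layer_context[5]. */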
@@ -1554,12 +1597,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// filter_ref, we use a less strict condition on assigning filter_ref.
// This is to reduce the probability of entering the flow of not assigning
// filter_ref and then skipping the filter search.
- if (xd->above_mi && is_inter_block(xd->above_mi))
- filter_ref = xd->above_mi->interp_filter;
- else if (xd->left_mi && is_inter_block(xd->left_mi))
- filter_ref = xd->left_mi->interp_filter;
- else
- filter_ref = cm->interp_filter;
+ filter_ref = cm->interp_filter;
+ if (cpi->sf.default_interp_filter != BILINEAR) {
+ if (xd->above_mi && is_inter_block(xd->above_mi))
+ filter_ref = xd->above_mi->interp_filter;
+ else if (xd->left_mi && is_inter_block(xd->left_mi))
+ filter_ref = xd->left_mi->interp_filter;
+ }
// initialize mode decisions
vp9_rd_cost_reset(&best_rdc);
@@ -1580,15 +1624,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif // CONFIG_VP9_HIGHBITDEPTH
x->source_variance =
vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && mi->segment_id > 0 &&
+ x->zero_temp_sad_source && x->source_variance == 0) {
+ mi->segment_id = 0;
+ vp9_init_plane_quantizers(cpi, x);
+ }
}
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
if (cpi->use_svc) {
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame;
}
if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
@@ -1596,7 +1646,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
#endif
- if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc &&
+ if (cpi->rc.frames_since_golden == 0 && gf_temporal_ref &&
!cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) {
usable_ref_frame = LAST_FRAME;
} else {
@@ -1623,19 +1673,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// For svc mode, on spatial_layer_id > 0: if the reference has different scale
// constrain the inter mode to only test zero motion.
if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
- cpi->svc.spatial_layer_id > 0) {
+ svc->spatial_layer_id > 0 && !gf_temporal_ref) {
if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
svc_force_zero_mode[LAST_FRAME - 1] = 1;
- spatial_ref = LAST_FRAME;
+ inter_layer_ref = LAST_FRAME;
}
}
if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
- spatial_ref = GOLDEN_FRAME;
+ inter_layer_ref = GOLDEN_FRAME;
}
}
}
@@ -1652,6 +1702,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
}
+ if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad ||
+ cpi->rc.avg_frame_low_motion < 60))
+ usable_ref_frame = LAST_FRAME;
+
if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
!svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
use_golden_nonzeromv = 0;
@@ -1677,6 +1731,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
+ // Skip find_predictors() if the reference frame is not in
+ // ref_frame_flags (i.e., not used as a reference for this frame).
+ skip_ref_find_pred[ref_frame] =
+ !(cpi->ref_frame_flags & flag_list[ref_frame]);
if (!skip_ref_find_pred[ref_frame]) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
&ref_frame_skip_mask, flag_list, tile_data, mi_row,
@@ -1692,9 +1750,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// an averaging filter for downsampling (phase = 8). If so, we will test
// a nonzero motion mode on the spatial reference.
// The nonzero motion is half pixel shifted to left and top (-4, -4).
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
- svc_force_zero_mode[spatial_ref - 1] &&
- cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
+ if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ svc_force_zero_mode[inter_layer_ref - 1] &&
+ svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 &&
+ !gf_temporal_ref) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
@@ -1713,7 +1772,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int inter_mv_mode = 0;
int skip_this_mv = 0;
int comp_pred = 0;
- int force_gf_mv = 0;
+ int force_mv_inter_layer = 0;
PREDICTION_MODE this_mode;
second_ref_frame = NONE;
@@ -1737,14 +1796,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
+ if (svc->previous_frame_is_intra_only) {
+ if (ref_frame != LAST_FRAME || frame_mv[this_mode][ref_frame].as_int != 0)
+ continue;
+ }
+
// If the segment reference frame feature is enabled then do nothing if the
// current ref frame is not allowed.
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
- if (flag_svc_subpel && ref_frame == spatial_ref) {
- force_gf_mv = 1;
+ if (flag_svc_subpel && ref_frame == inter_layer_ref) {
+ force_mv_inter_layer = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
if (this_mode == NEWMV) {
@@ -1771,8 +1835,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
sse_zeromv_normalized < thresh_svc_skip_golden)
continue;
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
+
if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
- this_mode != NEARESTMV) {
+ (frame_mv[this_mode][ref_frame].as_int != 0 ||
+ (cpi->oxcf.content == VP9E_CONTENT_SCREEN && !svc->spatial_layer_id &&
+ !x->zero_temp_sad_source))) {
continue;
}
@@ -1802,14 +1870,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
}
- if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
-
if (const_motion[ref_frame] && this_mode == NEARMV) continue;
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
// later.
- if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
+ if (!force_mv_inter_layer && force_skip_low_temp_var &&
+ ref_frame == GOLDEN_FRAME &&
frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
@@ -1823,7 +1890,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
if (cpi->use_svc) {
- if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] &&
+ if (!force_mv_inter_layer && svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
continue;
}
@@ -1883,10 +1950,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
(!cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
&rd_thresh_freq_fact[mode_index])))
- continue;
+ if (frame_mv[this_mode][ref_frame].as_int != 0) continue;
- if (this_mode == NEWMV && !force_gf_mv) {
- if (ref_frame > LAST_FRAME && !cpi->use_svc &&
+ if (this_mode == NEWMV && !force_mv_inter_layer) {
+ if (ref_frame > LAST_FRAME && gf_temporal_ref &&
cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad;
uint32_t dis;
@@ -1931,7 +1998,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16,
// for SVC encoding.
- if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 &&
+ if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 &&
frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
frame_mv[NEWMV][ref_frame].as_mv.col == 0)
continue;
@@ -2028,7 +2095,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
(ref_frame == LAST_FRAME ||
- (ref_frame == GOLDEN_FRAME && !force_gf_mv &&
+ (ref_frame == GOLDEN_FRAME && !force_mv_inter_layer &&
(cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
(((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
int pf_rate[3];
@@ -2188,7 +2255,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Skipping checking: test to see if this block can be reconstructed by
// prediction only.
- if (cpi->allow_encode_breakout) {
+ if (cpi->allow_encode_breakout && !xd->lossless && !scene_change_detected) {
encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode,
var_y, sse_y, yv12_mb, &this_rdc.rate,
&this_rdc.dist, flag_preduv_computed);
@@ -2235,7 +2302,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// If early termination flag is 1 and at least 2 modes are checked,
// the mode search is terminated.
- if (best_early_term && idx > 0) {
+ if (best_early_term && idx > 0 && !scene_change_detected) {
x->skip = 1;
break;
}
@@ -2254,17 +2321,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// layer is chosen as the reference. Always perform intra prediction if
// LAST is the only reference, or is_key_frame is set, or on base
// temporal layer.
- if (cpi->svc.spatial_layer_id) {
+ if (svc->spatial_layer_id && !gf_temporal_ref) {
perform_intra_pred =
- cpi->svc.temporal_layer_id == 0 ||
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
+ svc->temporal_layer_id == 0 ||
+ svc->layer_context[svc->temporal_layer_id].is_key_frame ||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
- (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ (!svc->layer_context[svc->temporal_layer_id].is_key_frame &&
svc_force_zero_mode[best_ref_frame - 1]);
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
- if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
- cpi->rc.is_src_frame_alt_ref)
+ if ((cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->rc.is_src_frame_alt_ref) ||
+ svc->previous_frame_is_intra_only)
perform_intra_pred = 0;
// If the segment reference frame feature is enabled and set then
@@ -2276,6 +2344,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
if (best_rdc.rdcost == INT64_MAX ||
+ (scene_change_detected && perform_intra_pred) ||
((!force_skip_low_temp_var || bsize < BLOCK_32X32 ||
x->content_state_sb == kVeryHighSad) &&
perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh &&
@@ -2429,7 +2498,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
frame_mv, reuse_inter_pred, best_tx_size,
best_mode, best_ref_frame, best_pred_filter,
best_mode_skip_txfm);
- vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision);
+ vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision,
+ gf_temporal_ref);
recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb,
&best_rdc, bsize, mi_row, mi_col);
best_ref_frame = ctx_den.best_ref_frame;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 599337f80..ec969e0cc 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -273,6 +273,14 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
const VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ // On a dropped frame, don't update the buffer level if it is currently
+ // stable (above the optimal level). Updating it can cause issues when the
+ // full superframe can drop (framedrop_mode != LAYER_DROP), since QP is
+ // adjusted downwards on buffer overflow, which can cause more frame drops.
+ if (cpi->svc.framedrop_mode != LAYER_DROP && encoded_frame_size == 0 &&
+ rc->buffer_level > rc->optimal_buffer_level)
+ return;
+
// Non-viewable frames are a special case and are treated as pure overhead.
if (!cm->show_frame) {
rc->bits_off_target -= encoded_frame_size;
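For context on the early return added at the top of this hunk: buffer_level mirrors bits_off_target, which for shown frames advances by the per-frame budget minus the actual encoded size. A hedged sketch of the leaky-bucket update (clamping details elided):

/* Sketch of the usual show_frame path in update_buffer_level(). */
rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = rc->bits_off_target;
/* A dropped frame (encoded_frame_size == 0) credits a full frame budget,
   pushing QP down; the early return avoids that when the buffer is already
   above rc->optimal_buffer_level. */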
@@ -355,6 +363,8 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->high_source_sad = 0;
rc->reset_high_source_sad = 0;
rc->high_source_sad_lagindex = -1;
+ rc->hybrid_intra_scene_change = 0;
+ rc->re_encode_maxq_scene_change = 0;
rc->alt_ref_gf_group = 0;
rc->last_frame_is_src_altref = 0;
rc->fac_active_worst_inter = 150;
@@ -390,7 +400,34 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
}
-static int check_buffer(VP9_COMP *cpi, int drop_mark) {
+static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) {
+ SVC *svc = &cpi->svc;
+ if (!cpi->use_svc || cpi->svc.framedrop_mode != FULL_SUPERFRAME_DROP) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ return (rc->buffer_level > drop_mark);
+ } else {
+ int i;
+ // For SVC in FULL_SUPERFRAME_DROP mode: the condition on the
+ // buffer (above threshold, so no drop) is checked on the current and
+ // upper spatial layers. If any spatial layer is not above the threshold,
+ // return 0.
+ for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ // Exclude the check for layers whose target bitrate is 0.
+ if (lc->target_bandwidth > 0) {
+ const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] *
+ lrc->optimal_buffer_level / 100);
+ if (!(lrc->buffer_level > drop_mark_layer)) return 0;
+ }
+ }
+ return 1;
+ }
+}
+
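The per-layer drop mark used here (and in check_buffer_below_thresh() below) is a percentage of that layer's optimal buffer level. A small worked example with hypothetical numbers:

/* Hypothetical: framedrop_thresh[i] = 30 (percent),
   lrc->optimal_buffer_level = 600000 bits. */
const int drop_mark_layer = (int)(30 * 600000LL / 100); /* = 180000 bits */
/* Above-threshold check: no drop only if every layer with a nonzero target
   bitrate has buffer_level > 180000; the below-threshold counterpart in
   FULL_SUPERFRAME_DROP mode drops as soon as any such layer is <= its mark. */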
+static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) {
SVC *svc = &cpi->svc;
if (!cpi->use_svc || cpi->svc.framedrop_mode == LAYER_DROP) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -398,32 +435,56 @@ static int check_buffer(VP9_COMP *cpi, int drop_mark) {
} else {
int i;
// For SVC in the constrained framedrop mode (svc->framedrop_mode =
- // CONSTRAINED_LAYER_DROP): the condition on buffer (to drop frame) is
- // checked on current and upper spatial layers.
+ // CONSTRAINED_LAYER_DROP or FULL_SUPERFRAME_DROP): the condition on the
+ // buffer (below threshold, so drop the frame) is checked on the current
+ // and upper spatial layers. In FULL_SUPERFRAME_DROP mode, if any
+ // spatial layer is <= threshold, then we return 1 (drop).
for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) {
const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
- const int drop_mark_layer =
- (int)(cpi->svc.framedrop_thresh[i] * lrc->optimal_buffer_level / 100);
- if (!(lrc->buffer_level <= drop_mark_layer)) return 0;
+ // Exclude the check for layers whose target bitrate is 0.
+ if (lc->target_bandwidth > 0) {
+ const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] *
+ lrc->optimal_buffer_level / 100);
+ if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) {
+ if (lrc->buffer_level <= drop_mark_layer) return 1;
+ } else {
+ if (!(lrc->buffer_level <= drop_mark_layer)) return 0;
+ }
+ }
}
- return 1;
+ if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP)
+ return 0;
+ else
+ return 1;
}
}
-int vp9_rc_drop_frame(VP9_COMP *cpi) {
+static int drop_frame(VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
+ SVC *svc = &cpi->svc;
int drop_frames_water_mark = oxcf->drop_frames_water_mark;
- if (cpi->use_svc)
- drop_frames_water_mark =
- cpi->svc.framedrop_thresh[cpi->svc.spatial_layer_id];
- if (!drop_frames_water_mark) {
+ if (cpi->use_svc) {
+ // If we have already dropped max_consec_drop frames, then we don't
+ // drop this spatial layer, and we reset the counter to 0.
+ if (svc->drop_count[svc->spatial_layer_id] == svc->max_consec_drop) {
+ svc->drop_count[svc->spatial_layer_id] = 0;
+ return 0;
+ } else {
+ drop_frames_water_mark = svc->framedrop_thresh[svc->spatial_layer_id];
+ }
+ }
+ if (!drop_frames_water_mark ||
+ (svc->spatial_layer_id > 0 &&
+ svc->framedrop_mode == FULL_SUPERFRAME_DROP)) {
return 0;
} else {
- if (rc->buffer_level < 0) {
+ if ((rc->buffer_level < 0 && svc->framedrop_mode != FULL_SUPERFRAME_DROP) ||
+ (check_buffer_below_thresh(cpi, -1) &&
+ svc->framedrop_mode == FULL_SUPERFRAME_DROP)) {
// Always drop if buffer is below 0.
return 1;
} else {
@@ -431,9 +492,11 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
// (starting with the next frame) until it increases back over drop_mark.
int drop_mark =
(int)(drop_frames_water_mark * rc->optimal_buffer_level / 100);
- if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) {
+ if (check_buffer_above_thresh(cpi, drop_mark) &&
+ (rc->decimation_factor > 0)) {
--rc->decimation_factor;
- } else if (check_buffer(cpi, drop_mark) && rc->decimation_factor == 0) {
+ } else if (check_buffer_below_thresh(cpi, drop_mark) &&
+ rc->decimation_factor == 0) {
rc->decimation_factor = 1;
}
if (rc->decimation_factor > 0) {
@@ -452,11 +515,81 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
}
}
+int vp9_rc_drop_frame(VP9_COMP *cpi) {
+ SVC *svc = &cpi->svc;
+ int svc_prev_layer_dropped = 0;
+ // In the constrained or full_superframe framedrop mode for svc
+ // (framedrop_mode != LAYER_DROP), if the previous spatial layer was
+ // dropped, drop the current spatial layer.
+ if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ svc->drop_spatial_layer[svc->spatial_layer_id - 1])
+ svc_prev_layer_dropped = 1;
+ if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP) ||
+ drop_frame(cpi)) {
+ vp9_rc_postencode_update_drop_frame(cpi);
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->last_frame_dropped = 1;
+ if (cpi->use_svc) {
+ svc->last_layer_dropped[svc->spatial_layer_id] = 1;
+ svc->drop_spatial_layer[svc->spatial_layer_id] = 1;
+ svc->drop_count[svc->spatial_layer_id]++;
+ svc->skip_enhancement_layer = 1;
+ if (svc->framedrop_mode == LAYER_DROP ||
+ svc->drop_spatial_layer[0] == 0) {
+ // For the case of constrained drop mode where the base is dropped
+ // (drop_spatial_layer[0] == 1), meaning the full superframe is dropped,
+ // we don't increment the svc frame counters. In particular the temporal
+ // layer counter (which is incremented in vp9_inc_frame_in_layer())
+ // won't be incremented, so on a dropped frame we try the same
+ // temporal_layer_id on the next incoming frame. This is to avoid an
+ // issue with temporal alignment with full superframe dropping.
+ vp9_inc_frame_in_layer(cpi);
+ }
+ if (svc->spatial_layer_id == svc->number_spatial_layers - 1) {
+ int i;
+ int all_layers_drop = 1;
+ for (i = 0; i < svc->spatial_layer_id; i++) {
+ if (svc->drop_spatial_layer[i] == 0) {
+ all_layers_drop = 0;
+ break;
+ }
+ }
+ if (all_layers_drop == 1) svc->skip_enhancement_layer = 0;
+ }
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int adjust_q_cbr(const VP9_COMP *cpi, int q) {
+ // This makes sure q is between oscillating Qs to prevent resonance.
+ if (!cpi->rc.reset_high_source_sad &&
+ (!cpi->oxcf.gf_cbr_boost_pct ||
+ !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) &&
+ (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
+ cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
+ int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
+ VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
+ // If the previous frame had overshoot and the current q needs to increase
+ // above the clamped value, reduce the clamp for faster reaction to
+ // overshoot.
+ if (cpi->rc.rc_1_frame == -1 && q > qclamp)
+ q = (q + qclamp) >> 1;
+ else
+ q = qclamp;
+ }
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ vp9_cyclic_refresh_limit_q(cpi, &q);
+ return q;
+}
+
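The clamp in adjust_q_cbr() only engages when the two previous frames moved the rate correction in opposite directions (rc_1_frame * rc_2_frame == -1). A worked example with hypothetical values:

/* Hypothetical: q_1_frame = 40, q_2_frame = 60, candidate q = 70,
   rc_1_frame = -1 (previous frame overshot), rc_2_frame = 1. */
int qclamp = clamp(70, VPXMIN(40, 60), VPXMAX(40, 60)); /* = 60 */
/* Previous overshoot and q > qclamp, so the clamp is relaxed:
   q = (70 + 60) >> 1 = 65 instead of the fully clamped 60. */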
static double get_rate_correction_factor(const VP9_COMP *cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_COMMON *const cm = &cpi->common;
double rcf;
- if (cpi->common.frame_type == KEY_FRAME) {
+ if (frame_is_intra_only(cm)) {
rcf = rc->rate_correction_factors[KF_STD];
} else if (cpi->oxcf.pass == 2) {
RATE_FACTOR_LEVEL rf_lvl =
@@ -476,13 +609,14 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) {
static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_COMMON *const cm = &cpi->common;
// Normalize RCF to account for the size-dependent scaling factor.
factor /= rcf_mult[cpi->rc.frame_size_selector];
factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
- if (cpi->common.frame_type == KEY_FRAME) {
+ if (frame_is_intra_only(cm)) {
rc->rate_correction_factors[KF_STD] = factor;
} else if (cpi->oxcf.pass == 2) {
RATE_FACTOR_LEVEL rf_lvl =
@@ -519,8 +653,9 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) {
projected_size_based_on_q =
vp9_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor);
} else {
+ FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type;
projected_size_based_on_q =
- vp9_estimate_bits_at_q(cpi->common.frame_type, cm->base_qindex, cm->MBs,
+ vp9_estimate_bits_at_q(frame_type, cm->base_qindex, cm->MBs,
rate_correction_factor, cm->bit_depth);
}
// Work out a size correction factor.
@@ -594,8 +729,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
bits_per_mb_at_this_q =
(int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
} else {
+ FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type;
bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(
- cm->frame_type, i, correction_factor, cm->bit_depth);
+ frame_type, i, correction_factor, cm->bit_depth);
}
if (bits_per_mb_at_this_q <= target_bits_per_mb) {
@@ -610,22 +746,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
}
} while (++i <= active_worst_quality);
- // In CBR mode, this makes sure q is between oscillating Qs to prevent
- // resonance.
- if (cpi->oxcf.rc_mode == VPX_CBR && !cpi->rc.reset_high_source_sad &&
- (!cpi->oxcf.gf_cbr_boost_pct ||
- !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) &&
- (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
- cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
- int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
- VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
- // If the previous had overshoot and the current q needs to increase above
- // the clamped value, reduce the clamp for faster reaction to overshoot.
- if (cpi->rc.rc_1_frame == -1 && q > qclamp)
- q = (q + qclamp) >> 1;
- else
- q = qclamp;
- }
+ // Adjustment to q for CBR mode.
+ if (cpi->oxcf.rc_mode == VPX_CBR) return adjust_q_cbr(cpi, q);
+
return q;
}
@@ -705,7 +828,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
int active_worst_quality;
int ambient_qp;
unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers;
- if (cm->frame_type == KEY_FRAME || rc->reset_high_source_sad)
+ if (frame_is_intra_only(cm) || rc->reset_high_source_sad)
return rc->worst_quality;
// For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME]
// for the first few frames following key frame. These are both initialized
@@ -730,8 +853,10 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2);
if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
- // Maximum limit for down adjustment, ~30%.
+ // Maximum limit for down adjustment is ~30%; make it lower for screen content.
int max_adjustment_down = active_worst_quality / 3;
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ max_adjustment_down = active_worst_quality >> 3;
if (max_adjustment_down) {
buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) /
max_adjustment_down);
@@ -836,7 +961,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
*bottom_index = active_best_quality;
// Special case code to try and match quality with forced key frames
- if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
+ if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
q = rc->last_boosted_qindex;
} else {
q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
@@ -1075,7 +1200,7 @@ int vp9_frame_type_qdelta(const VP9_COMP *cpi, int rf_level, int q) {
#define STATIC_MOTION_THRESH 95
static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
- int *top_index) {
+ int *top_index, int gf_group_index) {
const VP9_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1122,6 +1247,11 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
active_best_quality /= 4;
}
+ // Don't allow the active min to be lossless (q0) unless the max q
+ // already indicates lossless.
+ active_best_quality =
+ VPXMIN(active_worst_quality, VPXMAX(1, active_best_quality));
+
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
q_adj_factor -= 0.25;
@@ -1164,7 +1294,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Modify best quality for second level arfs. For mode VPX_Q this
// becomes the baseline frame q.
- if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
+ if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW)
active_best_quality = (active_best_quality + cq_level + 1) / 2;
}
} else {
@@ -1200,12 +1330,20 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
}
}
+ // For normal frames do not allow an active minq lower than the q used for
+ // the last boosted frame.
+ if (!frame_is_intra_only(cm) &&
+ (!(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) ||
+ rc->is_src_frame_alt_ref)) {
+ active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex);
+ }
+
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
vpx_clear_system_state();
// Static forced key frames Q restrictions dealt with elsewhere.
if (!frame_is_intra_only(cm) || !rc->this_key_frame_forced ||
cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH) {
- int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index],
+ int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group_index],
active_worst_quality);
active_worst_quality =
VPXMAX(active_worst_quality + qdelta, active_best_quality);
@@ -1261,13 +1399,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index,
int *top_index) {
int q;
+ const int gf_group_index = cpi->twopass.gf_group.index;
if (cpi->oxcf.pass == 0) {
if (cpi->oxcf.rc_mode == VPX_CBR)
q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
else
q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
} else {
- q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
+ q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index,
+ gf_group_index);
}
if (cpi->sf.use_nonrd_pick_mode) {
if (cpi->sf.force_frame_boost == 1) q -= cpi->sf.max_delta_qindex;
@@ -1280,6 +1420,62 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index,
return q;
}
+void vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) {
+ TWO_PASS *const twopass = &cpi->twopass;
+
+ cpi->rc.is_src_frame_alt_ref = 0;
+ switch (twopass->gf_group.update_type[gf_group_index]) {
+ case KF_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+ case LF_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+ case GF_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+ case OVERLAY_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ break;
+ default:
+ assert(twopass->gf_group.update_type[gf_group_index] == ARF_UPDATE);
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+ }
+}
+
+void vp9_estimate_qp_gop(VP9_COMP *cpi) {
+ int gop_length = cpi->rc.baseline_gf_interval;
+ int bottom_index, top_index;
+ int idx;
+ const int gf_index = cpi->twopass.gf_group.index;
+
+ for (idx = 1; idx <= gop_length + 1 && idx < MAX_LAG_BUFFERS; ++idx) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[idx];
+ int target_rate = cpi->twopass.gf_group.bit_allocation[idx];
+ cpi->twopass.gf_group.index = idx;
+ vp9_rc_set_frame_target(cpi, target_rate);
+ vp9_configure_buffer_updates(cpi, idx);
+ tpl_frame->base_qindex =
+ rc_pick_q_and_bounds_two_pass(cpi, &bottom_index, &top_index, idx);
+ tpl_frame->base_qindex = VPXMAX(tpl_frame->base_qindex, 1);
+ }
+ // Restore the actual gf_group index and the frame update flags.
+ cpi->twopass.gf_group.index = gf_index;
+ vp9_configure_buffer_updates(cpi, gf_index);
+}
+
void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, int frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit) {
@@ -1386,7 +1582,8 @@ static void compute_frame_low_motion(VP9_COMP *const cpi) {
int cnt_zeromv = 0;
for (mi_row = 0; mi_row < rows; mi_row++) {
for (mi_col = 0; mi_col < cols; mi_col++) {
- if (abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16)
+ if (mi[0]->ref_frame[0] == LAST_FRAME &&
+ abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16)
cnt_zeromv++;
mi++;
}
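The cnt_zeromv total feeds rc->avg_frame_low_motion, which gates several of the changes above (e.g. the avg_frame_low_motion > 70 check in vp9_pick_inter_mode). A hedged sketch of how the count is assumed to be folded in, as a percentage with exponential smoothing:

/* Sketch, assuming the usual tail of compute_frame_low_motion(). */
cnt_zeromv = 100 * cnt_zeromv / (rows * cols);
rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) / 4;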
@@ -1400,6 +1597,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
const VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
+ SVC *const svc = &cpi->svc;
const int qindex = cm->base_qindex;
// Update rate control heuristics
@@ -1409,7 +1607,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
vp9_rc_update_rate_correction_factors(cpi);
// Keep a record of last Q and ambient average Q.
- if (cm->frame_type == KEY_FRAME) {
+ if (frame_is_intra_only(cm)) {
rc->last_q[KEY_FRAME] = qindex;
rc->avg_frame_qindex[KEY_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
@@ -1453,13 +1651,13 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
(cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
rc->last_boosted_qindex = qindex;
}
- if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex;
+ if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex;
update_buffer_level(cpi, rc->projected_frame_size);
// Rolling monitors of whether we are over or underspending used to help
// regulate min and Max Q in two pass.
- if (cm->frame_type != KEY_FRAME) {
+ if (!frame_is_intra_only(cm)) {
rc->rolling_target_bits = ROUND_POWER_OF_TWO(
rc->rolling_target_bits * 3 + rc->this_frame_target, 2);
rc->rolling_actual_bits = ROUND_POWER_OF_TWO(
@@ -1478,7 +1676,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
if (!cpi->use_svc) {
if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
- (cm->frame_type != KEY_FRAME))
+ (!frame_is_intra_only(cm)))
// Update the alternate reference frame stats as appropriate.
update_alt_ref_frame_stats(cpi);
else
@@ -1486,7 +1684,28 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
update_golden_frame_stats(cpi);
}
- if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
+ // If the second (long-term) temporal reference is used for SVC, update
+ // the golden frame counter, but only on the base temporal layer.
+ if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer &&
+ svc->temporal_layer_id == 0) {
+ int i = 0;
+ if (cpi->refresh_golden_frame)
+ rc->frames_since_golden = 0;
+ else
+ rc->frames_since_golden++;
+ // Decrement count down till next gf
+ if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
+ // Update the frames_since_golden for all upper temporal layers.
+ for (i = 1; i < svc->number_temporal_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->frames_since_golden = rc->frames_since_golden;
+ }
+ }
+
+ if (frame_is_intra_only(cm)) rc->frames_since_key = 0;
if (cm->show_frame) {
rc->frames_since_key++;
rc->frames_to_key--;
@@ -1500,18 +1719,34 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
if (oxcf->pass == 0) {
- if (cm->frame_type != KEY_FRAME) {
+ if (!frame_is_intra_only(cm) &&
+ (!cpi->use_svc ||
+ (cpi->use_svc &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame &&
+ svc->spatial_layer_id == svc->number_spatial_layers - 1))) {
compute_frame_low_motion(cpi);
if (cpi->sf.use_altref_onepass) update_altref_usage(cpi);
}
+ // For SVC: propagate avg_frame_low_motion (only computed on the top
+ // spatial layer) to all lower spatial layers.
+ if (cpi->use_svc &&
+ svc->spatial_layer_id == svc->number_spatial_layers - 1) {
+ int i;
+ for (i = 0; i < svc->number_spatial_layers - 1; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_frame_low_motion = rc->avg_frame_low_motion;
+ }
+ }
cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref;
}
- if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0;
+ if (!frame_is_intra_only(cm)) rc->reset_high_source_sad = 0;
rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth;
- if (cpi->use_svc &&
- cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
- cpi->svc.lower_layer_qindex = cm->base_qindex;
+ if (cpi->use_svc && svc->spatial_layer_id < svc->number_spatial_layers - 1)
+ svc->lower_layer_qindex = cm->base_qindex;
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
@@ -1707,27 +1942,78 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
return vp9_rc_clamp_iframe_target_size(cpi, target);
}
+static void set_intra_only_frame(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+ // Don't allow an intra_only frame for bypass/flexible SVC mode, if the
+ // number of spatial layers is 1, or if the number of spatial or temporal
+ // layers is > 3. Also, if intra-only is inserted on the very first frame,
+ // don't allow it if the number of temporal layers > 1. This is because on
+ // an intra-only frame only 3 reference buffers can be updated, but for
+ // temporal layers > 1 we generally need to use buffer slots 4 and 5.
+ if ((cm->current_video_frame == 0 && svc->number_temporal_layers > 1) ||
+ svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS ||
+ svc->number_spatial_layers > 3 || svc->number_temporal_layers > 3 ||
+ svc->number_spatial_layers == 1)
+ return;
+ cm->show_frame = 0;
+ cm->intra_only = 1;
+ cm->frame_type = INTER_FRAME;
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ cpi->ext_refresh_golden_frame = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ if (cm->current_video_frame == 0) {
+ cpi->lst_fb_idx = 0;
+ cpi->gld_fb_idx = 1;
+ cpi->alt_fb_idx = 2;
+ } else {
+ int i;
+ int count = 0;
+ cpi->lst_fb_idx = -1;
+ cpi->gld_fb_idx = -1;
+ cpi->alt_fb_idx = -1;
+ // For an intra-only frame we need to refresh all slots that were
+ // being used for the base layer (fb_idx_base[i] == 1).
+ // Assign last first, then golden, then alt.
+ for (i = 0; i < REF_FRAMES; ++i) {
+ if (svc->fb_idx_base[i] == 1) count++;
+ if (count == 1 && cpi->lst_fb_idx == -1) cpi->lst_fb_idx = i;
+ if (count == 2 && cpi->gld_fb_idx == -1) cpi->gld_fb_idx = i;
+ if (count == 3 && cpi->alt_fb_idx == -1) cpi->alt_fb_idx = i;
+ }
+ // If golden or alt is not being used for the base layer, then fall back
+ // to lst_fb_idx.
+ if (cpi->gld_fb_idx == -1) cpi->gld_fb_idx = cpi->lst_fb_idx;
+ if (cpi->alt_fb_idx == -1) cpi->alt_fb_idx = cpi->lst_fb_idx;
+ }
+}
+
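The slot scan in set_intra_only_frame() hands last/golden/alt the first three buffer slots flagged in fb_idx_base. A worked trace with a hypothetical base-layer mask:

/* Hypothetical: svc->fb_idx_base[] = { 0, 1, 0, 1, 0, 0, 0, 0 }. */
/* i = 1: count = 1 -> lst_fb_idx = 1 */
/* i = 3: count = 2 -> gld_fb_idx = 3 */
/* No third flagged slot, so alt_fb_idx stays -1 and the fallback after the
   loop aliases it to lst_fb_idx (= 1). */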
void vp9_rc_get_svc_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ SVC *const svc = &cpi->svc;
int target = rc->avg_frame_bandwidth;
- int layer =
- LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
+ int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
// Periodic key frames are based on the super-frame counter
// (svc.current_superframe); also, only the base spatial layer gets a key frame.
- if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+ // A key frame is set for any of the following: very first frame, frame
+ // flags indicate a key frame, the superframe counter hits the key
+ // frequency, or the (non-intra) sync flag is set for spatial layer 0.
+ if ((cm->current_video_frame == 0 && !svc->previous_frame_is_intra_only) ||
+ (cpi->frame_flags & FRAMEFLAGS_KEY) ||
(cpi->oxcf.auto_key &&
- (cpi->svc.current_superframe % cpi->oxcf.key_freq == 0) &&
- cpi->svc.spatial_layer_id == 0)) {
+ (svc->current_superframe % cpi->oxcf.key_freq == 0) &&
+ !svc->previous_frame_is_intra_only && svc->spatial_layer_id == 0) ||
+ (svc->spatial_layer_sync[0] == 1 && svc->spatial_layer_id == 0)) {
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
if (is_one_pass_cbr_svc(cpi)) {
if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi);
- layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- cpi->svc.layer_context[layer].is_key_frame = 1;
+ layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ svc->layer_context[layer].is_key_frame = 1;
cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
// Assumption here is that LAST_FRAME is being updated for a keyframe.
// Thus no change in update flags.
@@ -1736,25 +2022,73 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
} else {
cm->frame_type = INTER_FRAME;
if (is_one_pass_cbr_svc(cpi)) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) {
- lc->is_key_frame = 0;
- } else {
- lc->is_key_frame =
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
- }
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ // Add the condition current_video_frame > 0 for the case where the first
+ // frame is intra-only followed by an overlay/copy frame. In this case we
+ // don't want to reset is_key_frame to 0 on the overlay/copy frame.
+ lc->is_key_frame =
+ (svc->spatial_layer_id == 0 && cm->current_video_frame > 0)
+ ? 0
+ : svc->layer_context[svc->temporal_layer_id].is_key_frame;
target = calc_pframe_target_size_one_pass_cbr(cpi);
}
}
+ // Check if superframe contains a sync layer request.
+ vp9_svc_check_spatial_layer_sync(cpi);
+
+ // If the long-term temporal feature is enabled, set the period of the
+ // update. The update/refresh of this reference frame always happens on a
+ // base temporal layer frame.
+ if (svc->use_gf_temporal_ref_current_layer) {
+ // Only use gf long-term prediction on non-key superframes.
+ if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // Use golden for this reference, which will be used for prediction.
+ int index = svc->spatial_layer_id;
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->gld_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+ // Enable prediction off LAST (last reference) and golden (which will
+ // generally be further behind/long-term reference).
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ // Check for update/refresh of reference: only refresh on base temporal
+ // layer.
+ if (svc->temporal_layer_id == 0) {
+ if (svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // On key frame we update the buffer index used for long term reference.
+ // Use the alt_ref since it is not used or updated on key frames.
+ int index = svc->spatial_layer_id;
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ } else if (rc->frames_till_gf_update_due == 0) {
+ // Set the period of the next update. Make it a multiple of 10, as the
+ // cyclic refresh is typically ~10%, and we'd like the update to happen
+ // after a few cycles of the refresh (so it is a better quality frame).
+ // Note the cyclic refresh for SVC only operates on base temporal layer
+ // frames. Choose 20 as the period for now (2 cycles).
+ rc->baseline_gf_interval = 20;
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ cpi->ext_refresh_golden_frame = 1;
+ rc->gfu_boost = DEFAULT_GF_BOOST;
+ }
+ }
+ } else if (!svc->use_gf_temporal_ref) {
+ rc->frames_till_gf_update_due = INT_MAX;
+ rc->baseline_gf_interval = INT_MAX;
+ }
+ if (svc->set_intra_only_frame) {
+ set_intra_only_frame(cpi);
+ target = calc_iframe_target_size_one_pass_cbr(cpi);
+ }
// Any update/change of global cyclic refresh parameters (amount/delta-qp)
// should be done here, before the frame qp is selected.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_update_parameters(cpi);
vp9_rc_set_frame_target(cpi, target);
- rc->frames_till_gf_update_due = INT_MAX;
- rc->baseline_gf_interval = INT_MAX;
}
void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
@@ -1762,8 +2096,8 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
int target;
// TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
- if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
+ if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+ rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0)) {
cm->frame_type = KEY_FRAME;
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
@@ -1790,7 +2124,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_update_parameters(cpi);
- if (cm->frame_type == KEY_FRAME)
+ if (frame_is_intra_only(cm))
target = calc_iframe_target_size_one_pass_cbr(cpi);
else
target = calc_pframe_target_size_one_pass_cbr(cpi);
@@ -2426,6 +2760,19 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad)
rc->this_frame_target = rc->avg_frame_bandwidth;
}
+ // For SVC, the new (updated) avg_source_sad[0] for the current superframe
+ // is propagated to all layers.
+ if (cpi->use_svc) {
+ int sl, tl;
+ SVC *const svc = &cpi->svc;
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl)
+ for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_source_sad[0] = rc->avg_source_sad[0];
+ }
+ }
// For VBR, under scene change/high content change, force golden refresh.
if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME &&
rc->high_source_sad && rc->frames_to_key > 3 &&
@@ -2459,8 +2806,11 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
- int thresh_qp = 3 * (rc->worst_quality >> 2);
- int thresh_rate = rc->avg_frame_bandwidth * 10;
+ int thresh_qp = 7 * (rc->worst_quality >> 3);
+ int thresh_rate = rc->avg_frame_bandwidth << 3;
+ // Use a lower rate threshold for non-screen (video) content.
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ thresh_rate = rc->avg_frame_bandwidth << 2;
if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) {
double rate_correction_factor =
cpi->rc.rate_correction_factors[INTER_NORMAL];
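The re-encode trigger now fires at a slightly higher QP ceiling and expresses the rate threshold as a shift. Worked numbers, assuming rc->worst_quality = 63:

/* thresh_qp: 7 * (63 >> 3) = 49 (previously 3 * (63 >> 2) = 45). */
/* thresh_rate: avg_frame_bandwidth << 3 (8x) for screen content,
   avg_frame_bandwidth << 2 (4x) for other (video) content. */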
@@ -2471,6 +2821,28 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
int enumerator;
// Force a re-encode, and for now use max-QP.
*q = cpi->rc.worst_quality;
+ cpi->cyclic_refresh->counter_encode_maxq_scene_change = 0;
+ cpi->rc.re_encode_maxq_scene_change = 1;
+ // If the frame_size is much larger than the threshold (big content change)
+ // and the encoded frame used a lot of intra modes, then force hybrid_intra
+ // encoding for the re-encode on this scene change. hybrid_intra will
+ // use rd-based intra mode selection for small blocks.
+ if (frame_size > (thresh_rate << 1) && cpi->svc.spatial_layer_id == 0) {
+ MODE_INFO **mi = cm->mi_grid_visible;
+ int sum_intra_usage = 0;
+ int mi_row, mi_col;
+ int tot = 0;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+ if (mi[0]->ref_frame[0] == INTRA_FRAME) sum_intra_usage++;
+ tot++;
+ mi++;
+ }
+ mi += 8;
+ }
+ sum_intra_usage = 100 * sum_intra_usage / (cm->mi_rows * cm->mi_cols);
+ if (sum_intra_usage > 60) cpi->rc.hybrid_intra_scene_change = 1;
+ }
// Adjust avg_frame_qindex, buffer_level, and rate correction factors, as
// these parameters will affect QP selection for subsequent frames. If they
// have settled down to a very different (low QP) state, then not adjusting
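The mi += 8 at the end of each row in the intra-usage loop above steps over the border columns of the mode-info grid. A sketch of the assumed layout, where the visible grid sits inside a stride of mi_cols + 8:

/* Assumed VP9 layout: cm->mi_stride == cm->mi_cols + 8, so after walking
   cm->mi_cols visible entries the pointer skips 8 border entries to reach
   the next visible row. */
for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
  for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) mi++; /* visible cells */
  mi += 8; /* skip non-visible border columns */
}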
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 3a40e0138..cf37117f9 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -114,6 +114,16 @@ typedef struct {
int source_alt_ref_active;
int is_src_frame_alt_ref;
+ // Length of the bi-predictive frame group interval
+ int bipred_group_interval;
+
+ // NOTE: Different frame types may be allocated different numbers of bits,
+ // aiming to achieve the overall optimal RD performance.
+ int is_bwd_ref_frame;
+ int is_last_bipred_frame;
+ int is_bipred_frame;
+ int is_src_frame_ext_arf;
+
int avg_frame_bandwidth; // Average frame size target for clip
int min_frame_bandwidth; // Minimum allocation used for any frame
int max_frame_bandwidth; // Maximum burst rate allowed for a frame.
@@ -179,6 +189,8 @@ typedef struct {
int last_frame_is_src_altref;
int high_source_sad;
int count_last_scene_change;
+ int hybrid_intra_scene_change;
+ int re_encode_maxq_scene_change;
int avg_frame_low_motion;
int af_ratio_onepass_vbr;
int force_qpmin;
@@ -302,6 +314,10 @@ void vp9_scene_detection_onepass(struct VP9_COMP *cpi);
int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q);
+void vp9_configure_buffer_updates(struct VP9_COMP *cpi, int gf_group_index);
+
+void vp9_estimate_qp_gop(struct VP9_COMP *cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 3407e74c6..dcdd00d92 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -200,6 +200,38 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
return (int)rdmult;
}
+int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
+ const VP9_COMMON *cm = &cpi->common;
+ int64_t q = vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ int64_t rdmult = 0;
+ switch (cpi->common.bit_depth) {
+ case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break;
+ case VPX_BITS_10:
+ rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
+ break;
+ default:
+ assert(cpi->common.bit_depth == VPX_BITS_12);
+ rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8);
+ break;
+ }
+#else
+ int64_t rdmult = (int)((88 * q * q / beta) / 24);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
+ const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+
+ rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
+ rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+ }
+ if (rdmult < 1) rdmult = 1;
+ return (int)rdmult;
+}
+
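vp9_get_adaptive_rdmult() scales the usual q^2-based multiplier by 1/beta, so beta > 1 (a block judged more important) lowers rdmult and makes extra rate cheaper in the RD comparison. A worked example at 8-bit with hypothetical values:

/* Hypothetical: q = vp9_dc_quant(base_qindex, 0, VPX_BITS_8) = 40,
   beta = 2.0. */
int64_t rdmult = (int)((88 * 40 * 40 / 2.0) / 24); /* = 2933 */
/* The same block at beta = 1.0 would get 5866: doubling beta halves the
   multiplier, shifting the trade-off toward spending more bits. */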
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
double q;
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 59022c106..919f74ebd 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -108,9 +108,14 @@ typedef struct RD_OPT {
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#if CONFIG_CONSISTENT_RECODE
+ int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES];
+ int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#endif
int RDMULT;
int RDDIV;
+ double r0;
} RD_OPT;
typedef struct RD_COST {
@@ -134,6 +139,8 @@ int64_t vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi,
int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
+int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta);
+
void vp9_initialize_rd_consts(struct VP9_COMP *cpi);
void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e39df033a..4005f85b1 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -783,7 +783,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
- TX_SIZE tx_size, int use_fast_coef_casting) {
+ TX_SIZE tx_size, int use_fast_coef_costing) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblockd_plane *const pd = &xd->plane[plane];
struct rdcost_block_args args;
@@ -791,7 +791,7 @@ static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
args.cpi = cpi;
args.x = x;
args.best_rd = ref_best_rd;
- args.use_fast_coef_costing = use_fast_coef_casting;
+ args.use_fast_coef_costing = use_fast_coef_costing;
args.skippable = 1;
if (plane == 0) xd->mi[0]->tx_size = tx_size;
@@ -847,7 +847,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
{ INT64_MAX, INT64_MAX } };
int n;
int s0, s1;
- int64_t best_rd = INT64_MAX;
+ int64_t best_rd = ref_best_rd;
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
const int tx_size_ctx = get_tx_size_context(xd);
@@ -868,8 +868,8 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
for (n = start_tx; n >= end_tx; n--) {
const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n];
- txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0,
- bs, n, cpi->sf.use_fast_coef_costing);
+ txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
r[n][1] = r[n][0];
if (r[n][0] < INT_MAX) {
r[n][1] += r_tx_size;
@@ -3073,6 +3073,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
// lock mechanism involved with reads from
// tile_mode_map
const int mode_search_skip_flags = sf->mode_search_skip_flags;
+ const int is_rect_partition =
+ num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize];
int64_t mask_filter = 0;
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
@@ -3224,6 +3226,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
vp9_zero(x->sum_y_eobs);
+ if (is_rect_partition) {
+ if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue;
+ if (second_ref_frame > 0 &&
+ (ctx->skip_ref_frame_mask & (1 << second_ref_frame)))
+ continue;
+ }
+
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
if (midx == mode_skip_start && best_mode_index >= 0) {
@@ -3612,9 +3621,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
}
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
- // If adaptive interp filter is enabled, then the current leaf node of 8x8
- // data is needed for sub8x8. Hence preserve the context.
+// If adaptive interp filter is enabled, then the current leaf node of 8x8
+// data is needed for sub8x8. Hence preserve the context.
+#if CONFIG_CONSISTENT_RECODE
+ if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#else
if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#endif
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
return;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 90da68726..75a8de270 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -61,32 +61,58 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
VP9_COMMON *const cm = &cpi->common;
+ const int min_frame_size = VPXMIN(cm->width, cm->height);
+ const int is_480p_or_larger = min_frame_size >= 480;
+ const int is_720p_or_larger = min_frame_size >= 720;
+ const int is_1080p_or_larger = min_frame_size >= 1080;
+ const int is_2160p_or_larger = min_frame_size >= 2160;
// speed 0 features
sf->partition_search_breakout_thr.dist = (1 << 20);
sf->partition_search_breakout_thr.rate = 80;
+ sf->use_square_only_threshold = BLOCK_SIZES;
- // Currently, the machine-learning based partition search early termination
- // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
- if (VPXMIN(cm->width, cm->height) >= 480) {
+ if (is_480p_or_larger) {
+ // Currently, the machine-learning based partition search early termination
+ // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
sf->ml_partition_search_early_termination = 1;
+ } else {
+ sf->use_square_only_threshold = BLOCK_32X32;
+ }
+
+ if (!is_1080p_or_larger) {
+ sf->use_ml_partition_search_breakout = 1;
+ if (is_720p_or_larger) {
+ sf->ml_partition_search_breakout_thresh[0] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[1] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[2] = 0.0f;
+ } else {
+ sf->ml_partition_search_breakout_thresh[0] = 2.5f;
+ sf->ml_partition_search_breakout_thresh[1] = 1.5f;
+ sf->ml_partition_search_breakout_thresh[2] = 1.5f;
+ }
}
if (speed >= 1) {
sf->ml_partition_search_early_termination = 0;
+ sf->use_square_only_threshold = BLOCK_4X4;
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ if (is_720p_or_larger) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->partition_search_breakout_thr.dist = (1 << 23);
+ sf->use_ml_partition_search_breakout = 0;
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
sf->partition_search_breakout_thr.dist = (1 << 21);
+ sf->ml_partition_search_breakout_thresh[0] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[1] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[2] = 0.0f;
}
}
if (speed >= 2) {
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ if (is_720p_or_larger) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
@@ -96,11 +122,14 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->partition_search_breakout_thr.dist = (1 << 22);
sf->partition_search_breakout_thr.rate = 100;
+ sf->ml_partition_search_breakout_thresh[0] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[1] = -1.0f;
+ sf->ml_partition_search_breakout_thresh[2] = -4.0f;
}
sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
// Use a set of speed features for 4k videos.
- if (VPXMIN(cm->width, cm->height) >= 2160) {
+ if (is_2160p_or_larger) {
sf->use_square_partition_only = 1;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
@@ -112,7 +141,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
}
if (speed >= 3) {
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->use_ml_partition_search_breakout = 0;
+ if (is_720p_or_larger) {
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
sf->partition_search_breakout_thr.dist = (1 << 25);
@@ -137,7 +167,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
if (speed >= 4) {
sf->partition_search_breakout_thr.rate = 300;
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ if (is_720p_or_larger) {
sf->partition_search_breakout_thr.dist = (1 << 26);
} else {
sf->partition_search_breakout_thr.dist = (1 << 24);
@@ -167,7 +197,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->allow_skip_recode = 1;
sf->less_rectangular_check = 1;
sf->use_square_partition_only = !frame_is_boosted(cpi);
- sf->use_square_only_threshold = BLOCK_16X16;
+ sf->prune_ref_frame_for_rect_partitions = 1;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
sf->exhaustive_searches_thresh = (1 << 22);
@@ -183,6 +213,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
}
if (speed >= 1) {
+ sf->enable_tpl_model = 0;
+ sf->prune_ref_frame_for_rect_partitions = 0;
if (oxcf->pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
@@ -199,10 +231,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5];
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5];
-
- sf->use_square_only_threshold = BLOCK_4X4;
sf->less_rectangular_check = 1;
-
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
sf->mv.auto_mv_step_size = 1;
@@ -375,6 +404,9 @@ static void set_rt_speed_feature_framesize_independent(
sf->nonrd_keyframe = 0;
sf->svc_use_lowres_part = 0;
sf->re_encode_overshoot_rt = 0;
+ sf->disable_16x16part_nonkey = 0;
+ sf->disable_golden_ref = 0;
+ sf->enable_tpl_model = 0;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
@@ -537,8 +569,14 @@ static void set_rt_speed_feature_framesize_independent(
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
- cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) {
sf->re_encode_overshoot_rt = 1;
+ }
+ if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
+ cm->width <= 1280 && cm->height <= 720) {
+ sf->use_altref_onepass = 1;
+ sf->use_compound_nonrd_pickmode = 1;
+ }
}
if (speed >= 6) {
@@ -621,6 +659,12 @@ static void set_rt_speed_feature_framesize_independent(
cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 &&
cpi->oxcf.width * cpi->oxcf.height > 640 * 480)
sf->svc_use_lowres_part = 1;
+ // For SVC, when golden is used as a second temporal reference: to avoid an
+ // encode time increase, only use this feature on the base temporal layer
+ // (i.e., remove the golden flag from frame_flags for temporal_layer_id > 0).
+ if (cpi->use_svc && cpi->svc.use_gf_temporal_ref_current_layer &&
+ cpi->svc.temporal_layer_id > 0)
+ cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
}
if (speed >= 8) {
@@ -661,6 +705,22 @@ static void set_rt_speed_feature_framesize_independent(
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 1;
}
+
+ if (speed >= 9) {
+ sf->mv.enable_adaptive_subpel_force_stop = 1;
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 2;
+ if (cpi->rc.avg_frame_low_motion < 40)
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_below = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_above = 2;
+ // Disable partition blocks below 16x16, except for low resolutions.
+ if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240)
+ sf->disable_16x16part_nonkey = 1;
+ // Allow for disabling GOLDEN reference, for CBR mode.
+ if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1;
+ if (cpi->rc.avg_frame_low_motion < 65) sf->default_interp_filter = BILINEAR;
+ }
+
if (sf->use_altref_onepass) {
if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
sf->partition_search_type = FIXED_PARTITION;
@@ -675,6 +735,10 @@ static void set_rt_speed_feature_framesize_independent(
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->count_lastgolden_frame_usage));
}
+ if (cpi->svc.previous_frame_is_intra_only) {
+ sf->partition_search_type = FIXED_PARTITION;
+ sf->always_this_block_size = BLOCK_64X64;
+ }
}
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
@@ -688,6 +752,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
sf->partition_search_breakout_thr.dist = (1 << 19);
sf->partition_search_breakout_thr.rate = 80;
sf->ml_partition_search_early_termination = 0;
+ sf->use_ml_partition_search_breakout = 0;
if (oxcf->mode == REALTIME) {
set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
@@ -780,6 +845,12 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = 99.0;
sf->allow_acl = 1;
+#if CONFIG_VP9_HIGHBITDEPTH
+ sf->enable_tpl_model = 0;
+#else
+ sf->enable_tpl_model = 1;
+#endif
+ sf->prune_ref_frame_for_rect_partitions = 0;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
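
Note on the ML partition search breakout above: this patch only wires up the
use_ml_partition_search_breakout flag and the three per-size thresholds; the
model evaluation itself lives in the partition search (vp9_encodeframe.c in the
diffstat, not shown in these hunks). A minimal sketch of how such a gate could
look — the score function ml_predict_breakout_score() and the mapping of
64x64/32x32/16x16 to threshold indices 0..2 are assumptions, not code from this
patch:

/* Sketch only: assumes the encoder's internal headers are included. */
static int ml_partition_breakout(const SPEED_FEATURES *sf, BLOCK_SIZE bsize,
                                 float score /* from a hypothetical
                                                ml_predict_breakout_score() */) {
  int idx;
  if (!sf->use_ml_partition_search_breakout) return 0;
  switch (bsize) {
    case BLOCK_64X64: idx = 0; break;
    case BLOCK_32X32: idx = 1; break;
    case BLOCK_16X16: idx = 2; break;
    default: return 0;  /* No model below 16x16 in this sketch. */
  }
  /* Break out of the partition search when the model is confident enough. */
  return score >= sf->ml_partition_search_breakout_thresh[idx];
}
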
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 946bf0545..fd4973fb2 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -161,6 +161,17 @@ typedef enum {
ONE_LOOP_REDUCED = 1
} FAST_COEFF_UPDATE;
+typedef struct ADAPT_SUBPEL_FORCE_STOP {
+ // Threshold for the full pixel motion vector.
+ int mv_thresh;
+
+ // subpel_force_stop to use if the full pixel MV is below the threshold.
+ int force_stop_below;
+
+ // subpel_force_stop to use if the full pixel MV is at or above the threshold.
+ int force_stop_above;
+} ADAPT_SUBPEL_FORCE_STOP;
+
typedef struct MV_SPEED_FEATURES {
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
@@ -189,6 +200,11 @@ typedef struct MV_SPEED_FEATURES {
// 3: Stop at full pixel.
int subpel_force_stop;
+ // If enabled, a different subpel_force_stop is used depending on the MV.
+ int enable_adaptive_subpel_force_stop;
+
+ ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop;
+
// This variable sets the step_param used in full pel motion search.
int fullpel_search_step_param;
} MV_SPEED_FEATURES;
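
A minimal sketch of how the adaptive subpel force stop defined above could be
resolved during motion search. The per-component comparison against mv_thresh
(in full-pel units) is an assumption; this patch only defines the fields, and
the actual check lives in the motion search code, not in this hunk:

/* Sketch only. */
static int resolve_subpel_force_stop(const MV_SPEED_FEATURES *mv_sf,
                                     int abs_mv_row, int abs_mv_col) {
  if (!mv_sf->enable_adaptive_subpel_force_stop)
    return mv_sf->subpel_force_stop;
  /* Small full-pel motion: stop subpel refinement earlier (or later),
   * per the configured force_stop values. */
  if (abs_mv_row < mv_sf->adapt_subpel_force_stop.mv_thresh &&
      abs_mv_col < mv_sf->adapt_subpel_force_stop.mv_thresh)
    return mv_sf->adapt_subpel_force_stop.force_stop_below;
  return mv_sf->adapt_subpel_force_stop.force_stop_above;
}
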
@@ -258,6 +274,9 @@ typedef struct SPEED_FEATURES {
// alternate reference frames.
int allow_acl;
+ // Temporal dependency model (TPL) based encoding mode optimization.
+ int enable_tpl_model;
+
// Use transform domain distortion. Use pixel domain distortion in speed 0
// and certain situations in higher speed to improve the RD model precision.
int allow_txfm_domain_distortion;
@@ -300,6 +319,9 @@ typedef struct SPEED_FEATURES {
int use_square_partition_only;
BLOCK_SIZE use_square_only_threshold;
+ // Prune reference frames for rectangular partitions.
+ int prune_ref_frame_for_rect_partitions;
+
// Sets min and max partition sizes for this 64x64 region based on the
// same 64x64 in last encoded frame, and the left and above neighbor.
AUTO_MIN_MAX_MODE auto_min_max_partition_size;
@@ -451,6 +473,10 @@ typedef struct SPEED_FEATURES {
// Partition search early breakout thresholds.
PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr;
+ // Use ML-based partition search early breakout.
+ int use_ml_partition_search_breakout;
+ float ml_partition_search_breakout_thresh[3];
+
// Machine-learning based partition search early termination
int ml_partition_search_early_termination;
@@ -515,6 +541,12 @@ typedef struct SPEED_FEATURES {
// Enable re-encoding on scene change with potential high overshoot,
// for real-time encoding flow.
int re_encode_overshoot_rt;
+
+ // Disable partitioning of 16x16 blocks on non-key frames.
+ int disable_16x16part_nonkey;
+
+ // Allow for disabling golden reference.
+ int disable_golden_ref;
} SPEED_FEATURES;
struct VP9_COMP;
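
The two new flags at the end of SPEED_FEATURES are consumed in the real-time
pick-mode path (vp9_pickmode.c, listed in the diffstat but not shown here). A
hedged sketch of the kind of gate disable_golden_ref implies — the helper and
its call site are assumptions, not code from this patch:

/* Sketch only. */
static int use_ref_frame(const SPEED_FEATURES *sf,
                         MV_REFERENCE_FRAME ref_frame) {
  /* Skip GOLDEN as a reference candidate when disabled (CBR, speed >= 9). */
  if (sf->disable_golden_ref && ref_frame == GOLDEN_FRAME) return 0;
  return 1;
}
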
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 07d1995a8..0b7e7fe80 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -29,10 +29,11 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
- svc->first_spatial_layer_to_encode = 0;
svc->force_zero_mode_spatial_ref = 0;
svc->use_base_mv = 0;
svc->use_partition_reuse = 0;
+ svc->use_gf_temporal_ref = 1;
+ svc->use_gf_temporal_ref_current_layer = 0;
svc->scaled_temp_is_alloc = 0;
svc->scaled_one_half = 0;
svc->current_superframe = 0;
@@ -40,8 +41,15 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->skip_enhancement_layer = 0;
svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON;
svc->framedrop_mode = CONSTRAINED_LAYER_DROP;
-
- for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
+ svc->set_intra_only_frame = 0;
+ svc->previous_frame_is_intra_only = 0;
+ svc->superframe_has_layer_sync = 0;
+
+ for (i = 0; i < REF_FRAMES; ++i) {
+ svc->fb_idx_spatial_layer_id[i] = -1;
+ svc->fb_idx_temporal_layer_id[i] = -1;
+ svc->fb_idx_base[i] = 0;
+ }
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
svc->last_layer_dropped[sl] = 0;
svc->drop_spatial_layer[sl] = 0;
@@ -52,7 +60,16 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->downsample_filter_type[sl] = BILINEAR;
svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark;
+ svc->fb_idx_upd_tl0[sl] = -1;
+ svc->drop_count[sl] = 0;
+ svc->spatial_layer_sync[sl] = 0;
}
+ svc->max_consec_drop = INT_MAX;
+
+ svc->buffer_gf_temporal_ref[1].idx = 7;
+ svc->buffer_gf_temporal_ref[0].idx = 6;
+ svc->buffer_gf_temporal_ref[1].is_used = 0;
+ svc->buffer_gf_temporal_ref[0].is_used = 0;
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH,
@@ -665,24 +682,24 @@ void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) {
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
+ SVC *const svc = &cpi->svc;
LAYER_CONTEXT *lc = NULL;
- cpi->svc.skip_enhancement_layer = 0;
- if (cpi->svc.number_spatial_layers > 1) {
- cpi->svc.use_base_mv = 1;
- cpi->svc.use_partition_reuse = 1;
+ svc->skip_enhancement_layer = 0;
+ if (svc->number_spatial_layers > 1) {
+ svc->use_base_mv = 1;
+ svc->use_partition_reuse = 1;
}
- cpi->svc.force_zero_mode_spatial_ref = 1;
- cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride;
+ svc->force_zero_mode_spatial_ref = 1;
+ svc->mi_stride[svc->spatial_layer_id] = cpi->common.mi_stride;
- if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
+ if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
- } else if (cpi->svc.temporal_layering_mode ==
+ } else if (svc->temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
- } else if (cpi->svc.temporal_layering_mode ==
- VP9E_TEMPORAL_LAYERING_MODE_0101) {
+ } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) {
set_flags_and_fb_idx_for_temporal_mode2(cpi);
- } else if (cpi->svc.temporal_layering_mode ==
+ } else if (svc->temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
// In the BYPASS/flexible mode, the encoder is relying on the application
// to specify, for each spatial layer, the flags and buffer indices for the
@@ -694,42 +711,82 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
// this case.
if (cpi->ext_refresh_frame_flags_pending == 0) {
int sl;
- cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
- sl = cpi->svc.spatial_layer_id;
- vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]);
- cpi->lst_fb_idx = cpi->svc.lst_fb_idx[sl];
- cpi->gld_fb_idx = cpi->svc.gld_fb_idx[sl];
- cpi->alt_fb_idx = cpi->svc.alt_fb_idx[sl];
+ svc->spatial_layer_id = svc->spatial_layer_to_encode;
+ sl = svc->spatial_layer_id;
+ vp9_apply_encoding_flags(cpi, svc->ext_frame_flags[sl]);
+ cpi->lst_fb_idx = svc->lst_fb_idx[sl];
+ cpi->gld_fb_idx = svc->gld_fb_idx[sl];
+ cpi->alt_fb_idx = svc->alt_fb_idx[sl];
+ }
+ }
+
+ if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[0].idx ||
+ cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[0].idx ||
+ cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[0].idx)
+ svc->buffer_gf_temporal_ref[0].is_used = 1;
+ if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[1].idx ||
+ cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[1].idx ||
+ cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[1].idx)
+ svc->buffer_gf_temporal_ref[1].is_used = 1;
+
+ // For the fixed (non-flexible/bypass) SVC mode:
+ // If the long term temporal reference is enabled at the sequence level
+ // (use_gf_temporal_ref == 1), and inter-layer prediction is disabled (on
+ // inter-frames), we can use golden as a second temporal reference
+ // (since the spatial/inter-layer reference is disabled).
+ // We check that the fb_idx for this reference (buffer_gf_temporal_ref.idx) is
+ // unused (slots 7 and 6 should be available for a 3x3 layer system).
+ // For now this second temporal reference is only used for the highest and
+ // next-to-highest spatial layers (i.e., the top and middle layers for
+ // 3 spatial layers).
+ svc->use_gf_temporal_ref_current_layer = 0;
+ if (svc->use_gf_temporal_ref && !svc->buffer_gf_temporal_ref[0].is_used &&
+ !svc->buffer_gf_temporal_ref[1].is_used &&
+ svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->disable_inter_layer_pred != INTER_LAYER_PRED_ON &&
+ svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 &&
+ svc->spatial_layer_id >= svc->number_spatial_layers - 2) {
+ // Enable the second (long-term) temporal reference at the frame-level.
+ svc->use_gf_temporal_ref_current_layer = 1;
+ }
+
+ // Check if the current superframe has any layer sync; only check once,
+ // on the base layer.
+ if (svc->spatial_layer_id == 0) {
+ int sl = 0;
+ // Default is no sync.
+ svc->superframe_has_layer_sync = 0;
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
+ if (cpi->svc.spatial_layer_sync[sl]) svc->superframe_has_layer_sync = 1;
}
}
// Reset the drop flags for all spatial layers, on the base layer.
- if (cpi->svc.spatial_layer_id == 0) {
- vp9_zero(cpi->svc.drop_spatial_layer);
- // TODO(jianj/marpan): Investigate why setting cpi->svc.lst/gld/alt_fb_idx
+ if (svc->spatial_layer_id == 0) {
+ vp9_zero(svc->drop_spatial_layer);
+ // TODO(jianj/marpan): Investigate why setting svc->lst/gld/alt_fb_idx
// causes an issue with frame dropping and temporal layers, when the frame
// flags are passed via the encode call (bypass mode). Issue is that we're
// resetting ext_refresh_frame_flags_pending to 0 on frame drops.
- if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
- memset(&cpi->svc.lst_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx));
- memset(&cpi->svc.gld_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx));
- memset(&cpi->svc.alt_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx));
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ memset(&svc->lst_fb_idx, -1, sizeof(svc->lst_fb_idx));
+ memset(&svc->gld_fb_idx, -1, sizeof(svc->lst_fb_idx));
+ memset(&svc->alt_fb_idx, -1, sizeof(svc->lst_fb_idx));
}
- vp9_zero(cpi->svc.update_last);
- vp9_zero(cpi->svc.update_golden);
- vp9_zero(cpi->svc.update_altref);
- vp9_zero(cpi->svc.reference_last);
- vp9_zero(cpi->svc.reference_golden);
- vp9_zero(cpi->svc.reference_altref);
+ vp9_zero(svc->update_last);
+ vp9_zero(svc->update_golden);
+ vp9_zero(svc->update_altref);
+ vp9_zero(svc->reference_last);
+ vp9_zero(svc->reference_golden);
+ vp9_zero(svc->reference_altref);
}
- lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id];
+ lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
+ svc->temporal_layer_id];
// Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS,
// only for non-BYPASS mode for now.
- if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
RATE_CONTROL *const lrc = &lc->rc;
lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q);
lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q);
@@ -741,58 +798,58 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
// Use EIGHTTAP_SMOOTH for low resolutions.
if (width * height <= 320 * 240)
- cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] =
- EIGHTTAP_SMOOTH;
+ svc->downsample_filter_type[svc->spatial_layer_id] = EIGHTTAP_SMOOTH;
// For scale factors > 0.75, set the phase to 0 (aligns decimated pixel
// to source pixel).
- lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id];
+ lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
+ svc->temporal_layer_id];
if (lc->scaling_factor_num > (3 * lc->scaling_factor_den) >> 2)
- cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 0;
+ svc->downsample_filter_phase[svc->spatial_layer_id] = 0;
// The usage of use_base_mv or partition_reuse assumes down-scale of 2x2.
// For now, turn off use of base motion vectors and partition reuse if the
// spatial scale factors for any layers are not 2,
// keep the case of 3 spatial layers with scale factor of 4x4 for base layer.
// TODO(marpan): Fix this to allow for use_base_mv for scale factors != 2.
- if (cpi->svc.number_spatial_layers > 1) {
+ if (svc->number_spatial_layers > 1) {
int sl;
- for (sl = 0; sl < cpi->svc.number_spatial_layers - 1; ++sl) {
- lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id];
+ for (sl = 0; sl < svc->number_spatial_layers - 1; ++sl) {
+ lc = &svc->layer_context[sl * svc->number_temporal_layers +
+ svc->temporal_layer_id];
if ((lc->scaling_factor_num != lc->scaling_factor_den >> 1) &&
!(lc->scaling_factor_num == lc->scaling_factor_den >> 2 && sl == 0 &&
- cpi->svc.number_spatial_layers == 3)) {
- cpi->svc.use_base_mv = 0;
- cpi->svc.use_partition_reuse = 0;
+ svc->number_spatial_layers == 3)) {
+ svc->use_base_mv = 0;
+ svc->use_partition_reuse = 0;
break;
}
}
// For non-zero spatial layers: if the previous spatial layer was dropped
// disable the base_mv and partition_reuse features.
- if (cpi->svc.spatial_layer_id > 0 &&
- cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {
- cpi->svc.use_base_mv = 0;
- cpi->svc.use_partition_reuse = 0;
+ if (svc->spatial_layer_id > 0 &&
+ svc->drop_spatial_layer[svc->spatial_layer_id - 1]) {
+ svc->use_base_mv = 0;
+ svc->use_partition_reuse = 0;
}
}
- cpi->svc.non_reference_frame = 0;
+ svc->non_reference_frame = 0;
if (cpi->common.frame_type != KEY_FRAME && !cpi->ext_refresh_last_frame &&
!cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame) {
- cpi->svc.non_reference_frame = 1;
+ svc->non_reference_frame = 1;
}
- if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = 0;
+ if (svc->spatial_layer_id == 0) svc->high_source_sad_superframe = 0;
- if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
- cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id]) {
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->last_layer_dropped[svc->spatial_layer_id] &&
+ svc->fb_idx_upd_tl0[svc->spatial_layer_id] != -1 &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
// For fixed/non-flexible mode, if the previous frame (same spatial layer
// from previous superframe) was dropped, make sure the lst_fb_idx
// for this frame corresponds to the buffer index updated on (last) encoded
// TL0 frame (with same spatial layer).
- cpi->lst_fb_idx = cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id];
+ cpi->lst_fb_idx = svc->fb_idx_upd_tl0[svc->spatial_layer_id];
}
if (vp9_set_size_literal(cpi, width, height) != 0)
@@ -886,8 +943,11 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) {
// Check for disabling inter-layer (spatial) prediction, if
// svc.disable_inter_layer_pred is set. If the previous spatial layer was
// dropped then disable the prediction from this (scaled) reference.
+ // For INTER_LAYER_PRED_OFF_NONKEY: inter-layer prediction is disabled
+ // on non-key frames, unless the superframe has a layer sync.
if ((cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY &&
- !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ !cpi->svc.superframe_has_layer_sync) ||
cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF ||
cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {
MV_REFERENCE_FRAME ref_frame;
@@ -903,12 +963,11 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) {
}
}
}
- // Check for disabling inter-layer prediction if
- // INTER_LAYER_PRED_ON_CONSTRAINED is enabled.
- // If the reference for inter-layer prediction (the reference that is scaled)
- // is not the previous spatial layer from the same superframe, then we
- // disable inter-layer prediction.
- if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_ON_CONSTRAINED) {
+ // Disable inter-layer prediction if the reference for inter-layer prediction
+ // (the reference that is scaled) is not the previous spatial layer from the
+ // same superframe.
+ // Only need to check this when inter-layer prediction is not set to OFF mode.
+ if (cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_OFF) {
// We only use LAST and GOLDEN for prediction in real-time mode, so we
// check both here.
MV_REFERENCE_FRAME ref_frame;
@@ -940,3 +999,102 @@ void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) {
}
}
}
+
+void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // For fixed/non-flexible mode, the following constraints are expected
+ // when inter-layer prediction is on (default).
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON &&
+ svc->framedrop_mode != LAYER_DROP) {
+ if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // On non-key frames: LAST is always temporal reference, GOLDEN is
+ // spatial reference.
+ if (svc->temporal_layer_id == 0)
+ // Base temporal only predicts from base temporal.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0);
+ else
+ // Non-base temporal only predicts from lower temporal layer.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] <
+ svc->temporal_layer_id);
+ if (svc->spatial_layer_id > 0) {
+ // Non-base spatial only predicts from lower spatial layer with same
+ // temporal_id.
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ } else if (svc->spatial_layer_id > 0) {
+ // Only 1 reference for frames whose base is a key frame; the reference
+ // may be LAST or GOLDEN, so we check both.
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] ==
+ svc->temporal_layer_id);
+ } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ }
+ } else if (svc->use_gf_temporal_ref_current_layer &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // If golden is enabled as a second long term reference for this layer, then
+ // the temporal_layer_id of that reference must be the base temporal layer 0,
+ // and the spatial_layer_id of that reference must be the same as the current
+ // spatial_layer_id.
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0);
+ }
+}
+
+void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // Only for superframes whose base is not key, as those are
+ // already sync frames.
+ if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ if (svc->spatial_layer_id == 0) {
+ // On base spatial layer: if the current superframe has a layer sync then
+ // reset the pattern counters and reset to base temporal layer.
+ if (svc->superframe_has_layer_sync) vp9_svc_reset_key_frame(cpi);
+ }
+ // If layer sync is set for the current spatial layer then
+ // disable the temporal reference.
+ if (svc->spatial_layer_id > 0 &&
+ svc->spatial_layer_sync[svc->spatial_layer_id]) {
+ cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
+ if (svc->use_gf_temporal_ref_current_layer) {
+ int index = svc->spatial_layer_id;
+ // If golden is used as second reference: need to remove it from
+ // prediction, reset refresh period to 0, and update the reference.
+ svc->use_gf_temporal_ref_current_layer = 0;
+ cpi->rc.baseline_gf_interval = 0;
+ cpi->rc.frames_till_gf_update_due = 0;
+ // On a layer sync frame we must update the buffer index used for the
+ // long term reference. Use the alt_ref since it is not used or updated
+ // on sync frames.
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ }
+ }
+ }
+}
+
+void vp9_svc_update_ref_frame_buffer_idx(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // Update the usage of frame buffer index for base spatial layers.
+ if (svc->spatial_layer_id == 0) {
+ if ((cpi->ref_frame_flags & VP9_LAST_FLAG) || cpi->refresh_last_frame)
+ svc->fb_idx_base[cpi->lst_fb_idx] = 1;
+ if ((cpi->ref_frame_flags & VP9_GOLD_FLAG) || cpi->refresh_golden_frame)
+ svc->fb_idx_base[cpi->gld_fb_idx] = 1;
+ if ((cpi->ref_frame_flags & VP9_ALT_FLAG) || cpi->refresh_alt_ref_frame)
+ svc->fb_idx_base[cpi->alt_fb_idx] = 1;
+ }
+}
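
The asserts in vp9_svc_assert_constraints_pattern() above rely on the new
fb_idx_spatial_layer_id / fb_idx_temporal_layer_id maps being refreshed after
every encoded frame. That bookkeeping happens in the encoder proper
(vp9_encoder.c, in the diffstat but not in these hunks); a minimal sketch of
what it has to do — the update site and the loop over refreshed buffers are
assumed:

/* Sketch only. */
static void record_fb_idx_layer_ids(SVC *svc, int refreshed_fb_idx) {
  /* Remember which spatial/temporal layer last updated this buffer slot,
   * so the reference-pattern asserts can validate later frames against it. */
  svc->fb_idx_spatial_layer_id[refreshed_fb_idx] = svc->spatial_layer_id;
  svc->fb_idx_temporal_layer_id[refreshed_fb_idx] = svc->temporal_layer_id;
}
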
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 617717049..0ac1a7315 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -24,7 +24,7 @@ typedef enum {
INTER_LAYER_PRED_ON,
// Inter-layer prediction is off on all frames.
INTER_LAYER_PRED_OFF,
- // Inter-layer prediction is off on non-key frames.
+ // Inter-layer prediction is off on non-key frames and non-sync frames.
INTER_LAYER_PRED_OFF_NONKEY,
// Inter-layer prediction is on on all frames, but constrained such
// that any layer S (> 0) can only predict from previous spatial
@@ -32,6 +32,11 @@ typedef enum {
INTER_LAYER_PRED_ON_CONSTRAINED
} INTER_LAYER_PRED;
+typedef struct BUFFER_LONGTERM_REF {
+ // Frame buffer index for the second (long term) temporal reference.
+ int idx;
+ // Whether this buffer index is currently in use by LAST/GOLDEN/ALTREF.
+ int is_used;
+} BUFFER_LONGTERM_REF;
+
typedef struct {
RATE_CONTROL rc;
int target_bandwidth;
@@ -69,7 +74,6 @@ typedef struct SVC {
int number_temporal_layers;
int spatial_layer_to_encode;
- int first_spatial_layer_to_encode;
// Workaround for multiple frame contexts
enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;
@@ -96,8 +100,13 @@ typedef struct SVC {
int lst_fb_idx[VPX_MAX_LAYERS];
int gld_fb_idx[VPX_MAX_LAYERS];
int alt_fb_idx[VPX_MAX_LAYERS];
- int ref_frame_index[REF_FRAMES];
int force_zero_mode_spatial_ref;
+ // Sequence level flag to enable second (long term) temporal reference.
+ int use_gf_temporal_ref;
+ // Frame level flag to enable second (long term) temporal reference.
+ int use_gf_temporal_ref_current_layer;
+ // Allow a second reference for at most the 2 highest resolution layers.
+ BUFFER_LONGTERM_REF buffer_gf_temporal_ref[2];
int current_superframe;
int non_reference_frame;
int use_base_mv;
@@ -122,6 +131,8 @@ typedef struct SVC {
int last_layer_dropped[VPX_MAX_LAYERS];
int drop_spatial_layer[VPX_MAX_LAYERS];
int framedrop_thresh[VPX_MAX_LAYERS];
+ int drop_count[VPX_MAX_LAYERS];
+ int max_consec_drop;
SVC_LAYER_DROP_MODE framedrop_mode;
INTER_LAYER_PRED disable_inter_layer_pred;
@@ -141,7 +152,19 @@ typedef struct SVC {
// Keep track of the frame buffer index updated/refreshed on the base
// temporal superframe.
- uint8_t fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
+ int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
+
+ // Keep track of the spatial and temporal layer id of the frame that last
+ // updated the frame buffer index.
+ uint8_t fb_idx_spatial_layer_id[REF_FRAMES];
+ uint8_t fb_idx_temporal_layer_id[REF_FRAMES];
+
+ int spatial_layer_sync[VPX_SS_MAX_LAYERS];
+ uint8_t set_intra_only_frame;
+ uint8_t previous_frame_is_intra_only;
+ uint8_t superframe_has_layer_sync;
+
+ uint8_t fb_idx_base[REF_FRAMES];
} SVC;
struct VP9_COMP;
@@ -201,6 +224,12 @@ void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi);
+void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi);
+
+void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi);
+
+void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 2758c42ae..4db3e6f8e 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -620,13 +620,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) {
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_row, tile_col;
- MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
- // Save input state
- uint8_t *input_buffer[MAX_MB_PLANE];
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
-
vp9_init_tile_data(cpi);
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
@@ -634,9 +627,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) {
temporal_filter_iterate_tile_c(cpi, tile_row, tile_col);
}
}
-
- // Restore input state
- for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
}
// Apply buffer limits and context specific adjustments to arnr filter.
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index d40d3c445..7ca4004b0 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -68,6 +68,7 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht16x16_add_neon.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 5eaa7a18a..13c42c75f 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -248,7 +248,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(extra_cfg, row_mt, 0, 1);
RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2);
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
- RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
+ RANGE_CHECK(extra_cfg, cpu_used, -9, 9);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
@@ -1436,7 +1436,6 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
SVC *const svc = &cpi->svc;
- svc->first_spatial_layer_to_encode = data->spatial_layer_id;
svc->spatial_layer_to_encode = data->spatial_layer_id;
svc->temporal_layer_id = data->temporal_layer_id;
// Checks on valid layer_id input.
@@ -1444,10 +1443,7 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) {
return VPX_CODEC_INVALID_PARAM;
}
- if (svc->first_spatial_layer_to_encode < 0 ||
- svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) {
- return VPX_CODEC_INVALID_PARAM;
- }
+
return VPX_CODEC_OK;
}
@@ -1536,6 +1532,28 @@ static vpx_codec_err_t ctrl_set_svc_frame_drop_layer(vpx_codec_alg_priv_t *ctx,
cpi->svc.framedrop_mode = data->framedrop_mode;
for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl)
cpi->svc.framedrop_thresh[sl] = data->framedrop_thresh[sl];
+ // Don't allow max_consec_drop values below 1.
+ cpi->svc.max_consec_drop = VPXMAX(1, data->max_consec_drop);
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_svc_gf_temporal_ref(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ const unsigned int data = va_arg(args, unsigned int);
+ cpi->svc.use_gf_temporal_ref = data;
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_svc_spatial_layer_sync(
+ vpx_codec_alg_priv_t *ctx, va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ vpx_svc_spatial_layer_sync_t *data =
+ va_arg(args, vpx_svc_spatial_layer_sync_t *);
+ int sl;
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl)
+ cpi->svc.spatial_layer_sync[sl] = data->spatial_layer_sync[sl];
+ cpi->svc.set_intra_only_frame = data->base_layer_intra_only;
return VPX_CODEC_OK;
}
@@ -1624,6 +1642,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test },
{ VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred },
{ VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer },
+ { VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref },
+ { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync },
// Getters
{ VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer },
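
Usage sketch for the new encoder controls registered above, plus the extended
frame-drop control. The control IDs, struct names, and fields come from this
patch and vpx/vp8cx.h; the surrounding codec setup and the 3-layer
configuration are assumptions:

#include <string.h>
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

/* Sketch only: assumes `codec` is an initialized 3-spatial-layer SVC encoder. */
static void configure_svc_controls(vpx_codec_ctx_t *codec) {
  vpx_svc_spatial_layer_sync_t sync;
  vpx_svc_frame_drop_t drop;
  int sl;

  /* Enable golden as a second (long term) temporal reference. */
  vpx_codec_control(codec, VP9E_SET_SVC_GF_TEMPORAL_REF, 1);

  /* Request a sync (no temporal reference) on spatial layer 1 for the
   * next superframe. */
  memset(&sync, 0, sizeof(sync));
  sync.spatial_layer_sync[1] = 1;
  sync.base_layer_intra_only = 0;
  vpx_codec_control(codec, VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &sync);

  /* Per-layer frame dropping; max_consec_drop is clamped to >= 1 by the
   * encoder, per ctrl_set_svc_frame_drop_layer() above. */
  memset(&drop, 0, sizeof(drop));
  drop.framedrop_mode = CONSTRAINED_LAYER_DROP;
  for (sl = 0; sl < 3; ++sl) drop.framedrop_thresh[sl] = 30;
  drop.max_consec_drop = 4;
  vpx_codec_control(codec, VP9E_SET_SVC_FRAME_DROP_LAYER, &drop);
}
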
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 657490f4b..7f45ab28f 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -97,7 +97,7 @@ static vpx_codec_err_t decoder_peek_si_internal(
const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si,
int *is_intra_only, vpx_decrypt_cb decrypt_cb, void *decrypt_state) {
int intra_only_flag = 0;
- uint8_t clear_buffer[10];
+ uint8_t clear_buffer[11];
if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM;
@@ -158,6 +158,9 @@ static vpx_codec_err_t decoder_peek_si_internal(
if (profile > PROFILE_0) {
if (!parse_bitdepth_colorspace_sampling(profile, &rb))
return VPX_CODEC_UNSUP_BITSTREAM;
+ // The colorspace info may cause vp9_read_frame_size() to need 11
+ // bytes.
+ if (data_sz < 11) return VPX_CODEC_UNSUP_BITSTREAM;
}
rb.bit_offset += REF_FRAMES; // refresh_frame_flags
vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
@@ -235,6 +238,19 @@ static void set_ppflags(const vpx_codec_alg_priv_t *ctx, vp9_ppflags_t *flags) {
flags->noise_level = ctx->postproc_cfg.noise_level;
}
+#undef ERROR
+#define ERROR(str) \
+ do { \
+ ctx->base.err_detail = str; \
+ return VPX_CODEC_INVALID_PARAM; \
+ } while (0)
+
+#define RANGE_CHECK(p, memb, lo, hi) \
+ do { \
+ if (!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \
+ ERROR(#memb " out of range [" #lo ".." #hi "]"); \
+ } while (0)
+
static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
ctx->last_show_frame = -1;
ctx->need_resync = 1;
@@ -251,6 +267,9 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
ctx->pbi->max_threads = ctx->cfg.threads;
ctx->pbi->inv_tile_order = ctx->invert_tile_order;
+ RANGE_CHECK(ctx, row_mt, 0, 1);
+ ctx->pbi->row_mt = ctx->row_mt;
+
// If postprocessing was enabled by the application and a
// configuration has not been provided, default it.
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
@@ -632,6 +651,13 @@ static vpx_codec_err_t ctrl_set_spatial_layer_svc(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->row_mt = va_arg(args, int);
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
@@ -643,6 +669,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment },
{ VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
{ VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc },
+ { VP9D_SET_ROW_MT, ctrl_set_row_mt },
// Getters
{ VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer },
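
Usage sketch for the new decoder-side row multithreading control registered
above. VP9D_SET_ROW_MT comes from this patch; the surrounding setup is
assumed. Note that init_decoder() range-checks the value to [0..1], so the
control should be set before the first decode call:

#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"

/* Sketch only: assumes `decoder` was created with vpx_codec_dec_init(). */
static void enable_row_mt(vpx_codec_ctx_t *decoder) {
  /* Request row-based multithreading in the decoder. */
  vpx_codec_control(decoder, VP9D_SET_ROW_MT, 1);
}
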
diff --git a/vp9/vp9_dx_iface.h b/vp9/vp9_dx_iface.h
index 18bc7ab0d..6a101b03d 100644
--- a/vp9/vp9_dx_iface.h
+++ b/vp9/vp9_dx_iface.h
@@ -45,6 +45,7 @@ struct vpx_codec_alg_priv {
// Allow for decoding up to a given spatial layer for SVC stream.
int svc_decoding;
int svc_spatial_layer;
+ int row_mt;
};
#endif // VP9_VP9_DX_IFACE_H_
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 6186d4614..d5b167bf7 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -140,6 +140,8 @@ VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h
+VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c
+
# Strip unnecessary files with CONFIG_REALTIME_ONLY
VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_firstpass.c
VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_mbgraph.c