summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/vp9_blockd.c2
-rw-r--r--vp9/common/vp9_mvref_common.c4
-rw-r--r--vp9/common/vp9_onyxc_int.h1
-rw-r--r--vp9/common/vp9_rtcd_defs.pl2
-rw-r--r--vp9/decoder/vp9_decodeframe.c8
-rw-r--r--vp9/decoder/vp9_decodemv.c1
-rw-r--r--vp9/encoder/vp9_bitstream.c9
-rw-r--r--vp9/encoder/vp9_encodeframe.c86
-rw-r--r--vp9/encoder/vp9_encoder.c60
-rw-r--r--vp9/encoder/vp9_firstpass.c4
-rw-r--r--vp9/encoder/vp9_pickmode.c6
-rw-r--r--vp9/encoder/vp9_ratectrl.c4
-rw-r--r--vp9/encoder/vp9_speed_features.c8
-rw-r--r--vp9/encoder/vp9_svc_layercontext.c63
-rw-r--r--vp9/encoder/vp9_svc_layercontext.h10
-rw-r--r--vp9/encoder/vp9_temporal_filter.c3
-rw-r--r--vp9/encoder/x86/vp9_quantize_sse2.c225
-rw-r--r--vp9/vp9_cx_iface.c6
-rw-r--r--vp9/vp9cx.mk1
19 files changed, 408 insertions, 95 deletions
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index e13445fd1..7094a0118 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -92,7 +92,7 @@ void vp9_foreach_transformed_block(const MACROBLOCKD* const xd,
void *arg) {
int plane;
- for (plane = 0; plane < MAX_MB_PLANE; plane++)
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane)
vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
}
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index fbb3d4b5a..3b34050a8 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -24,10 +24,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
? cm->prev_mi[mi_row * xd->mi_stride + mi_col].src_mi
: NULL;
const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->src_mi->mbmi : NULL;
-
-
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
-
int different_ref_found = 0;
int context_counter = 0;
@@ -127,7 +124,6 @@ static void lower_mv_precision(MV *mv, int allow_hp) {
}
}
-
void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
int_mv *mvlist, int_mv *nearest, int_mv *near) {
int i;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index c28f156ab..f1eda9117 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -143,7 +143,6 @@ typedef struct VP9Common {
int prev_mi_idx;
int mi_alloc_size;
MODE_INFO *mip_array[2];
- MODE_INFO **mi_grid_base_array[2];
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index c2a918106..de389e7af 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1155,7 +1155,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
+ specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 2690f4887..dc712f045 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -656,10 +656,8 @@ static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
int *width, int *height) {
- const int w = vp9_rb_read_literal(rb, 16) + 1;
- const int h = vp9_rb_read_literal(rb, 16) + 1;
- *width = w;
- *height = h;
+ *width = vp9_rb_read_literal(rb, 16) + 1;
+ *height = vp9_rb_read_literal(rb, 16) + 1;
}
static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
@@ -749,7 +747,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
if (!found)
vp9_read_frame_size(rb, &width, &height);
- if (width <=0 || height <= 0)
+ if (width <= 0 || height <= 0)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Invalid frame size");
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 3c8e7cc6b..a01fe842e 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -223,7 +223,6 @@ static int read_mv_component(vp9_reader *r,
fr = vp9_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d]
: mvcomp->fp);
-
// High precision part (if hp is not used, the default value of the hp is 1)
hp = usehp ? vp9_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp)
: 1;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 694cac76f..3954fe6a7 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1013,7 +1013,11 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
(cpi->svc.number_spatial_layers > 1 &&
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) {
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
+ (is_two_pass_svc(cpi) &&
+ cpi->svc.encode_empty_frame_state == ENCODING &&
+ cpi->svc.layer_context[0].frames_from_key_frame <
+ cpi->svc.number_temporal_layers + 1))) {
found = 0;
}
vp9_wb_write_bit(wb, found);
@@ -1105,8 +1109,7 @@ static void write_uncompressed_header(VP9_COMP *cpi,
// will change to show_frame flag to 0, then add an one byte frame with
// show_existing_frame flag which tells the decoder which frame we want to
// show.
- if (!cm->show_frame ||
- (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0))
+ if (!cm->show_frame)
vp9_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index b43ff9747..197f54cfc 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -412,29 +412,47 @@ static int set_vt_partitioning(VP9_COMP *cpi,
return 1;
}
- // Vertical split is available on all but the bottom border.
- if (mi_row + block_height / 2 < cm->mi_rows &&
- vt.part_variances->vert[0].variance < threshold &&
- vt.part_variances->vert[1].variance < threshold) {
- BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
- set_block_size(cpi, mi_row, mi_col, subsize);
- set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
- return 1;
- }
-
- // Horizontal split is available on all but the right border.
- if (mi_col + block_width / 2 < cm->mi_cols &&
- vt.part_variances->horz[0].variance < threshold &&
- vt.part_variances->horz[1].variance < threshold) {
- BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
- set_block_size(cpi, mi_row, mi_col, subsize);
- set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
- return 1;
+ // Only allow split for blocks above 16x16.
+ if (bsize > BLOCK_16X16) {
+ // Vertical split is available on all but the bottom border.
+ if (mi_row + block_height / 2 < cm->mi_rows &&
+ vt.part_variances->vert[0].variance < threshold &&
+ vt.part_variances->vert[1].variance < threshold) {
+ BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
+ return 1;
+ }
+
+ // Horizontal split is available on all but the right border.
+ if (mi_col + block_width / 2 < cm->mi_cols &&
+ vt.part_variances->horz[0].variance < threshold &&
+ vt.part_variances->horz[1].variance < threshold) {
+ BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
+ return 1;
+ }
+ }
+
+ // This will only allow 8x8 if the 16x16 variance is very large.
+ if (bsize == BLOCK_16X16) {
+ if (mi_col + block_width / 2 < cm->mi_cols &&
+ mi_row + block_height / 2 < cm->mi_rows &&
+ vt.part_variances->none.variance < (threshold << 6)) {
+ set_block_size(cpi, mi_row, mi_col, bsize);
+ return 1;
+ }
}
return 0;
}
-// TODO(debargha): Fix this function and make it work as expected.
+// This function chooses partitioning based on the variance
+// between source and reconstructed last, where variance is
+// computed for 8x8 downsampled inputs. Some things to check:
+// using the last source rather than reconstructed last, and
+// allowing for small downsampling (4x4 or 2x2) for selection
+// of smaller block sizes (i.e., < 16x16).
static void choose_partitioning(VP9_COMP *cpi,
const TileInfo *const tile,
int mi_row, int mi_col) {
@@ -549,27 +567,11 @@ static void choose_partitioning(VP9_COMP *cpi,
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
- // NOTE: This is a temporary hack to disable 8x8 partitions,
- // since it works really bad - possibly due to a bug
-#define DISABLE_8X8_VAR_BASED_PARTITION
-#ifdef DISABLE_8X8_VAR_BASED_PARTITION
- if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
- mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) {
- set_block_size(cpi,
- (mi_row + y32_idx + y16_idx),
- (mi_col + x32_idx + x16_idx),
- BLOCK_16X16);
- } else {
- for (k = 0; k < 4; ++k) {
- const int x8_idx = (k & 1);
- const int y8_idx = (k >> 1);
- set_block_size(cpi,
- (mi_row + y32_idx + y16_idx + y8_idx),
- (mi_col + x32_idx + x16_idx + x8_idx),
- BLOCK_8X8);
- }
- }
-#else
+ // NOTE: Since this uses 8x8 downsampling for variance calculation
+ // we cannot really select block size 8x8 (or even 8x16/16x8),
+ // since we do not sufficient samples for variance.
+ // For now, 8x8 partition is only set if the variance of the 16x16
+ // block is very high. This is controlled in set_vt_partitioning.
if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
BLOCK_16X16,
mi_row + y32_idx + y16_idx,
@@ -583,7 +585,6 @@ static void choose_partitioning(VP9_COMP *cpi,
BLOCK_8X8);
}
}
-#endif
}
}
}
@@ -3158,7 +3159,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int64_t dummy_dist = 0;
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO *mi = cm->mi + idx_str;
- MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
BLOCK_SIZE bsize;
x->in_static_area = 0;
x->source_variance = UINT_MAX;
@@ -3196,7 +3196,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
&dummy_rate, &dummy_dist, 1, INT64_MAX,
cpi->pc_root);
} else {
- copy_partitioning(cm, mi, prev_mi);
+ choose_partitioning(cpi, tile, mi_row, mi_col);
nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
cpi->pc_root);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 8096a9072..1758e3fdb 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -225,6 +225,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
}
vpx_memset(&cpi->svc.scaled_frames[0], 0,
MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
+
+ vp9_free_frame_buffer(&cpi->svc.empty_frame.img);
+ vpx_memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
}
static void save_coding_context(VP9_COMP *cpi) {
@@ -585,8 +588,6 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
cpi->ref_frame_flags = 0;
init_buffer_indices(cpi);
-
- set_tile_limits(cpi);
}
static void set_rc_buffer_sizes(RATE_CONTROL *rc,
@@ -2981,7 +2982,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
}
if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {
+ // Use the last frame context for the empty frame.
cm->frame_context_idx =
+ (cpi->svc.encode_empty_frame_state == ENCODING) ? FRAME_CONTEXTS - 1 :
cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id;
@@ -3162,7 +3165,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->ref_frame_flags = get_ref_frame_flags(cpi);
cm->last_frame_type = cm->frame_type;
- vp9_rc_postencode_update(cpi, *size);
+
+ if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
+ vp9_rc_postencode_update(cpi, *size);
#if 0
output_frame_level_debug_stats(cpi);
@@ -3186,12 +3191,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cm->last_height = cm->height;
// reset to normal state now that we are done.
- if (!cm->show_existing_frame) {
- if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0)
- cm->last_show_frame = 0;
- else
- cm->last_show_frame = cm->show_frame;
- }
+ if (!cm->show_existing_frame)
+ cm->last_show_frame = cm->show_frame;
if (cm->show_frame) {
vp9_swap_mi_and_prev_mi(cm);
@@ -3228,7 +3229,9 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
uint8_t *dest, unsigned int *frame_flags) {
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
- vp9_twopass_postencode_update(cpi);
+
+ if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
+ vp9_twopass_postencode_update(cpi);
}
static void init_motion_estimation(VP9_COMP *cpi) {
@@ -3416,6 +3419,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (is_two_pass_svc(cpi)) {
#if CONFIG_SPATIAL_SVC
vp9_svc_start_frame(cpi);
+ // Use a small empty frame instead of a real frame
+ if (cpi->svc.encode_empty_frame_state == ENCODING)
+ source = &cpi->svc.empty_frame;
#endif
if (oxcf->pass == 2)
vp9_restore_layer_context(cpi);
@@ -3434,6 +3440,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi);
+
+ // Skip alt frame if we encode the empty frame
+ if (is_two_pass_svc(cpi) && source != NULL)
+ arf_src_index = 0;
+
if (arf_src_index) {
assert(arf_src_index <= rc->frames_to_key);
@@ -3544,7 +3555,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// For two pass encodes analyse the first pass stats and determine
// the bit allocation and other parameters for this frame / group of frames.
- if ((oxcf->pass == 2) && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ if ((oxcf->pass == 2) &&
+ (!cpi->use_svc ||
+ (is_two_pass_svc(cpi) &&
+ cpi->svc.encode_empty_frame_state != ENCODING))) {
vp9_rc_get_second_pass_params(cpi);
}
@@ -3773,10 +3787,18 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif
- if (is_two_pass_svc(cpi) && cm->show_frame) {
- ++cpi->svc.spatial_layer_to_encode;
- if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
- cpi->svc.spatial_layer_to_encode = 0;
+ if (is_two_pass_svc(cpi)) {
+ if (cpi->svc.encode_empty_frame_state == ENCODING)
+ cpi->svc.encode_empty_frame_state = ENCODED;
+
+ if (cm->show_frame) {
+ ++cpi->svc.spatial_layer_to_encode;
+ if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
+ cpi->svc.spatial_layer_to_encode = 0;
+
+ // May need the empty frame after an visible frame.
+ cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
+ }
}
return 0;
}
@@ -3867,10 +3889,6 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
if (width) {
cm->width = width;
- if (cm->width * 5 < cpi->initial_width) {
- cm->width = cpi->initial_width / 5 + 1;
- printf("Warning: Desired width too small, changed to %d\n", cm->width);
- }
if (cm->width > cpi->initial_width) {
cm->width = cpi->initial_width;
printf("Warning: Desired width too large, changed to %d\n", cm->width);
@@ -3879,10 +3897,6 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
if (height) {
cm->height = height;
- if (cm->height * 5 < cpi->initial_height) {
- cm->height = cpi->initial_height / 5 + 1;
- printf("Warning: Desired height too small, changed to %d\n", cm->height);
- }
if (cm->height > cpi->initial_height) {
cm->height = cpi->initial_height;
printf("Warning: Desired height too large, changed to %d\n", cm->height);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c9588a343..96c3e0aa4 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2405,6 +2405,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
lc->frames_from_key_frame = 0;
+ // Reset the empty frame resolution since we have a key frame.
+ cpi->svc.empty_frame_width = cm->width;
+ cpi->svc.empty_frame_height = cm->height;
}
} else {
cm->frame_type = INTER_FRAME;
@@ -2478,6 +2481,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
if (rc->total_actual_bits) {
rc->rate_error_estimate =
(int)((rc->vbr_bits_off_target * 100) / rc->total_actual_bits);
+ rc->rate_error_estimate = clamp(rc->rate_error_estimate, -100, 100);
} else {
rc->rate_error_estimate = 0;
}
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 42f46917c..b74b2dd56 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -486,8 +486,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// var_y and sse_y are saved to be used in skipping checking
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
+ // Reduce the intra cost penalty for small blocks (<=16x16).
+ const int reduction_fac =
+ (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
+ bsize <= BLOCK_16X16) ? 4 : 1;
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
- cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) / reduction_fac;
const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
intra_cost_penalty, 0);
const int intra_mode_cost = 50;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index ef32fe179..65bca669a 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1500,9 +1500,7 @@ void vp9_rc_set_gf_max_interval(const VP9_COMP *const cpi,
rc->max_gf_interval = 16;
// Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = oxcf->key_freq >> 1;
- if (rc->static_scene_max_gf_interval > (MAX_LAG_BUFFERS * 2))
- rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+ rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
if (is_altref_enabled(cpi)) {
if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 8788be645..bec77d71f 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -271,6 +271,10 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->partition_search_type = REFERENCE_PARTITION;
sf->use_nonrd_pick_mode = 1;
sf->allow_skip_recode = 0;
+ sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
}
if (speed >= 6) {
@@ -285,10 +289,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->partition_search_type = VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->mv.search_method = NSTEP;
- sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
- sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
- sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
- sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index eed681c96..1573557d4 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -14,6 +14,8 @@
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_extend.h"
+#define SMALL_FRAME_FB_IDX 7
+
void vp9_init_layer_context(VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -28,6 +30,25 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
layer_end = svc->number_temporal_layers;
} else {
layer_end = svc->number_spatial_layers;
+
+ if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
+ if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img,
+ cpi->common.width, cpi->common.height,
+ cpi->common.subsampling_x,
+ cpi->common.subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cpi->common.use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate empty frame for multiple frame "
+ "contexts");
+
+ vpx_memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
+ cpi->svc.empty_frame.img.buffer_alloc_sz);
+ cpi->svc.empty_frame_width = cpi->common.width;
+ cpi->svc.empty_frame_height = cpi->common.height;
+ }
}
for (layer = 0; layer < layer_end; ++layer) {
@@ -310,6 +331,47 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
lc->scaling_factor_num, lc->scaling_factor_den,
&width, &height);
+
+ // Workaround for multiple frame contexts. In some frames we can't use prev_mi
+ // since its previous frame could be changed during decoding time. The idea is
+ // we put a empty invisible frame in front of them, then we will not use
+ // prev_mi when encoding these frames.
+ if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2 &&
+ cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE) {
+ if ((cpi->svc.number_temporal_layers > 1 &&
+ cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1) ||
+ (cpi->svc.number_spatial_layers > 1 &&
+ cpi->svc.spatial_layer_id == 0)) {
+ struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, 0);
+
+ if (buf != NULL) {
+ cpi->svc.empty_frame.ts_start = buf->ts_start;
+ cpi->svc.empty_frame.ts_end = buf->ts_end;
+ cpi->svc.encode_empty_frame_state = ENCODING;
+ cpi->common.show_frame = 0;
+ cpi->ref_frame_flags = 0;
+ cpi->common.frame_type = INTER_FRAME;
+ cpi->lst_fb_idx =
+ cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX;
+
+ // Gradually make the empty frame smaller to save bits. Make it half of
+ // its previous size because of the scaling factor restriction.
+ cpi->svc.empty_frame_width >>= 1;
+ cpi->svc.empty_frame_width = (cpi->svc.empty_frame_width + 1) & ~1;
+ if (cpi->svc.empty_frame_width < 16)
+ cpi->svc.empty_frame_width = 16;
+
+ cpi->svc.empty_frame_height >>= 1;
+ cpi->svc.empty_frame_height = (cpi->svc.empty_frame_height + 1) & ~1;
+ if (cpi->svc.empty_frame_height < 16)
+ cpi->svc.empty_frame_height = 16;
+
+ width = cpi->svc.empty_frame_width;
+ height = cpi->svc.empty_frame_height;
+ }
+ }
+ }
+
if (vp9_set_size_literal(cpi, width, height) != 0)
return VPX_CODEC_INVALID_PARAM;
@@ -317,7 +379,6 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
cpi->oxcf.best_allowed_q = vp9_quantizer_to_qindex(lc->min_q);
vp9_change_config(cpi, &cpi->oxcf);
-
vp9_set_high_precision_mv(cpi, 1);
cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 47a5456b6..e9645ce9f 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -50,6 +50,16 @@ typedef struct {
int spatial_layer_to_encode;
+ // Workaround for multiple frame contexts
+ enum {
+ ENCODED = 0,
+ ENCODING,
+ NEED_TO_ENCODE
+ }encode_empty_frame_state;
+ struct lookahead_entry empty_frame;
+ int empty_frame_width;
+ int empty_frame_height;
+
// Store scaled source frames to be used for temporal filter to generate
// a alt ref frame.
YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 9ae81e761..5599227ce 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -719,6 +719,9 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
++frame_used;
}
}
+ cm->mi = cm->mip + cm->mi_stride + 1;
+ cpi->mb.e_mbd.mi = cm->mi;
+ cpi->mb.e_mbd.mi[0].src_mi = &cpi->mb.e_mbd.mi[0];
} else {
// ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c
new file mode 100644
index 000000000..7c1c8843c
--- /dev/null
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
+#include "vpx/vpx_integer.h"
+
+void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t* zbin_ptr,
+ const int16_t* round_ptr, const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
+ int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
+ int zbin_oq_value, uint16_t* eob_ptr,
+ const int16_t* scan_ptr,
+ const int16_t* iscan_ptr) {
+ __m128i zero;
+ (void)scan_ptr;
+
+ coeff_ptr += n_coeffs;
+ iscan_ptr += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;
+ zero = _mm_setzero_si128();
+ if (!skip_block) {
+ __m128i eob;
+ __m128i zbin;
+ __m128i round, quant, dequant, shift;
+ {
+ __m128i coeff0, coeff1;
+
+ // Setup global values
+ {
+ __m128i zbin_oq;
+ __m128i pw_1;
+ zbin_oq = _mm_set1_epi16(zbin_oq_value);
+ zbin = _mm_load_si128((const __m128i*)zbin_ptr);
+ round = _mm_load_si128((const __m128i*)round_ptr);
+ quant = _mm_load_si128((const __m128i*)quant_ptr);
+ zbin = _mm_add_epi16(zbin, zbin_oq);
+ pw_1 = _mm_set1_epi16(1);
+ zbin = _mm_sub_epi16(zbin, pw_1);
+ dequant = _mm_load_si128((const __m128i*)dequant_ptr);
+ shift = _mm_load_si128((const __m128i*)quant_shift_ptr);
+ }
+
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ __m128i cmp_mask0, cmp_mask1;
+ // Do DC and first 15 AC
+ coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+ zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC
+ cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ round = _mm_unpackhi_epi64(round, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ quant = _mm_unpackhi_epi64(quant, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+ qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+ qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+ qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+ shift = _mm_unpackhi_epi64(shift, shift);
+ qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ // Mask out zbin threshold coeffs
+ qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+ qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ dequant = _mm_unpackhi_epi64(dequant, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob = _mm_max_epi16(eob, eob1);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // AC only loop
+ while (n_coeffs < 0) {
+ __m128i coeff0, coeff1;
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ __m128i cmp_mask0, cmp_mask1;
+
+ coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+ cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+ qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+ qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+ qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+ qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ // Mask out zbin threshold coeffs
+ qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+ qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob0, eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob0 = _mm_max_epi16(eob0, eob1);
+ eob = _mm_max_epi16(eob, eob0);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // Accumulate EOB
+ {
+ __m128i eob_shuffled;
+ eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ *eob_ptr = _mm_extract_epi16(eob, 1);
+ }
+ } else {
+ do {
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+ n_coeffs += 8 * 2;
+ } while (n_coeffs < 0);
+ *eob_ptr = 0;
+ }
+}
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 041ba27da..d0ca5242c 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -188,11 +188,9 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
}
if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
ERROR("Not enough ref buffers for svc alt ref frames");
- if ((cfg->ss_number_layers > 3 ||
- cfg->ss_number_layers * cfg->ts_number_layers > 4) &&
+ if (cfg->ss_number_layers * cfg->ts_number_layers > 3 &&
cfg->g_error_resilient == 0)
- ERROR("Multiple frame context are not supported for more than 3 spatial "
- "layers or more than 4 spatial x temporal layers");
+ ERROR("Multiple frame context are not supported for more than 3 layers");
}
#endif
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index a2e3cda7f..ad767229a 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -101,6 +101,7 @@ VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm