diff options
-rw-r--r-- | vp9/common/vp9_debugmodes.c | 42 | ||||
-rw-r--r-- | vp9/common/vp9_thread.h | 2 | ||||
-rw-r--r-- | vp9/common/x86/vp9_high_intrapred_sse2.asm | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 89 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 14 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 1 | ||||
-rw-r--r-- | vpxenc.c | 18 |
11 files changed, 116 insertions, 68 deletions
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c index d9dace6ac..e96bc4f2b 100644 --- a/vp9/common/vp9_debugmodes.c +++ b/vp9/common/vp9_debugmodes.c @@ -25,55 +25,65 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, size_t member_offset) { int mi_row, mi_col; - int mi_index = 0; - // TODO(hkuang): Fix this debug function. - MODE_INFO **mi = &cm->mi; + MODE_INFO *mi = cm->mi; int rows = cm->mi_rows; int cols = cm->mi_cols; char prefix = descriptor[0]; log_frame_info(cm, descriptor, file); - mi_index = 0; for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(file, "%c ", prefix); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(file, "%2d ", - *((int*) ((char *) (&mi[mi_index]->mbmi) + - member_offset))); - mi_index++; + *((int*) ((char *) (&mi->src_mi->mbmi) + + member_offset))); + mi++; } fprintf(file, "\n"); - mi_index += 8; + mi += 8; } fprintf(file, "\n"); } + void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { int mi_row; int mi_col; - int mi_index = 0; FILE *mvs = fopen(file, "a"); - // TODO(hkuang): Fix this debug function. - MODE_INFO **mi = &cm->mi; + MODE_INFO *mi = cm->mi; int rows = cm->mi_rows; int cols = cm->mi_cols; print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type)); print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode)); - print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip)); print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0])); print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size)); print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode)); + // output skip information. 
+ log_frame_info(cm, "Skips:", mvs); + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(mvs, "S "); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi->src_mi->mbmi.skip); + mi++; + } + fprintf(mvs, "\n"); + mi += 8; + } + fprintf(mvs, "\n"); + + // output motion vectors. log_frame_info(cm, "Vectors ", mvs); + mi = cm->mi; for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "V "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi[mi_index]->mbmi.mv[0].as_mv.row, - mi[mi_index]->mbmi.mv[0].as_mv.col); - mi_index++; + fprintf(mvs, "%4d:%4d ", mi->src_mi->mbmi.mv[0].as_mv.row, + mi->src_mi->mbmi.mv[0].as_mv.col); + mi++; } fprintf(mvs, "\n"); - mi_index += 8; + mi += 8; } fprintf(mvs, "\n"); diff --git a/vp9/common/vp9_thread.h b/vp9/common/vp9_thread.h index c24ef5fac..12848fede 100644 --- a/vp9/common/vp9_thread.h +++ b/vp9/common/vp9_thread.h @@ -28,7 +28,7 @@ extern "C" { #if CONFIG_MULTITHREAD -#if defined(_WIN32) +#if defined(_WIN32) && !HAVE_PTHREAD_H #include <errno.h> // NOLINT #include <process.h> // NOLINT #include <windows.h> // NOLINT diff --git a/vp9/common/x86/vp9_high_intrapred_sse2.asm b/vp9/common/x86/vp9_high_intrapred_sse2.asm index 721126c78..b12d29c0a 100644 --- a/vp9/common/x86/vp9_high_intrapred_sse2.asm +++ b/vp9/common/x86/vp9_high_intrapred_sse2.asm @@ -345,7 +345,7 @@ cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one %if ARCH_X86_64 INIT_XMM sse2 -cglobal highbd_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one +cglobal highbd_tm_predictor_16x16, 5, 6, 9, dst, stride, above, left, bps, one movd m2, [aboveq-2] mova m0, [aboveq] mova m1, [aboveq+16] diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 0885909cd..99bb9300e 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -471,6 +471,43 @@ static int set_vt_partitioning(VP9_COMP *cpi, return 0; } + +void vp9_set_vbp_thresholds(VP9_COMP 
*cpi, int q) { + SPEED_FEATURES *const sf = &cpi->sf; + if (sf->partition_search_type != VAR_BASED_PARTITION) { + return; + } else { + VP9_COMMON *const cm = &cpi->common; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; + const int is_key_frame = (cm->frame_type == KEY_FRAME); + const int use_4x4_partition = is_key_frame; + const int low_res = (cm->width <= 352 && cm->height <= 288); + const int threshold_multiplier = is_key_frame ? 80 : 4; + const int64_t threshold_base = (int64_t)(threshold_multiplier * + vp9_convert_qindex_to_q(q, cm->bit_depth)); + cpi->vbp_threshold = threshold_base; + cpi->vbp_threshold_bsize_min = threshold_base << oxcf->speed; + cpi->vbp_threshold_bsize_max = threshold_base; + + if (is_key_frame) { + cpi->vbp_threshold = threshold_base >> 2; + cpi->vbp_threshold_bsize_min = threshold_base << 2; + } else if (low_res) { + cpi->vbp_threshold_bsize_min = threshold_base << 3; + cpi->vbp_threshold_bsize_max = threshold_base >> 2; + } + // TODO(marpan): Allow 4x4 partitions for inter-frames. + // use_4x4_partition = (variance4x4downsample[i2 + j] == 1); + // If 4x4 partition is not used, then 8x8 partition will be selected + // if variance of 16x16 block is very high, so use larger threshold + // for 16x16 (threshold_bsize_min) in that case. + cpi->vbp_threshold_16x16 = (use_4x4_partition) ? + cpi->vbp_threshold : cpi->vbp_threshold_bsize_min; + cpi->vbp_bsize_min = (use_4x4_partition) ? BLOCK_8X8 : BLOCK_16X16; + } +} + + // This function chooses partitioning based on the variance between source and // reconstructed last, where variance is computed for downs-sampled inputs. 
static void choose_partitioning(VP9_COMP *cpi, @@ -479,7 +516,6 @@ static void choose_partitioning(VP9_COMP *cpi, int mi_row, int mi_col) { VP9_COMMON * const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - int i, j, k, m; v64x64 vt; v16x16 vt2[16]; @@ -489,34 +525,12 @@ static void choose_partitioning(VP9_COMP *cpi, int dp; int pixels_wide = 64, pixels_high = 64; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; + // Always use 4x4 partition for key frame. const int is_key_frame = (cm->frame_type == KEY_FRAME); const int use_4x4_partition = is_key_frame; + const int low_res = (cm->width <= 352 && cm->height <= 288); int variance4x4downsample[16]; - int low_res = (cm->width <= 352 && cm->height <= 288) ? 1 : 0; - const int threshold_multiplier = is_key_frame ? 80 : 4; - int64_t threshold_base; - int64_t threshold; - int64_t threshold_bsize_min; - int64_t threshold_bsize_max; - - vp9_clear_system_state(); - threshold_base = (int64_t)(threshold_multiplier * - vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth)); - threshold = threshold_base; - threshold_bsize_min = threshold_base << cpi->oxcf.speed; - threshold_bsize_max = threshold_base; - - // Modify thresholds for key frame and for low-resolutions (set lower - // thresholds to favor split). 
- if (is_key_frame) { - threshold = threshold_base >> 2; - threshold_bsize_min = threshold_base << 2; - } else if (low_res) { - threshold_bsize_min = threshold_base << 3; - threshold_bsize_max = threshold_base >> 2; - } set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); @@ -531,7 +545,8 @@ static void choose_partitioning(VP9_COMP *cpi, if (!is_key_frame) { MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; unsigned int var = 0, sse; - vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf); + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[LAST_FRAME - 1].sf); mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[1] = NONE; mbmi->sb_type = BLOCK_64X64; @@ -619,7 +634,7 @@ static void choose_partitioning(VP9_COMP *cpi, } if (is_key_frame || (low_res && vt.split[i].split[j].part_variances.none.variance > - (threshold << 1))) { + (cpi->vbp_threshold << 1))) { // Go down to 4x4 down-sampling for variance. variance4x4downsample[i2 + j] = 1; for (k = 0; k < 4; k++) { @@ -680,30 +695,22 @@ static void choose_partitioning(VP9_COMP *cpi, } fill_variance_tree(&vt, BLOCK_64X64); - // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold. if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col, - threshold_bsize_max, BLOCK_16X16)) { + cpi->vbp_threshold_bsize_max, BLOCK_16X16)) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); const int i2 = i << 2; if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32, (mi_row + y32_idx), (mi_col + x32_idx), - threshold, BLOCK_16X16)) { + cpi->vbp_threshold, + BLOCK_16X16)) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); const int y16_idx = ((j >> 1) << 1); - // TODO(marpan): Allow 4x4 partitions for inter-frames. 
- // use_4x4_partition = (variance4x4downsample[i2 + j] == 1); - // If 4x4 partition is not used, then 8x8 partition will be selected - // if variance of 16x16 block is very high, so use larger threshold - // for 16x16 (threshold_bsize_min) in that case. - uint64_t threshold_16x16 = (use_4x4_partition) ? threshold : - threshold_bsize_min; - BLOCK_SIZE bsize_min = (use_4x4_partition) ? BLOCK_8X8 : BLOCK_16X16; // For inter frames: if variance4x4downsample[] == 1 for this 16x16 // block, then the variance is based on 4x4 down-sampling, so use vt2 // in set_vt_partioning(), otherwise use vt. @@ -713,7 +720,8 @@ static void choose_partitioning(VP9_COMP *cpi, if (!set_vt_partitioning(cpi, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, mi_col + x32_idx + x16_idx, - threshold_16x16, bsize_min)) { + cpi->vbp_threshold_16x16, + cpi->vbp_bsize_min)) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); @@ -722,7 +730,8 @@ static void choose_partitioning(VP9_COMP *cpi, BLOCK_8X8, mi_row + y32_idx + y16_idx + y8_idx, mi_col + x32_idx + x16_idx + x8_idx, - threshold_bsize_min, BLOCK_8X8)) { + cpi->vbp_threshold_bsize_min, + BLOCK_8X8)) { set_block_size(cpi, xd, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index 556f3a5a1..8d545b671 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -38,6 +38,8 @@ void vp9_init_tile_data(struct VP9_COMP *cpi); void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col); +void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 65b660528..eda38ff3d 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2800,7 +2800,10 @@ static void encode_without_recode_loop(VP9_COMP *cpi) { set_size_dependent_vars(cpi, 
&q, &bottom_index, &top_index); vp9_set_quantizer(cm, q); + vp9_set_vbp_thresholds(cpi, q); + setup_frame(cpi); + // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 1e4c982ff..35c5a487b 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -450,6 +450,13 @@ typedef struct VP9_COMP { int resize_pending; + // VAR_BASED_PARTITION thresholds + int64_t vbp_threshold; + int64_t vbp_threshold_bsize_min; + int64_t vbp_threshold_bsize_max; + int64_t vbp_threshold_16x16; + BLOCK_SIZE vbp_bsize_min; + // Multi-threading int num_workers; VP9Worker *workers; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index d24e4c7e6..071747e17 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -810,9 +810,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], &pf_var[filter], &pf_sse[filter]); - cost = RDCOST(x->rdmult, x->rddiv, - vp9_get_switchable_rate(cpi, xd) + pf_rate[filter], - pf_dist[filter]); + pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); + cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); pf_tx_size[filter] = mbmi->tx_size; if (cost < best_cost) { best_filter = filter; @@ -849,6 +848,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, &var_y, &sse_y); + this_rdc.rate += cm->interp_filter == SWITCHABLE ? 
+ vp9_get_switchable_rate(cpi, xd) : 0; } // chroma component rate-distortion cost modeling @@ -1129,8 +1130,6 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t b_best_rd = INT64_MAX; const int i = idy * 2 + idx; PREDICTION_MODE this_mode; - int b_rate = 0; - int64_t b_dist = 0; RD_COST this_rdc; unsigned int var_y, sse_y; @@ -1158,6 +1157,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, &b_mv[NEARMV]); for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { + int b_rate = 0; xd->mi[0].bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int; if (this_mode == NEWMV) { @@ -1219,6 +1219,9 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, &x->pred_sse[ref_frame], NULL, 0, 0); xd->mi[0].bmi[i].as_mv[0].as_mv = tmp_mv; + } else { + b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; } vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride, @@ -1235,7 +1238,6 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, &var_y, &sse_y); this_rdc.rate += b_rate; - this_rdc.dist += b_dist; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); if (this_rdc.rdcost < b_best_rd) { diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 8d316d6bc..69751379f 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -1173,8 +1173,8 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { // Modify frame size target when down-scaling. if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && rc->frame_size_selector != UNSCALED) - rc->this_frame_target = - rc->this_frame_target * rate_thresh_mult[rc->frame_size_selector]; + rc->this_frame_target = (int)(rc->this_frame_target + * rate_thresh_mult[rc->frame_size_selector]); // Target rate per SB64 (including partial SB64s. 
rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) / diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index eaa0accdb..8722d9cec 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -421,4 +421,3 @@ void vp9_set_speed_features_framesize_dependent(struct VP9_COMP *cpi); #endif #endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ - @@ -398,6 +398,22 @@ static const arg_def_t frame_periodic_boost = ARG_DEF( NULL, "frame-boost", 1, "Enable frame periodic boost (0: off (default), 1: on)"); +static const struct arg_enum_list color_space_enum[] = { + { "unknown", VPX_CS_UNKNOWN }, + { "bt601", VPX_CS_BT_601 }, + { "bt709", VPX_CS_BT_709 }, + { "smpte170", VPX_CS_SMPTE_170 }, + { "smpte240", VPX_CS_SMPTE_240 }, + { "bt2020", VPX_CS_BT_2020 }, + { "reserved", VPX_CS_RESERVED }, + { "sRGB", VPX_CS_SRGB }, + { NULL, 0 } +}; + +static const arg_def_t input_color_space = ARG_DEF_ENUM( + NULL, "color-space", 1, + "The color space of input content:", color_space_enum); + #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH static const struct arg_enum_list bitdepth_enum[] = { {"8", VPX_BITS_8}, @@ -429,7 +445,7 @@ static const arg_def_t *vp9_args[] = { &tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct, &gf_cbr_boost_pct, &lossless, &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, - &noise_sens, &tune_content, + &noise_sens, &tune_content, &input_color_space, #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH &bitdeptharg, &inbitdeptharg, #endif |