diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_dct.c | 24 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 11 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 51 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 14 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 9 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2.c | 36 |
6 files changed, 76 insertions, 69 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index d5232393f..577276764 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -445,20 +445,20 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { step3[7] = step1[7] + step2[4]; // step 4 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; - temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; + temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; step2[1] = fdct_round_shift(temp1); step2[2] = fdct_round_shift(temp2); - temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; + temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; step2[5] = fdct_round_shift(temp1); step2[6] = fdct_round_shift(temp2); // step 5 step1[0] = step3[0] + step2[1]; step1[1] = step3[0] - step2[1]; - step1[2] = step3[3] - step2[2]; - step1[3] = step3[3] + step2[2]; - step1[4] = step3[4] + step2[5]; - step1[5] = step3[4] - step2[5]; + step1[2] = step3[3] + step2[2]; + step1[3] = step3[3] - step2[2]; + step1[4] = step3[4] - step2[5]; + step1[5] = step3[4] + step2[5]; step1[6] = step3[7] - step2[6]; step1[7] = step3[7] + step2[6]; // step 6 @@ -755,10 +755,10 @@ static void fdct16(const int16_t in[16], int16_t out[16]) { // step 4 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; - temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; + temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; step2[1] = fdct_round_shift(temp1); step2[2] = fdct_round_shift(temp2); - temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; + temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; step2[5] = fdct_round_shift(temp1); step2[6] = fdct_round_shift(temp2); @@ -766,10 +766,10 @@ static void fdct16(const int16_t in[16], int16_t out[16]) { // step 5 step1[0] = step3[0] + step2[1]; step1[1] = step3[0] - step2[1]; - step1[2] = step3[3] - step2[2]; - step1[3] = step3[3] + step2[2]; - step1[4] = step3[4] + step2[5]; - step1[5] = step3[4] - step2[5]; + step1[2] = step3[3] + step2[2]; + step1[3] = step3[3] - step2[2]; + step1[4] = step3[4] - step2[5]; + step1[5] = step3[4] + step2[5]; step1[6] = step3[7] - step2[6]; step1[7] = step3[7] + step2[6]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index ef33fcaf1..cca17592e 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1318,7 +1318,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile, return x->in_static_area; } -static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { +static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8, + const int motion_thresh) { const int mis = cm->mi_stride; int block_row, block_col; @@ -1327,8 +1328,8 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { for (block_col = 0; block_col < 8; ++block_col) { const MODE_INFO *prev_mi = prev_mi_8x8[block_row * mis + block_col]; if (prev_mi) { - if (abs(prev_mi->mbmi.mv[0].as_mv.row) >= 8 || - abs(prev_mi->mbmi.mv[0].as_mv.col) >= 8) + if (abs(prev_mi->mbmi.mv[0].as_mv.row) > motion_thresh || + abs(prev_mi->mbmi.mv[0].as_mv.col) > motion_thresh) return 1; } } @@ -2324,7 +2325,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, || cpi->rc.is_src_frame_alt_ref || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && - sb_has_motion(cm, prev_mi))) { + sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); @@ -2337,7 +2338,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, cpi->pc_root); } else { if (sf->constrain_copy_partition && - sb_has_motion(cm, prev_mi)) + sb_has_motion(cm, prev_mi, sf->lf_motion_threshold)) constrain_copy_partitioning(cpi, tile, mi, prev_mi, mi_row, mi_col, BLOCK_16X16); else diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index a7c527af4..efc20f6b3 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -259,25 +259,14 @@ static void avg_stats(FIRSTPASS_STATS *section) { // Calculate a modified Error used in distributing bits between easier and // harder frames. -static double calculate_modified_err(const VP9_COMP *cpi, +static double calculate_modified_err(const TWO_PASS *twopass, + const VP9EncoderConfig *oxcf, const FIRSTPASS_STATS *this_frame) { - const TWO_PASS *twopass = &cpi->twopass; - const SVC *const svc = &cpi->svc; - const FIRSTPASS_STATS *stats; - double av_err; - double modified_error; - - if (svc->number_spatial_layers > 1 && - svc->number_temporal_layers == 1) { - twopass = &svc->layer_context[svc->spatial_layer_id].twopass; - } - - stats = &twopass->total_stats; - av_err = stats->ssim_weighted_pred_err / stats->count; - modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(av_err), - cpi->oxcf.two_pass_vbrbias / 100.0); - + const FIRSTPASS_STATS *const stats = &twopass->total_stats; + const double av_err = stats->ssim_weighted_pred_err / stats->count; + const double modified_error = av_err * + pow(this_frame->ssim_weighted_pred_err / DOUBLE_DIVIDE_CHECK(av_err), + oxcf->two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); } @@ -1027,8 +1016,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) { (av_error * oxcf->two_pass_vbrmax_section) / 100; while (input_stats(twopass, &this_frame) != EOF) { - twopass->modified_error_total += - calculate_modified_err(cpi, &this_frame); + twopass->modified_error_total += calculate_modified_err(twopass, oxcf, + &this_frame); } twopass->modified_error_left = twopass->modified_error_total; @@ -1519,7 +1508,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { start_pos = twopass->stats_in; // Load stats for the current frame. - mod_frame_err = calculate_modified_err(cpi, this_frame); + mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); // Note the error of the frame at the start of the group. This will be // the GF frame error if we code a normal gf. @@ -1551,7 +1540,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { ++i; // Accumulate error score of frames in this gf group. - mod_frame_err = calculate_modified_err(cpi, this_frame); + mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); gf_group_err += mod_frame_err; if (EOF == input_stats(twopass, &next_frame)) @@ -1626,7 +1615,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { break; if (i < rc->frames_to_key) { - mod_frame_err = calculate_modified_err(cpi, this_frame); + mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); gf_group_err += mod_frame_err; } } @@ -1774,11 +1763,12 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Allocate bits to a normal frame that is neither a gf an arf or a key frame. static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - TWO_PASS *twopass = &cpi->twopass; + TWO_PASS *const twopass = &cpi->twopass; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; // For a single frame. - const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + const int max_bits = frame_max_bits(&cpi->rc, oxcf); // Calculate modified prediction error used in bit allocation. - const double modified_err = calculate_modified_err(cpi, this_frame); + const double modified_err = calculate_modified_err(twopass, oxcf, this_frame); int target_frame_size; double err_fraction; @@ -1884,6 +1874,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; const FIRSTPASS_STATS first_frame = *this_frame; const FIRSTPASS_STATS *start_position = twopass->stats_in; FIRSTPASS_STATS next_frame; @@ -1913,14 +1904,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->kf_group_bits = 0; // Total bits available to kf group twopass->kf_group_error_left = 0; // Group modified error score. - kf_mod_err = calculate_modified_err(cpi, this_frame); + kf_mod_err = calculate_modified_err(twopass, oxcf, this_frame); // Find the next keyframe. i = 0; while (twopass->stats_in < twopass->stats_in_end && rc->frames_to_key < cpi->oxcf.key_freq) { // Accumulate kf group error. - kf_group_err += calculate_modified_err(cpi, this_frame); + kf_group_err += calculate_modified_err(twopass, oxcf, this_frame); // Load the next frame's stats. last_frame = *this_frame; @@ -1982,7 +1973,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Rescan to get the correct error data for the forced kf group. for (i = 0; i < rc->frames_to_key; ++i) { - kf_group_err += calculate_modified_err(cpi, &tmp_frame); + kf_group_err += calculate_modified_err(twopass, oxcf, &tmp_frame); input_stats(twopass, &tmp_frame); } rc->next_key_frame_forced = 1; @@ -1996,7 +1987,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special case for the last key frame of the file. if (twopass->stats_in >= twopass->stats_in_end) { // Accumulate kf group error. - kf_group_err += calculate_modified_err(cpi, this_frame); + kf_group_err += calculate_modified_err(twopass, oxcf, this_frame); } // Calculate the number of bits that should be assigned to the kf group. diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 7b2d1e2f0..5130c8f00 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -80,12 +80,16 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, } if (speed >= 2) { - if (MIN(cm->width, cm->height) >= 720) + if (MIN(cm->width, cm->height) >= 720) { + sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; + sf->last_partitioning_redo_frequency = 3; sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else + } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - + sf->last_partitioning_redo_frequency = 2; + sf->lf_motion_threshold = NO_MOITION_THRESHOLD; + } sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | @@ -97,7 +101,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; } if (speed >= 3) { @@ -108,6 +111,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, else sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; + sf->last_partitioning_redo_frequency = 3; sf->recode_loop = ALLOW_RECODE_KFMAXBW; sf->adaptive_rd_thresh = 3; sf->mode_skip_start = 6; @@ -196,6 +201,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index d8c1a8be2..a54599e6a 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -44,6 +44,11 @@ typedef enum { } SUBPEL_SEARCH_METHODS; typedef enum { + NO_MOITION_THRESHOLD = 0, + LOW_MOITION_THRESHOLD = 7 +} MOTION_THRESHOLD; + +typedef enum { LAST_FRAME_PARTITION_OFF = 0, LAST_FRAME_PARTITION_LOW_MOTION = 1, LAST_FRAME_PARTITION_ALL = 2 @@ -200,6 +205,10 @@ typedef struct SPEED_FEATURES { // partitioning. LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + // The threshold is to determine how slow the motino is, it is used when + // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION + MOTION_THRESHOLD lf_motion_threshold; + // Determine which method we use to determine transform size. We can choose // between options like full rd, largest for prediction size, largest // for intra and model coefs for the rest. diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index 686582238..1f58d872e 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -1187,7 +1187,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); @@ -1513,8 +1513,8 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { const __m128i t3 = _mm_unpackhi_epi16(step3_2, step3_5); const __m128i u0 = _mm_madd_epi16(t0, k__cospi_m08_p24); const __m128i u1 = _mm_madd_epi16(t1, k__cospi_m08_p24); - const __m128i u2 = _mm_madd_epi16(t2, k__cospi_m24_m08); - const __m128i u3 = _mm_madd_epi16(t3, k__cospi_m24_m08); + const __m128i u2 = _mm_madd_epi16(t2, k__cospi_p24_p08); + const __m128i u3 = _mm_madd_epi16(t3, k__cospi_p24_p08); // dct_const_round_shift const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); @@ -1535,8 +1535,8 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { const __m128i t3 = _mm_unpackhi_epi16(step3_2, step3_5); const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p24_p08); const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p24_p08); - const __m128i u2 = _mm_madd_epi16(t2, k__cospi_m08_p24); - const __m128i u3 = _mm_madd_epi16(t3, k__cospi_m08_p24); + const __m128i u2 = _mm_madd_epi16(t2, k__cospi_p08_m24); + const __m128i u3 = _mm_madd_epi16(t3, k__cospi_p08_m24); // dct_const_round_shift const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); @@ -1554,10 +1554,10 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { { step1_0 = _mm_add_epi16(step3_0, step2_1); step1_1 = _mm_sub_epi16(step3_0, step2_1); - step1_2 = _mm_sub_epi16(step3_3, step2_2); - step1_3 = _mm_add_epi16(step3_3, step2_2); - step1_4 = _mm_add_epi16(step3_4, step2_5); - step1_5 = _mm_sub_epi16(step3_4, step2_5); + step1_2 = _mm_add_epi16(step3_3, step2_2); + step1_3 = _mm_sub_epi16(step3_3, step2_2); + step1_4 = _mm_sub_epi16(step3_4, step2_5); + step1_5 = _mm_add_epi16(step3_4, step2_5); step1_6 = _mm_sub_epi16(step3_7, step2_6); step1_7 = _mm_add_epi16(step3_7, step2_6); } @@ -1848,7 +1848,7 @@ void fdct16_8col(__m128i *in) { const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); @@ -2052,10 +2052,10 @@ void fdct16_8col(__m128i *in) { v[0] = _mm_madd_epi16(u[0], k__cospi_m08_p24); v[1] = _mm_madd_epi16(u[1], k__cospi_m08_p24); - v[2] = _mm_madd_epi16(u[2], k__cospi_m24_m08); - v[3] = _mm_madd_epi16(u[3], k__cospi_m24_m08); - v[4] = _mm_madd_epi16(u[2], k__cospi_m08_p24); - v[5] = _mm_madd_epi16(u[3], k__cospi_m08_p24); + v[2] = _mm_madd_epi16(u[2], k__cospi_p24_p08); + v[3] = _mm_madd_epi16(u[3], k__cospi_p24_p08); + v[4] = _mm_madd_epi16(u[2], k__cospi_p08_m24); + v[5] = _mm_madd_epi16(u[3], k__cospi_p08_m24); v[6] = _mm_madd_epi16(u[0], k__cospi_p24_p08); v[7] = _mm_madd_epi16(u[1], k__cospi_p24_p08); @@ -2085,10 +2085,10 @@ void fdct16_8col(__m128i *in) { // stage 5 s[0] = _mm_add_epi16(p[0], t[1]); s[1] = _mm_sub_epi16(p[0], t[1]); - s[2] = _mm_sub_epi16(p[3], t[2]); - s[3] = _mm_add_epi16(p[3], t[2]); - s[4] = _mm_add_epi16(p[4], t[5]); - s[5] = _mm_sub_epi16(p[4], t[5]); + s[2] = _mm_add_epi16(p[3], t[2]); + s[3] = _mm_sub_epi16(p[3], t[2]); + s[4] = _mm_sub_epi16(p[4], t[5]); + s[5] = _mm_add_epi16(p[4], t[5]); s[6] = _mm_sub_epi16(p[7], t[6]); s[7] = _mm_add_epi16(p[7], t[6]); |