diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 70 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 25 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 2 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2.c | 19 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 4 |
7 files changed, 54 insertions, 81 deletions
diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm index 5c097ccc8..3a0ff608b 100644 --- a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm @@ -259,7 +259,7 @@ idct32_transpose_pair_loop ; transpose pair loop processing add r3, r3, #1 cmp r3, #1 - BLE idct32_transpose_pair_loop + ble idct32_transpose_pair_loop ; restore r0/input to its original value sub r0, r0, #32*8*2 @@ -954,7 +954,7 @@ idct32_transpose_pair_loop ; bands loop processing add r4, r4, #1 cmp r4, #3 - BLE idct32_bands_loop + ble idct32_bands_loop pop {r4} bx lr @@ -1005,7 +1005,7 @@ idct32_combine_add_loop ; loop processing add r3, r3, #1 cmp r3, #31 - BLE idct32_combine_add_loop + ble idct32_combine_add_loop bx lr ENDP ; |idct32_transpose| diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index ad7e6d821..6e44e604c 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -2093,14 +2093,19 @@ void vp9_second_pass(VP9_COMP *cpi) { cpi->twopass.est_max_qcorrection_factor = 1.0; // Set a cq_level in constrained quality mode. + // Commenting this code out for now since it does not seem to be + // working well. + /* if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats, - section_target_bandwidth); + section_target_bandwidth); - cpi->cq_target_quality = cpi->oxcf.cq_level; if (est_cq > cpi->cq_target_quality) cpi->cq_target_quality = est_cq; + else + cpi->cq_target_quality = cpi->oxcf.cq_level; } + */ // guess at maxq needed in 2nd pass cpi->twopass.maxq_max_limit = cpi->worst_quality; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index ad8c8999a..7dd786904 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -136,66 +136,26 @@ void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { } void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { - int len; - int search_site_count = 0; + int len, ss_count = 1; - // Generate offsets for 8 search sites per step. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; + x->ss[0].mv.col = x->ss[0].mv.row = 0; + x->ss[0].offset = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -len; - x->ss[search_site_count].offset = -len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = len; - x->ss[search_site_count].offset = len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -len; - x->ss[search_site_count].mv.row = -len; - x->ss[search_site_count].offset = -len * stride - len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = len; - x->ss[search_site_count].mv.row = -len; - x->ss[search_site_count].offset = -len * stride + len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -len; - x->ss[search_site_count].mv.row = len; - x->ss[search_site_count].offset = len * stride - len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = len; - x->ss[search_site_count].mv.row = len; - x->ss[search_site_count].offset = len * stride + len; - search_site_count++; + // Generate offsets for 8 search sites per step. + const MV ss_mvs[8] = { + {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, + {-len, -len}, {-len, len}, {len, -len}, {len, len} + }; + int i; + for (i = 0; i < 8; ++i) { + search_site *const ss = &x->ss[ss_count++]; + ss->mv = ss_mvs[i]; + ss->offset = ss->mv.row * stride + ss->mv.col; + } } - x->ss_count = search_site_count; + x->ss_count = ss_count; x->searches_per_step = 8; } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index bc1b4a8c6..1d0733ed6 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -122,6 +122,8 @@ static int kf_high_motion_minq[QINDEX_RANGE]; static int gf_low_motion_minq[QINDEX_RANGE]; static int gf_high_motion_minq[QINDEX_RANGE]; static int inter_minq[QINDEX_RANGE]; +static int afq_low_motion_minq[QINDEX_RANGE]; +static int afq_high_motion_minq[QINDEX_RANGE]; static INLINE void Scale2Ratio(int mode, int *hr, int *hs) { switch (mode) { @@ -205,7 +207,16 @@ static void init_minq_luts(void) { -0.00113, 0.697, 0.0); - + afq_low_motion_minq[i] = calculate_minq_index(maxq, + 0.0000015, + -0.0009, + 0.33, + 0.0); + afq_high_motion_minq[i] = calculate_minq_index(maxq, + 0.0000021, + -0.00125, + 0.57, + 0.0); } } @@ -2765,16 +2776,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } else { if (cpi->frames_since_key > 1) { if (cpi->gfu_boost > high) { - cpi->active_best_quality = cpi->cq_target_quality * 6 / 16; + cpi->active_best_quality = afq_low_motion_minq[q]; } else if (cpi->gfu_boost < low) { - cpi->active_best_quality = cpi->cq_target_quality * 11 / 16; + cpi->active_best_quality = afq_high_motion_minq[q]; } else { const int gap = high - low; const int offset = high - cpi->gfu_boost; - const int qdiff = cpi->cq_target_quality * 5 / 16; + const int qdiff = afq_high_motion_minq[q] - afq_low_motion_minq[q]; const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; - cpi->active_best_quality = cpi->cq_target_quality * 6 / 16 - + adjustment; + + cpi->active_best_quality = afq_low_motion_minq[q] + adjustment; } } } @@ -3262,7 +3273,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // in this frame. // update_base_skip_probs(cpi); -#if CONFIG_INTERNAL_STATS +#if 0 // CONFIG_INTERNAL_STATS { FILE *f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w"); int recon_err; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index adaa1a29d..041316b4f 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2318,7 +2318,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // Further refinement that is encode side only to test the top few candidates // in full and choose the best as the centre point for subsequent searches. // The current implementation doesn't support scaling. - if (!vp9_is_scaled(&scale[frame_type])) + if (!vp9_is_scaled(&scale[frame_type]) && block_size >= BLOCK_8X8) mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride, frame_type, block_size); } diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index eb271fef3..ad3d01da9 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -171,22 +171,21 @@ static INLINE void transpose_4x4(__m128i *res) { void fdct4_1d_sse2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u[4], v[4]; - u[0] = _mm_add_epi16(in[0], in[3]); - u[1] = _mm_add_epi16(in[1], in[2]); - u[2] = _mm_sub_epi16(in[1], in[2]); - u[3] = _mm_sub_epi16(in[0], in[3]); + u[0]=_mm_unpacklo_epi16(in[0], in[1]); + u[1]=_mm_unpacklo_epi16(in[3], in[2]); + + v[0] = _mm_add_epi16(u[0], u[1]); + v[1] = _mm_sub_epi16(u[0], u[1]); - v[0] = _mm_unpacklo_epi16(u[0], u[1]); - v[1] = _mm_unpacklo_epi16(u[2], u[3]); u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); // 0 u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16); // 2 - u[2] = _mm_madd_epi16(v[1], k__cospi_p24_p08); // 1 - u[3] = _mm_madd_epi16(v[1], k__cospi_m08_p24); // 3 + u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24); // 1 + u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08); // 3 v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 4f63c52fd..08a1a8458 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -277,11 +277,9 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, // CBR code has been deprectated for experimental phase. // CQ mode not yet tested oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; - /* if (cfg.rc_end_usage == VPX_CQ) oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; - */ - if (cfg.rc_end_usage == VPX_Q) + else if (cfg.rc_end_usage == VPX_Q) oxcf->end_usage = USAGE_CONSTANT_QUALITY; oxcf->target_bandwidth = cfg.rc_target_bitrate; |