diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_blockd.h | 4 | ||||
-rw-r--r-- | vp9/common/vp9_common_data.c | 2 | ||||
-rw-r--r-- | vp9/common/vp9_common_data.h | 1 | ||||
-rw-r--r-- | vp9/common/vp9_onyxc_int.h | 2 | ||||
-rw-r--r-- | vp9/common/vp9_reconinter.h | 32 | ||||
-rw-r--r-- | vp9/common/x86/vp9_asm_stubs.c | 100 | ||||
-rw-r--r-- | vp9/common/x86/vp9_idct_intrin_sse2.c | 286 | ||||
-rw-r--r-- | vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c | 591 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 76 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 554 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 143 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 143 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 89 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 9 | ||||
-rw-r--r-- | vp9/vp9_common.mk | 1 | ||||
-rw-r--r-- | vp9/vp9_dx_iface.c | 125 |
18 files changed, 768 insertions, 1394 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index c6af7f6da..21e2b16a4 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -115,10 +115,6 @@ static INLINE int mi_width_log2(BLOCK_SIZE sb_type) { return mi_width_log2_lookup[sb_type]; } -static INLINE int mi_height_log2(BLOCK_SIZE sb_type) { - return mi_height_log2_lookup[sb_type]; -} - // This structure now relates to 8x8 block regions. typedef struct { MB_PREDICTION_MODE mode, uv_mode; diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c index 886c0afc6..a927823e0 100644 --- a/vp9/common/vp9_common_data.c +++ b/vp9/common/vp9_common_data.c @@ -26,8 +26,6 @@ const int mi_width_log2_lookup[BLOCK_SIZES] = {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; const int num_8x8_blocks_wide_lookup[BLOCK_SIZES] = {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; -const int mi_height_log2_lookup[BLOCK_SIZES] = - {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3}; const int num_8x8_blocks_high_lookup[BLOCK_SIZES] = {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h index a367c65c6..5222d29c1 100644 --- a/vp9/common/vp9_common_data.h +++ b/vp9/common/vp9_common_data.h @@ -16,7 +16,6 @@ extern const int b_width_log2_lookup[BLOCK_SIZES]; extern const int b_height_log2_lookup[BLOCK_SIZES]; extern const int mi_width_log2_lookup[BLOCK_SIZES]; -extern const int mi_height_log2_lookup[BLOCK_SIZES]; extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZES]; extern const int num_8x8_blocks_high_lookup[BLOCK_SIZES]; extern const int num_4x4_blocks_high_lookup[BLOCK_SIZES]; diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index b5ed959e1..f6fe4d3f1 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -346,7 +346,7 @@ static INLINE int partition_plane_context( const int bs = 1 << bsl; int above = 0, left = 0, i; - assert(mi_width_log2(bsize) == mi_height_log2(bsize)); + assert(b_width_log2(bsize) == b_height_log2(bsize)); assert(bsl >= 0); for (i = 0; i < bs; i++) { diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 9f379399c..3cc16d94e 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -58,34 +58,34 @@ static void setup_pred_plane(struct buf_2d *dst, static void setup_dst_planes(MACROBLOCKD *xd, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { - uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; + uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *pd = &xd->plane[i]; + struct macroblockd_plane *const pd = &xd->plane[i]; setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, pd->subsampling_x, pd->subsampling_y); } } -static void setup_pre_planes(MACROBLOCKD *xd, int i, +static void setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const struct scale_factors *sf) { - if (src) { - int j; - uint8_t* buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; + if (src != NULL) { + int i; + uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; - for (j = 0; j < MAX_MB_PLANE; ++j) { - struct macroblockd_plane *pd = &xd->plane[j]; - setup_pred_plane(&pd->pre[i], buffers[j], strides[j], - mi_row, mi_col, sf, pd->subsampling_x, pd->subsampling_y); + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, + sf, pd->subsampling_x, pd->subsampling_y); } } } diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index f4f758297..f95423678 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -23,105 +23,20 @@ typedef void filter8_1dfunction ( const short *filter ); -#if (HAVE_SSSE3) +#if HAVE_SSSE3 +filter8_1dfunction vp9_filter_block1d16_v8_ssse3; +filter8_1dfunction vp9_filter_block1d16_h8_ssse3; +filter8_1dfunction vp9_filter_block1d8_v8_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_ssse3; +filter8_1dfunction vp9_filter_block1d4_v8_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_ssse3; filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; -#if (ARCH_X86_64) -filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; - -void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Ensure the filter can be compressed to int16_t. */ - if (x_step_q4 == 16 && filter_x[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_h8_intrin_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_h8_intrin_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_h8_intrin_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} -void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - if (y_step_q4 == 16 && filter_y[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_v8_intrin_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_v8_intrin_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_v8_intrin_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -#else -filter8_1dfunction vp9_filter_block1d16_v8_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_ssse3; void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, @@ -198,7 +113,6 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, w, h); } } -#endif void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index 501bed5a8..2f6149464 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -380,17 +380,13 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ } -#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1, out2, out3) \ +#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \ { \ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \ - const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \ \ in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \ in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \ - in2 = _mm_unpacklo_epi32(tr0_2, tr0_3); /* i5 i4 */ \ - in3 = _mm_unpackhi_epi32(tr0_2, tr0_3); /* i7 i6 */ \ } #define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \ @@ -662,7 +658,6 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { } static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { - const __m128i zero = _mm_setzero_si128(); const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); @@ -677,7 +672,6 @@ static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4); out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6); out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6); - out[4] = out[5] = out[6] = out[7] = zero; } static void idct8_1d_sse2(__m128i *in) { @@ -1270,6 +1264,114 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { stp2_10, stp2_13, stp2_11, stp2_12) \ } +#define IDCT16_10_1D \ + /* Stage2 */ \ + { \ + const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \ + const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero); \ + const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]); \ + const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]); \ + \ + MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, \ + stg2_0, stg2_1, stg2_6, stg2_7, \ + stp1_8_0, stp1_15, stp1_11, stp1_12_0) \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero); \ + const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero); \ + \ + MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, \ + stg3_0, stg3_1, \ + stp2_4, stp2_7) \ + \ + stp1_9 = stp1_8_0; \ + stp1_10 = stp1_11; \ + \ + stp1_13 = stp1_12_0; \ + stp1_14 = stp1_15; \ + } \ + \ + /* Stage4 */ \ + { \ + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); \ + const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + \ + MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, \ + stg4_0, stg4_1, \ + stp1_0, stp1_1) \ + stp2_5 = stp2_4; \ + stp2_6 = stp2_7; \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ + stg4_4, stg4_5, stg4_6, stg4_7, \ + stp2_9, stp2_14, stp2_10, stp2_13) \ + } \ + \ + /* Stage5 */ \ + { \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + \ + stp1_2 = stp1_1; \ + stp1_3 = stp1_0; \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ + \ + stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ + } \ + \ + /* Stage6 */ \ + { \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ + stg6_0, stg4_0, stg6_0, stg4_0, \ + stp2_10, stp2_13, stp2_11, stp2_12) \ + } + void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); @@ -2433,22 +2535,14 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); @@ -2456,119 +2550,72 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); __m128i in[16], l[16]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, stp1_8_0, stp1_12_0; __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; - in[4] = in[5] = in[6] = in[7] = in[12] = in[13] = in[14] = in[15] = zero; - // 1-D idct. Load input data. + // First 1-D inverse DCT + // Load input data. in[0] = _mm_load_si128((const __m128i *)input); - in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1)); in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3)); in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5)); in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); - in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7)); - TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1], in[2], in[3]); - TRANSPOSE_8X4(in[8], in[9], in[10], in[11], in[8], in[9], in[10], in[11]); + TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]); // Stage2 { - const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], in[11]); - const __m128i lo_9_7 = _mm_unpackhi_epi16(in[8], in[3]); - const __m128i lo_5_11 = _mm_unpackhi_epi16(in[2], in[9]); - const __m128i lo_13_3 = _mm_unpackhi_epi16(in[10], in[1]); + const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], zero); + const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]); tmp0 = _mm_madd_epi16(lo_1_15, stg2_0); tmp2 = _mm_madd_epi16(lo_1_15, stg2_1); - tmp4 = _mm_madd_epi16(lo_9_7, stg2_2); - tmp6 = _mm_madd_epi16(lo_9_7, stg2_3); - tmp1 = _mm_madd_epi16(lo_5_11, stg2_4); - tmp3 = _mm_madd_epi16(lo_5_11, stg2_5); tmp5 = _mm_madd_epi16(lo_13_3, stg2_6); tmp7 = _mm_madd_epi16(lo_13_3, stg2_7); tmp0 = _mm_add_epi32(tmp0, rounding); tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); tmp5 = _mm_add_epi32(tmp5, rounding); tmp7 = _mm_add_epi32(tmp7, rounding); tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); - stp2_8 = _mm_packs_epi32(tmp0, zero); - stp2_15 = _mm_packs_epi32(tmp2, zero); - stp2_9 = _mm_packs_epi32(tmp4, zero); - stp2_14 = _mm_packs_epi32(tmp6, zero); - - stp2_10 = _mm_packs_epi32(tmp1, zero); - stp2_13 = _mm_packs_epi32(tmp3, zero); - stp2_11 = _mm_packs_epi32(tmp5, zero); - stp2_12 = _mm_packs_epi32(tmp7, zero); + stp2_8 = _mm_packs_epi32(tmp0, tmp2); + stp2_11 = _mm_packs_epi32(tmp5, tmp7); } // Stage3 { - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], in[11]); - const __m128i lo_10_6 = _mm_unpacklo_epi16(in[9], in[3]); + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], zero); tmp0 = _mm_madd_epi16(lo_2_14, stg3_0); tmp2 = _mm_madd_epi16(lo_2_14, stg3_1); - tmp4 = _mm_madd_epi16(lo_10_6, stg3_2); - tmp6 = _mm_madd_epi16(lo_10_6, stg3_3); tmp0 = _mm_add_epi32(tmp0, rounding); tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - - stp1_4 = _mm_packs_epi32(tmp0, zero); - stp1_7 = _mm_packs_epi32(tmp2, zero); - stp1_5 = _mm_packs_epi32(tmp4, zero); - stp1_6 = _mm_packs_epi32(tmp6, zero); - stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); - stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); - stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); - stp1_11 = _mm_add_epi16(stp2_11, stp2_10); + stp1_13 = _mm_unpackhi_epi64(stp2_11, zero); + stp1_14 = _mm_unpackhi_epi64(stp2_8, zero); - stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); - stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); - stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); - stp1_15 = _mm_add_epi16(stp2_15, stp2_14); + stp1_4 = _mm_packs_epi32(tmp0, tmp2); } // Stage4 { - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); - const __m128i lo_4_12 = _mm_unpacklo_epi16(in[2], in[10]); - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14); + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13); tmp0 = _mm_madd_epi16(lo_0_8, stg4_0); tmp2 = _mm_madd_epi16(lo_0_8, stg4_1); - tmp4 = _mm_madd_epi16(lo_4_12, stg4_2); - tmp6 = _mm_madd_epi16(lo_4_12, stg4_3); tmp1 = _mm_madd_epi16(lo_9_14, stg4_4); tmp3 = _mm_madd_epi16(lo_9_14, stg4_5); tmp5 = _mm_madd_epi16(lo_10_13, stg4_6); @@ -2576,8 +2623,6 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, tmp0 = _mm_add_epi32(tmp0, rounding); tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); tmp1 = _mm_add_epi32(tmp1, rounding); tmp3 = _mm_add_epi32(tmp3, rounding); tmp5 = _mm_add_epi32(tmp5, rounding); @@ -2585,49 +2630,40 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); - stp2_0 = _mm_packs_epi32(tmp0, zero); - stp2_1 = _mm_packs_epi32(tmp2, zero); - stp2_2 = _mm_packs_epi32(tmp4, zero); - stp2_3 = _mm_packs_epi32(tmp6, zero); - stp2_9 = _mm_packs_epi32(tmp1, zero); - stp2_14 = _mm_packs_epi32(tmp3, zero); - stp2_10 = _mm_packs_epi32(tmp5, zero); - stp2_13 = _mm_packs_epi32(tmp7, zero); - - stp2_4 = _mm_add_epi16(stp1_4, stp1_5); - stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); - stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); - stp2_7 = _mm_add_epi16(stp1_7, stp1_6); + stp1_0 = _mm_packs_epi32(tmp0, tmp0); + stp1_1 = _mm_packs_epi32(tmp2, tmp2); + stp2_9 = _mm_packs_epi32(tmp1, tmp3); + stp2_10 = _mm_packs_epi32(tmp5, tmp7); + + stp2_6 = _mm_unpackhi_epi64(stp1_4, zero); } // Stage5 and Stage6 { - stp1_0 = _mm_add_epi16(stp2_0, stp2_3); - stp1_1 = _mm_add_epi16(stp2_1, stp2_2); - stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); - stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); - - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); - - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); + tmp0 = _mm_add_epi16(stp2_8, stp2_11); + tmp1 = _mm_sub_epi16(stp2_8, stp2_11); + tmp2 = _mm_add_epi16(stp2_9, stp2_10); + tmp3 = _mm_sub_epi16(stp2_9, stp2_10); + + stp1_9 = _mm_unpacklo_epi64(tmp2, zero); + stp1_10 = _mm_unpacklo_epi64(tmp3, zero); + stp1_8 = _mm_unpacklo_epi64(tmp0, zero); + stp1_11 = _mm_unpacklo_epi64(tmp1, zero); + + stp1_13 = _mm_unpackhi_epi64(tmp3, zero); + stp1_14 = _mm_unpackhi_epi64(tmp2, zero); + stp1_12 = _mm_unpackhi_epi64(tmp1, zero); + stp1_15 = _mm_unpackhi_epi64(tmp0, zero); } // Stage6 { - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp1_4); const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); @@ -2652,21 +2688,26 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - stp1_5 = _mm_packs_epi32(tmp1, zero); - stp1_6 = _mm_packs_epi32(tmp3, zero); + stp1_6 = _mm_packs_epi32(tmp3, tmp1); + stp2_10 = _mm_packs_epi32(tmp0, zero); stp2_13 = _mm_packs_epi32(tmp2, zero); stp2_11 = _mm_packs_epi32(tmp4, zero); stp2_12 = _mm_packs_epi32(tmp6, zero); - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); + tmp0 = _mm_add_epi16(stp1_0, stp1_4); + tmp1 = _mm_sub_epi16(stp1_0, stp1_4); + tmp2 = _mm_add_epi16(stp1_1, stp1_6); + tmp3 = _mm_sub_epi16(stp1_1, stp1_6); + + stp2_0 = _mm_unpackhi_epi64(tmp0, zero); + stp2_1 = _mm_unpacklo_epi64(tmp2, zero); + stp2_2 = _mm_unpackhi_epi64(tmp2, zero); + stp2_3 = _mm_unpacklo_epi64(tmp0, zero); + stp2_4 = _mm_unpacklo_epi64(tmp1, zero); + stp2_5 = _mm_unpackhi_epi64(tmp3, zero); + stp2_6 = _mm_unpacklo_epi64(tmp3, zero); + stp2_7 = _mm_unpackhi_epi64(tmp1, zero); } // Stage7. Left 8x16 only. @@ -2687,12 +2728,11 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, l[14] = _mm_sub_epi16(stp2_1, stp1_14); l[15] = _mm_sub_epi16(stp2_0, stp1_15); - // 2-D idct. We do 2 8x16 blocks. + // Second 1-D inverse transform, performed per 8x16 block for (i = 0; i < 2; i++) { array_transpose_4X8(l + 8*i, in); - in[8] = in[9] = in[10] = in[11] = in[12] = in[13] = in[14] = in[15] = zero; - IDCT16_1D + IDCT16_10_1D // Stage7 in[0] = _mm_add_epi16(stp2_0, stp1_15); diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c deleted file mode 100644 index 303fced3b..000000000 --- a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <tmmintrin.h> -#include "vpx_ports/mem.h" -#include "vpx_ports/emmintrin_compat.h" - - -// filters only for the 4_h8 convolution -DECLARE_ALIGNED(16, const unsigned char, -filt1_4_h8[16])= {0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6}; - -DECLARE_ALIGNED(16, const unsigned char, -filt2_4_h8[16])= {4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10}; - -// filters for 8_h8 and 16_h8 -DECLARE_ALIGNED(16, const unsigned char, -filt1_global[16])= {0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8}; - -DECLARE_ALIGNED(16, const unsigned char, -filt2_global[16])= {2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10}; - -DECLARE_ALIGNED(16, const unsigned char, -filt3_global[16])= {4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12}; - -DECLARE_ALIGNED(16, const unsigned char, -filt4_global[16])= {6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14}; - - - -void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, srcReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 =_mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter into the first lane - firstFilters = _mm_shufflelo_epi16(filtersReg, 0); - // duplicate only the third 16 bit in the filter into the first lane - secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu); - // duplicate only the seconds 16 bits in the filter into the second lane - firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u); - // duplicate only the forth 16 bits in the filter into the second lane - secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu); - - // loading the local filters - thirdFilters =_mm_load_si128((__m128i const *)filt1_4_h8); - forthFilters = _mm_load_si128((__m128i const *)filt2_4_h8); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); - - // filter the source buffer - srcRegFilt1= _mm_shuffle_epi8(srcReg, thirdFilters); - srcRegFilt2= _mm_shuffle_epi8(srcReg, forthFilters); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // extract the higher half of the lane - srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8); - srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8); - - minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2); - - // add and saturate all the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - - srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2); - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pixels_per_line; - - // save only 4 bytes - *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1); - - output_ptr+=output_pitch; - } -} - - -void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg; - __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 128 bit register - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 128 bit register - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 128 bit register - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 128 bit register - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - filt1Reg = _mm_load_si128((__m128i const *)filt1_global); - filt2Reg = _mm_load_si128((__m128i const *)filt2_global); - filt3Reg = _mm_load_si128((__m128i const *)filt3_global); - filt4Reg = _mm_load_si128((__m128i const *)filt4_global); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); - - // filter the source buffer - srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg, filt2Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg, filt3Reg); - srcRegFilt4= _mm_shuffle_epi8(srcReg, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters); - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters); - - // add and saturate all the results together - minReg = _mm_min_epi16(srcRegFilt4, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - - srcRegFilt4= _mm_max_epi16(srcRegFilt4, srcRegFilt3); - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pixels_per_line; - - // save only 8 bytes - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=output_pitch; - } -} - -void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i addFilterReg64, filtersReg, srcReg1, srcReg2; - __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1_1, srcRegFilt2_1, srcRegFilt2, srcRegFilt3; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 128 bit register - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 128 bit register - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 128 bit register - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 128 bit register - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - filt1Reg = _mm_load_si128((__m128i const *)filt1_global); - filt2Reg = _mm_load_si128((__m128i const *)filt2_global); - filt3Reg = _mm_load_si128((__m128i const *)filt3_global); - filt4Reg = _mm_load_si128((__m128i const *)filt4_global); - - for (i = 0; i < output_height; i++) { - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); - - // filter the source buffer - srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt2Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt4Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - - // reading the next 16 bytes. - // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - // filter the source buffer - srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt2Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt4Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, addFilterReg64); - - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7); - srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1); - - src_ptr+=src_pixels_per_line; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1); - - output_ptr+=output_pitch; - } -} - - - -void vp9_filter_block1d4_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i addFilterReg64, filtersReg, firstFilters, secondFilters; - __m128i minReg, srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter into the first lane - firstFilters = _mm_shufflelo_epi16(filtersReg, 0); - // duplicate only the second 16 bits in the filter into the second lane - firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u); - // duplicate only the third 16 bits in the filter into the first lane - secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu); - // duplicate only the forth 16 bits in the filter into the second lane - secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu); - - for (i = 0; i < output_height; i++) { - // load the first 4 byte - srcRegFilt1 = _mm_cvtsi32_si128(*((int*)&src_ptr[0])); - // load the next 4 bytes in stride of src_pitch - srcRegFilt2 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch)[0])); - - // merge the result together - srcRegFilt1 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2); - - - srcRegFilt2 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*2)[0])); - srcRegFilt3 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*3)[0])); - - // merge the result together - srcRegFilt2 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3); - - srcRegFilt3 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*4)[0])); - srcRegFilt4 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*5)[0])); - - // merge the result together - srcRegFilt3 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4); - srcRegFilt1 = _mm_unpacklo_epi64(srcRegFilt1, srcRegFilt2); - - srcRegFilt4 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*6)[0])); - srcRegFilt2 = _mm_cvtsi32_si128(*((int*)&(src_ptr+src_pitch*7)[0])); - - // merge the result together - srcRegFilt4 = _mm_unpacklo_epi8(srcRegFilt4, srcRegFilt2); - srcRegFilt3 = _mm_unpacklo_epi64(srcRegFilt3, srcRegFilt4); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - - // extract the second lane of the 128 bit register - srcRegFilt2 = _mm_srli_si128(srcRegFilt1, 8); - - // add and saturate the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_srli_si128(srcRegFilt3, 8)); - srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pitch; - - // save only 4 bytes convolve result - *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1); - - output_ptr+=out_pitch; - } -} - -void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i addFilterReg64, filtersReg, minReg, srcRegFilt6; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4, srcRegFilt5; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - for (i = 0; i < output_height; i++) { - // load the first 8 bytes - srcRegFilt1 = _mm_loadl_epi64((__m128i *)&src_ptr[0]); - // load the next 8 bytes in stride of src_pitch - srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch)[0]); - srcRegFilt3 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*2)[0]); - srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*3)[0]); - - // merge the result together - srcRegFilt1 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2); - srcRegFilt3 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4); - - // load the next 8 bytes in stride of src_pitch - srcRegFilt2 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*4)[0]); - srcRegFilt4 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*5)[0]); - srcRegFilt5 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*6)[0]); - srcRegFilt6 = _mm_loadl_epi64((__m128i *)&(src_ptr+src_pitch*7)[0]); - - // merge the result together - srcRegFilt2 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt4); - srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt5, srcRegFilt6); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters); - - // add and saturate the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5); - srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pitch; - - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=out_pitch; - } -} - - -void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { - __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt2, srcRegFilt3; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt4, srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - - for (i = 0; i < output_height; i++) { - // load the first 16 bytes - srcRegFilt1 = _mm_loadu_si128((__m128i *)(src_ptr)); - // load the next 16 bytes in stride of src_pitch - srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch)); - srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6)); - srcRegFilt4 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)); - - // merge the result together - srcRegFilt5 = _mm_unpacklo_epi8(srcRegFilt1, srcRegFilt2); - srcRegFilt6 = _mm_unpacklo_epi8(srcRegFilt3, srcRegFilt4); - srcRegFilt1 = _mm_unpackhi_epi8(srcRegFilt1, srcRegFilt2); - srcRegFilt3 = _mm_unpackhi_epi8(srcRegFilt3, srcRegFilt4); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters); - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters); - - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - - // load the next 16 bytes in stride of two/three src_pitch - srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2)); - srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3)); - - // merge the result together - srcRegFilt4 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3); - srcRegFilt6 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, secondFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters); - - // load the next 16 bytes in stride of four/five src_pitch - srcRegFilt2 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4)); - srcRegFilt3 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5)); - - // merge the result together - srcRegFilt7 = _mm_unpacklo_epi8(srcRegFilt2, srcRegFilt3); - srcRegFilt8 = _mm_unpackhi_epi8(srcRegFilt2, srcRegFilt3); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters); - srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters); - - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_min_epi16(srcRegFilt4, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_min_epi16(srcRegFilt6, srcRegFilt8)); - - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_max_epi16(srcRegFilt4, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_max_epi16(srcRegFilt6, srcRegFilt8)); - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, addFilterReg64); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7); - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1); - - src_ptr+=src_pitch; - - // save 16 bytes convolve result - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); - - output_ptr+=out_pitch; - } -} diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index d17952487..6894f553f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1923,9 +1923,6 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if (cpi->sf.reference_masking) - rd_pick_reference_frame(cpi, tile, mi_row, mi_col); - if (cpi->sf.use_lastframe_partitioning || cpi->sf.use_one_partition_size_always ) { const int idx_str = cm->mode_info_stride * mi_row + mi_col; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 5317661c0..812ffa96d 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -106,6 +106,7 @@ static int lookup_next_frame_stats(const struct twopass_rc *p, return 1; } + // Read frame stats at an offset from the current position static int read_frame_stats(const struct twopass_rc *p, FIRSTPASS_STATS *frame_stats, int offset) { @@ -149,7 +150,7 @@ static void output_stats(const VP9_COMP *cpi, FILE *fpfile; fpfile = fopen("firstpass.stt", "a"); - fprintf(stdout, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f" + fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f" "%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f" "%12.0f %12.0f %12.4f %12.0f %12.0f %12.4f\n", stats->frame, @@ -349,17 +350,14 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) { } -// This function returns the current per frame maximum bitrate target. +// This function returns the maximum target rate per frame. static int frame_max_bits(VP9_COMP *cpi) { - // Max allocation for a single frame based on the max section guidelines - // passed in and how many bits are left. - // For VBR base this on the bits and frames left plus the - // two_pass_vbrmax_section rate passed in by the user. - const double max_bits = (1.0 * cpi->twopass.bits_left / - (cpi->twopass.total_stats.count - cpi->common.current_video_frame)) * - (cpi->oxcf.two_pass_vbrmax_section / 100.0); + int64_t max_bits = + ((int64_t)cpi->rc.av_per_frame_bandwidth * + (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100; + if (max_bits < 0) - return 0; + return 0; if (max_bits >= INT_MAX) return INT_MAX; return (int)max_bits; @@ -1662,7 +1660,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Don't allow a gf too near the next kf if ((cpi->rc.frames_to_key - i) < MIN_GF_INTERVAL) { - while (i < cpi->rc.frames_to_key) { + while (i < (cpi->rc.frames_to_key + !cpi->rc.next_key_frame_forced)) { i++; if (EOF == input_stats(&cpi->twopass, this_frame)) @@ -1697,6 +1695,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) && (i >= MIN_GF_INTERVAL) && + // for real scene cuts (not forced kfs) dont allow arf very near kf. + (cpi->rc.next_key_frame_forced || + (i <= (cpi->rc.frames_to_key - MIN_GF_INTERVAL))) && ((next_frame.pcnt_inter > 0.75) || (next_frame.pcnt_second_ref > 0.5)) && ((mv_in_out_accumulator / (double)i > -0.2) || @@ -1765,18 +1766,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { #endif #endif - // Now decide how many bits should be allocated to the GF group as a - // proportion of those remaining in the kf group. - // The final key frame group in the clip is treated as a special case - // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. - // This is also important for short clips where there may only be one - // key frame. - if (cpi->rc.frames_to_key >= (int)(cpi->twopass.total_stats.count - - cpi->common.current_video_frame)) { - cpi->twopass.kf_group_bits = - (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; - } - // Calculate the bits to be allocated to the group as a whole if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0)) { @@ -1863,9 +1852,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (gf_bits < 0) gf_bits = 0; - // Add in minimum for a frame - gf_bits += cpi->rc.min_frame_bandwidth; - if (i == 0) { cpi->twopass.gf_bits = gf_bits; } @@ -1899,8 +1885,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->twopass.gf_group_error_left = (int64_t)gf_group_err; } - cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - - cpi->rc.min_frame_bandwidth; + cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; @@ -1985,9 +1970,6 @@ static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; - // Add in the minimum number of bits that is set aside for every frame. - target_frame_size += cpi->rc.min_frame_bandwidth; - // Per frame bit target for this frame. cpi->rc.per_frame_bandwidth = target_frame_size; } @@ -2029,6 +2011,22 @@ void vp9_get_one_pass_params(VP9_COMP *cpi) { } } +void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if ((cm->current_video_frame == 0 || + cm->frame_flags & FRAMEFLAGS_KEY || + cpi->rc.frames_to_key == 0 || + (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { + cm->frame_type = KEY_FRAME; + cpi->rc.frames_to_key = cpi->key_frame_frequency; + } else { + cm->frame_type = INTER_FRAME; + } + // Don't use gf_update by default in CBR mode. + cpi->rc.frames_till_gf_update_due = INT_MAX; + cpi->rc.baseline_gf_interval = INT_MAX; +} + void vp9_get_first_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (!cpi->refresh_alt_ref_frame && @@ -2038,8 +2036,8 @@ void vp9_get_first_pass_params(VP9_COMP *cpi) { } else { cm->frame_type = INTER_FRAME; } - cpi->rc.frames_to_key = INT_MAX; // Do not use periodic key frames + cpi->rc.frames_to_key = INT_MAX; } void vp9_get_second_pass_params(VP9_COMP *cpi) { @@ -2265,8 +2263,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { vp9_zero(next_frame); vp9_clear_system_state(); // __asm emms; - start_position = cpi->twopass.stats_in; + start_position = cpi->twopass.stats_in; cpi->common.frame_type = KEY_FRAME; // is this a forced key frame by interval @@ -2348,7 +2346,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // interval is between 1x and 2x if (cpi->oxcf.auto_key && cpi->rc.frames_to_key > (int)cpi->key_frame_frequency) { - FIRSTPASS_STATS *current_pos = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_frame; cpi->rc.frames_to_key /= 2; @@ -2373,15 +2370,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Load a the next frame's stats input_stats(&cpi->twopass, &tmp_frame); } - - // Reset to the start of the group - reset_fpf_position(&cpi->twopass, current_pos); - + cpi->rc.next_key_frame_forced = 1; + } else if (cpi->twopass.stats_in == cpi->twopass.stats_in_end) { cpi->rc.next_key_frame_forced = 1; } else { cpi->rc.next_key_frame_forced = 0; } - // Special case for the last frame of the file + + // Special case for the last key frame of the file if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) { // Accumulate kf group error kf_group_err += calculate_modified_err(cpi, this_frame); @@ -2566,8 +2562,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; - // Add in the minimum frame allowance - cpi->twopass.kf_bits += cpi->rc.min_frame_bandwidth; // Peer frame bit target for this frame cpi->rc.per_frame_bandwidth = cpi->twopass.kf_bits; diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 43703c2c5..f89e4cb1c 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -22,6 +22,7 @@ void vp9_end_second_pass(VP9_COMP *cpi); void vp9_get_first_pass_params(VP9_COMP *cpi); void vp9_get_one_pass_params(VP9_COMP *cpi); +void vp9_get_one_pass_cbr_params(VP9_COMP *cpi); void vp9_get_svc_params(VP9_COMP *cpi); #endif // VP9_ENCODER_VP9_FIRSTPASS_H_ diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 946ed2818..8e60bc96d 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -581,22 +581,21 @@ static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi, int mode) { sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } -static void set_rt_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - sf->static_segmentation = 0; +static void set_good_speed_feature(VP9_COMMON *cm, + SPEED_FEATURES *sf, + int speed) { + int i; sf->adaptive_rd_thresh = 1; sf->recode_loop = (speed < 1); - if (speed >= 1) { + if (speed == 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) + ? USE_FULL_RD : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : - DISABLE_ALL_INTER_SPLIT; + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; else sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; @@ -610,26 +609,26 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } - if (speed >= 2) { + if (speed == 2) { sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) + ? USE_FULL_RD : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : - DISABLE_ALL_INTER_SPLIT; + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; else sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->adaptive_pred_filter_type = 2; + sf->reference_masking = 1; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 50; @@ -649,7 +648,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } - if (speed >= 3) { + if (speed == 3) { sf->use_square_partition_only = 1; sf->tx_size_search_method = USE_LARGESTALL; @@ -658,13 +657,15 @@ static void set_rt_speed_feature(VP9_COMMON *cm, else sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->adaptive_pred_filter_type = 2; + sf->reference_masking = 1; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 100; @@ -684,19 +685,22 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; } - if (speed >= 4) { + if (speed == 4) { sf->use_square_partition_only = 1; sf->tx_size_search_method = USE_LARGESTALL; sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_COMP_REFMISMATCH | FLAG_SKIP_INTRA_LOWVAR - | FLAG_EARLY_TERMINATE; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_COMP_REFMISMATCH | + FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->adaptive_pred_filter_type = 2; + sf->reference_masking = 1; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 200; @@ -715,30 +719,24 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; - - /* sf->intra_y_mode_mask = INTRA_DC_ONLY; - sf->intra_uv_mode_mask = INTRA_DC_ONLY; - sf->search_method = BIGDIA; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 64; */ } - if (speed >= 5) { - int i; + if (speed == 5) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->use_one_partition_size_always = 1; sf->always_this_block_size = BLOCK_16X16; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_COMP_REFMISMATCH | FLAG_SKIP_INTRA_LOWVAR - | FLAG_EARLY_TERMINATE; + sf->tx_size_search_method = frame_is_intra_only(cm) ? + USE_FULL_RD : USE_LARGESTALL; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_COMP_REFMISMATCH | + FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; sf->use_rd_breakout = 1; sf->use_lp32x32fdct = 1; sf->optimize_coefficients = 0; sf->auto_mv_step_size = 1; - // sf->reduce_first_step_size = 1; - // sf->reference_masking = 1; + sf->reference_masking = 1; sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->search_method = HEX; @@ -754,6 +752,107 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->mode_skip_start = 6; } } +static void set_rt_speed_feature(VP9_COMMON *cm, + SPEED_FEATURES *sf, + int speed) { + sf->static_segmentation = 0; + sf->adaptive_rd_thresh = 1; + sf->recode_loop = (speed < 1); + if (speed == 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = + frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->recode_loop = 2; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + if (speed >= 2) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = + frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH + | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA + | FLAG_SKIP_INTRA_LOWVAR; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 2; + sf->auto_mv_step_size = 1; + sf->reference_masking = 1; + + sf->disable_filter_search_var_thresh = 50; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = 1; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->adaptive_rd_thresh = 2; + sf->recode_loop = 2; + sf->use_lp32x32fdct = 1; + sf->mode_skip_start = 11; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + if (speed >= 3) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH + | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA + | FLAG_SKIP_INTRA_LOWVAR; + + sf->disable_filter_search_var_thresh = 100; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->subpel_iters_per_step = 1; + sf->use_fast_coef_updates = 2; + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + } + if (speed >= 4) { + sf->optimize_coefficients = 0; + } + if (speed >= 5) { + int i; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + } +} void vp9_set_speed_features(VP9_COMP *cpi) { SPEED_FEATURES *sf = &cpi->sf; @@ -772,7 +871,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // best quality defaults sf->RD = 1; sf->search_method = NSTEP; - sf->auto_filter = 1; sf->recode_loop = 1; sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; @@ -812,199 +910,13 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->using_small_partition_info = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set -#if CONFIG_MULTIPLE_ARF - // Switch segmentation off. - sf->static_segmentation = 0; -#else - sf->static_segmentation = 0; -#endif - switch (mode) { case 0: // This is the best quality mode. cpi->diamond_search_sad = vp9_full_range_search; break; - case 1: - -#if CONFIG_MULTIPLE_ARF - // Switch segmentation off. - sf->static_segmentation = 0; -#else - sf->static_segmentation = 0; -#endif - sf->adaptive_rd_thresh = 1; - sf->recode_loop = (speed < 1); - - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = 1; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = 1; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed == 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 200; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = 1; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - - /* sf->intra_y_mode_mask = INTRA_DC_ONLY; - sf->intra_uv_mode_mask = INTRA_DC_ONLY; - sf->search_method = BIGDIA; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 64; */ - } - if (speed == 5) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->use_one_partition_size_always = 1; - sf->always_this_block_size = BLOCK_16X16; - sf->tx_size_search_method = frame_is_intra_only(cm) ? - USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->use_rd_breakout = 1; - sf->use_lp32x32fdct = 1; - sf->optimize_coefficients = 0; - sf->auto_mv_step_size = 1; - // sf->reduce_first_step_size = 1; - // sf->reference_masking = 1; - - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->search_method = HEX; - sf->subpel_iters_per_step = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } + set_good_speed_feature(cm, sf, speed); + break; break; case 2: set_rt_speed_feature(cm, sf, speed); @@ -1350,8 +1262,6 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - // cpi->use_golden_frame_only = 0; - // cpi->use_last_frame_only = 0; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 1; cm->refresh_frame_context = 1; @@ -1392,6 +1302,12 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { else cpi->oxcf.maximum_buffer_size = rescale(cpi->oxcf.maximum_buffer_size, cpi->oxcf.target_bandwidth, 1000); + // Under a configuration change, where maximum_buffer_size may change, + // keep buffer level clipped to the maximum allowed buffer size. + if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) { + cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size; + cpi->rc.buffer_level = cpi->rc.bits_off_target; + } // Set up frame rate and related parameters rate control values. vp9_new_framerate(cpi, cpi->oxcf.framerate); @@ -1453,6 +1369,9 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { #endif set_tile_limits(cpi); + + cpi->ext_refresh_frame_flags_pending = 0; + cpi->ext_refresh_frame_context_pending = 0; } #define M_LOG2_E 0.693147180559945309417 @@ -2257,19 +2176,20 @@ int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) { if (ref_frame_flags > 7) return -1; - cpi->refresh_golden_frame = 0; - cpi->refresh_alt_ref_frame = 0; - cpi->refresh_last_frame = 0; + cpi->ext_refresh_golden_frame = 0; + cpi->ext_refresh_alt_ref_frame = 0; + cpi->ext_refresh_last_frame = 0; if (ref_frame_flags & VP9_LAST_FLAG) - cpi->refresh_last_frame = 1; + cpi->ext_refresh_last_frame = 1; if (ref_frame_flags & VP9_GOLD_FLAG) - cpi->refresh_golden_frame = 1; + cpi->ext_refresh_golden_frame = 1; if (ref_frame_flags & VP9_ALT_FLAG) - cpi->refresh_alt_ref_frame = 1; + cpi->ext_refresh_alt_ref_frame = 1; + cpi->ext_refresh_frame_flags_pending = 1; return 0; } @@ -2325,7 +2245,8 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, } int vp9_update_entropy(VP9_PTR comp, int update) { - ((VP9_COMP *)comp)->common.refresh_frame_context = update; + ((VP9_COMP *)comp)->ext_refresh_frame_context = update; + ((VP9_COMP *)comp)->ext_refresh_frame_context_pending = 1; return 0; } @@ -2975,6 +2896,23 @@ static void get_ref_frame_flags(VP9_COMP *cpi) { cpi->ref_frame_flags &= ~VP9_ALT_FLAG; } +static void set_ext_overrides(VP9_COMP *cpi) { + // Overrides the defaults with the externally supplied values with + // vp9_update_reference() and vp9_update_entropy() calls + // Note: The overrides are valid only for the next frame passed + // to encode_frame_to_data_rate() function + if (cpi->ext_refresh_frame_context_pending) { + cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context; + cpi->ext_refresh_frame_context_pending = 0; + } + if (cpi->ext_refresh_frame_flags_pending) { + cpi->refresh_last_frame = cpi->ext_refresh_last_frame; + cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame; + cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame; + cpi->ext_refresh_frame_flags_pending = 0; + } +} + static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -2991,6 +2929,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height); struct segmentation *const seg = &cm->seg; + set_ext_overrides(cpi); + /* Scale the source buffer, if required. */ if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width || cm->mi_rows * 8 != cpi->un_scaled_source->y_height) { @@ -3166,6 +3106,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->ambient_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); } + // If the encoder forced a KEY_FRAME decision if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1; @@ -3318,7 +3259,11 @@ static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { - vp9_get_one_pass_params(cpi); + if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + vp9_get_one_pass_cbr_params(cpi); + } else { + vp9_get_one_pass_params(cpi); + } encode_frame_to_data_rate(cpi, size, dest, frame_flags); } @@ -3396,6 +3341,44 @@ int is_next_frame_arf(VP9_COMP *cpi) { } #endif +void adjust_frame_rate(VP9_COMP *cpi) { + int64_t this_duration; + int step = 0; + + if (cpi->source->ts_start == cpi->first_time_stamp_ever) { + this_duration = cpi->source->ts_end - cpi->source->ts_start; + step = 1; + } else { + int64_t last_duration = cpi->last_end_time_stamp_seen + - cpi->last_time_stamp_seen; + + this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; + + // do a step update if the duration changes by 10% + if (last_duration) + step = (int)((this_duration - last_duration) * 10 / last_duration); + } + + if (this_duration) { + if (step) { + vp9_new_framerate(cpi, 10000000.0 / this_duration); + } else { + // Average this frame's rate into the last second's average + // frame rate. If we haven't seen 1 second yet, then average + // over the whole interval seen. + const double interval = MIN((double)(cpi->source->ts_end + - cpi->first_time_stamp_ever), 10000000.0); + double avg_duration = 10000000.0 / cpi->oxcf.framerate; + avg_duration *= (interval - avg_duration + this_duration); + avg_duration /= interval; + + vp9_new_framerate(cpi, 10000000.0 / avg_duration); + } + } + cpi->last_time_stamp_seen = cpi->source->ts_start; + cpi->last_end_time_stamp_seen = cpi->source->ts_end; +} + int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { @@ -3415,6 +3398,13 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); + // Normal defaults + cm->reset_frame_context = 0; + cm->refresh_frame_context = 1; + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + // Should we code an alternate reference frame. if (cpi->oxcf.play_alternate && cpi->rc.source_alt_ref_pending) { int frames_to_arf; @@ -3425,7 +3415,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->multi_arf_enabled && (cpi->pass == 2)) frames_to_arf = (-cpi->frame_coding_order[cpi->sequence_number]) - - cpi->next_frame_in_order; + - cpi->next_frame_in_order; else #endif frames_to_arf = cpi->rc.frames_till_gf_update_due; @@ -3509,18 +3499,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; - // fprintf(fp_out, " Frame:%d", cm->current_video_frame); -#if CONFIG_MULTIPLE_ARF - if (cpi->multi_arf_enabled) { - // fprintf(fp_out, " seq_no:%d this_frame_weight:%d", - // cpi->sequence_number, cpi->this_frame_weight); - } else { - // fprintf(fp_out, "\n"); - } -#else - // fprintf(fp_out, "\n"); -#endif - #if CONFIG_MULTIPLE_ARF if ((cm->frame_type != KEY_FRAME) && (cpi->pass == 2)) cpi->rc.source_alt_ref_pending = is_next_frame_arf(cpi); @@ -3542,43 +3520,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, } // adjust frame rates based on timestamps given - if (!cpi->refresh_alt_ref_frame) { - int64_t this_duration; - int step = 0; - - if (cpi->source->ts_start == cpi->first_time_stamp_ever) { - this_duration = cpi->source->ts_end - cpi->source->ts_start; - step = 1; - } else { - int64_t last_duration = cpi->last_end_time_stamp_seen - - cpi->last_time_stamp_seen; - - this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; - - // do a step update if the duration changes by 10% - if (last_duration) - step = (int)((this_duration - last_duration) * 10 / last_duration); - } - - if (this_duration) { - if (step) { - vp9_new_framerate(cpi, 10000000.0 / this_duration); - } else { - // Average this frame's rate into the last second's average - // frame rate. If we haven't seen 1 second yet, then average - // over the whole interval seen. - const double interval = MIN((double)(cpi->source->ts_end - - cpi->first_time_stamp_ever), 10000000.0); - double avg_duration = 10000000.0 / cpi->oxcf.framerate; - avg_duration *= (interval - avg_duration + this_duration); - avg_duration /= interval; - - vp9_new_framerate(cpi, 10000000.0 / avg_duration); - } - } - - cpi->last_time_stamp_seen = cpi->source->ts_start; - cpi->last_end_time_stamp_seen = cpi->source->ts_end; + if (cm->show_frame) { + adjust_frame_rate(cpi); } // start with a 0 size frame @@ -3604,21 +3547,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, } #endif -#if 0 // CONFIG_MULTIPLE_ARF - if (cpi->multi_arf_enabled) { - fprintf(fp_out, " idx(%d, %d, %d, %d) active(%d, %d, %d)", - cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx, cm->new_fb_idx, - cm->ref_frame_map[cpi->lst_fb_idx], - cm->ref_frame_map[cpi->gld_fb_idx], - cm->ref_frame_map[cpi->alt_fb_idx]); - if (cpi->refresh_alt_ref_frame) - fprintf(fp_out, " type:ARF"); - if (cpi->rc.is_src_frame_alt_ref) - fprintf(fp_out, " type:OVERLAY[%d]", cpi->alt_fb_idx); - fprintf(fp_out, "\n"); - } -#endif - cm->frame_flags = *frame_flags; // Reset the frame pointers to the current frame size @@ -3669,15 +3597,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, } if (*size > 0) { - // if its a dropped frame honor the requests on subsequent frames cpi->droppable = !frame_is_reference(cpi); - - // return to normal state - cm->reset_frame_context = 0; - cm->refresh_frame_context = 1; - cpi->refresh_alt_ref_frame = 0; - cpi->refresh_golden_frame = 0; - cpi->refresh_last_frame = 1; } vpx_usec_timer_mark(&cmptimer); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 946424534..a5be0f424 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -232,57 +232,185 @@ typedef enum { } LAST_FRAME_PARTITION_METHOD; typedef struct { + // This flag refers to whether or not to perform rd optimization. int RD; + + // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). SEARCH_METHODS search_method; - int auto_filter; + + // Recode_loop can be: + // 0 means we only encode a frame once + // 1 means we can re-encode based on bitrate constraints on any frame + // 2 means we can only recode gold, alt, and key frames. int recode_loop; + + // Subpel_search_method can only be subpel_tree which does a subpixel + // logarithmic search that keeps stepping at 1/2 pixel units until + // you stop getting a gain, and then goes on to 1/4 and repeats + // the same process. Along the way it skips many diagonals. SUBPEL_SEARCH_METHODS subpel_search_method; + + // Maximum number of steps in logarithmic subpel search before giving up. int subpel_iters_per_step; + + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. int thresh_mult[MAX_MODES]; int thresh_mult_sub8x8[MAX_REFS]; + + // This parameter controls the number of steps we'll do in a diamond + // search. int max_step_search_steps; + + // This parameter controls which step in the n-step process we start at. + // It's changed adaptively based on circumstances. int reduce_first_step_size; + + // If this is set to 1, we limit the motion search range to 2 times the + // largest motion vector found in the last frame. int auto_mv_step_size; + + // Trellis (dynamic programming) optimization of quantized values (+1, 0). int optimize_coefficients; + + // Always set to 0. If on it enables 0 cost background transmission + // (except for the initial transmission of the segmentation). The feature is + // disabled because the addition of very large block sizes make the + // backgrounds very to cheap to encode, and the segmentation we have + // adds overhead. int static_segmentation; + + // If 1 we iterate finding a best reference for 2 ref frames together - via + // a log search that iterates 4 times (check around mv for last for best + // error of combined predictor then check around mv for alt). If 0 we + // we just use the best motion vector found for each frame by itself. int comp_inter_joint_search_thresh; + + // This variable is used to cap the maximum number of times we skip testing a + // mode to be evaluated. A high value means we will be faster. int adaptive_rd_thresh; + + // Enables skipping the reconstruction step (idct, recon) in the + // intermediate steps assuming the last frame didn't have too many intra + // blocks and the q is less than a threshold. int skip_encode_sb; int skip_encode_frame; + + // This variable allows us to reuse the last frames partition choices + // (64x64 v 32x32 etc) for this frame. It can be set to only use the last + // frame as a starting point in low motion scenes or always use it. If set + // we use last partitioning_redo frequency to determine how often to redo + // the partitioning from scratch. Adjust_partitioning_from_last_frame + // enables us to adjust up or down one partitioning from the last frames + // partitioning. LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + + // Determine which method we use to determine transform size. We can choose + // between options like full rd, largest for prediction size, largest + // for intra and model coefs for the rest. TX_SIZE_SEARCH_METHOD tx_size_search_method; + + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less + // precise but significantly faster than the non lp version. int use_lp32x32fdct; + + // TODO(JBB): remove this as its no longer used. + + // If set partition size will always be always_this_block_size. int use_one_partition_size_always; + + // Skip rectangular partition test when partition type none gives better + // rd than partition type split. int less_rectangular_check; + + // Disable testing non square partitions. (eg 16x32) int use_square_partition_only; + + // After looking at the first set of modes (set by index here), skip + // checking modes for reference frames that don't match the reference frame + // of the best so far. int mode_skip_start; + + // TODO(JBB): Remove this. int reference_masking; + + // Used in conjunction with use_one_partition_size_always. BLOCK_SIZE always_this_block_size; + + // Sets min and max partition sizes for this 64x64 region based on the + // same superblock in last encoded frame, and the left and above neighbor + // in this block. int auto_min_max_partition_size; + + // Min and max partition size we enable (block_size) as per auto + // min max, but also used by adjust partitioning, and pick_partitioning. BLOCK_SIZE min_partition_size; BLOCK_SIZE max_partition_size; + + // Whether or not we allow partitions one smaller or one greater than the last + // frame's partitioning. Only used if use_lastframe_partitioning is set. int adjust_partitioning_from_last_frame; + + // How frequently we re do the partitioning from scratch. Only used if + // use_lastframe_partitioning is set. int last_partitioning_redo_frequency; + + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable + // it always, to allow it for only Last frame and Intra, disable it for all + // inter modes or to enable it always. int disable_split_mask; + + // TODO(jbb): Remove this and everything that uses it. It's only valid if + // we were doing small to large partition checks. We currently do the + // reverse. int using_small_partition_info; + // TODO(jingning): combine the related motion search speed features + // This allows us to use motion search at other sizes as a starting + // point for this motion search and limits the search range around it. int adaptive_motion_search; + + // Allows sub 8x8 modes to use the prediction filter that was determined + // best for 8x8 mode. If set to 0 we always re check all the filters for + // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter + // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. int adaptive_pred_filter_type; // Implements various heuristics to skip searching modes // The heuristics selected are based on flags // defined in the MODE_SEARCH_SKIP_HEURISTICS enum unsigned int mode_search_skip_flags; + // A source variance threshold below which the split mode is disabled unsigned int disable_split_var_thresh; + // A source variance threshold below which filter search is disabled // Choose a very large value (UINT_MAX) to use 8-tap always unsigned int disable_filter_search_var_thresh; + + // These bit masks allow you to enable or disable intra modes for each + // transform size separately. int intra_y_mode_mask[TX_SIZES]; int intra_uv_mode_mask[TX_SIZES]; + + // This variable enables an early break out of mode testing if the model for + // rd built from the prediction signal indicates a value that's much + // higher than the best rd we've seen so far. int use_rd_breakout; + + // This enables us to use an estimate for intra rd based on dc mode rather + // than choosing an actual uv mode in the stage of encoding before the actual + // final encode. int use_uv_intra_rd_estimate; + + // This picks a loop filter strength by trying a small portion of the image + // with different values. int use_fast_lpf_pick; + + // This feature limits the number of coefficients updates we actually do + // by only looking at counts from 1/2 the bands. int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced } SPEED_FEATURES; @@ -400,6 +528,15 @@ typedef struct VP9_COMP { int refresh_last_frame; int refresh_golden_frame; int refresh_alt_ref_frame; + + int ext_refresh_frame_flags_pending; + int ext_refresh_last_frame; + int ext_refresh_golden_frame; + int ext_refresh_alt_ref_frame; + + int ext_refresh_frame_context_pending; + int ext_refresh_frame_context; + YV12_BUFFER_CONFIG last_frame_uf; TOKENEXTRA *tok; @@ -596,7 +733,7 @@ typedef struct VP9_COMP { int *mb_norm_activity_map; int output_partition; - /* force next frame to intra when kf_auto says so */ + // Force next frame to intra when kf_auto says so. int force_next_frame_intra; int droppable; @@ -638,7 +775,7 @@ typedef struct VP9_COMP { int64_t mode_test_hits[BLOCK_SIZES]; #endif - /* Y,U,V,(A) */ + // Y,U,V,(A) ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 31b0116a4..939a7f998 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -184,8 +184,6 @@ void vp9_setup_key_frame(VP9_COMP *cpi) { vp9_setup_past_independence(cm); - // interval before next GF - cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval; /* All buffers are implicitly updated on key frames. */ cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; @@ -432,10 +430,32 @@ static void calc_pframe_target_size(VP9_COMP *const cpi) { } } +static double get_rate_correction_factor(const VP9_COMP *cpi) { + if (cpi->common.frame_type == KEY_FRAME) { + return cpi->rc.key_frame_rate_correction_factor; + } else { + if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) + return cpi->rc.gf_rate_correction_factor; + else + return cpi->rc.rate_correction_factor; + } +} + +static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { + if (cpi->common.frame_type == KEY_FRAME) { + cpi->rc.key_frame_rate_correction_factor = factor; + } else { + if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) + cpi->rc.gf_rate_correction_factor = factor; + else + cpi->rc.rate_correction_factor = factor; + } +} + void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { const int q = cpi->common.base_qindex; int correction_factor = 100; - double rate_correction_factor; + double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; int projected_size_based_on_q = 0; @@ -443,15 +463,6 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { // Clear down mmx registers to allow floating point in what follows vp9_clear_system_state(); // __asm emms; - if (cpi->common.frame_type == KEY_FRAME) { - rate_correction_factor = cpi->rc.key_frame_rate_correction_factor; - } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) - rate_correction_factor = cpi->rc.gf_rate_correction_factor; - else - rate_correction_factor = cpi->rc.rate_correction_factor; - } - // Work out how big we would have expected the frame to be at this Q given // the current correction factor. // Stay in double to avoid int overflow when values are large @@ -501,36 +512,16 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { rate_correction_factor = MIN_BPB_FACTOR; } - if (cpi->common.frame_type == KEY_FRAME) { - cpi->rc.key_frame_rate_correction_factor = rate_correction_factor; - } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) - cpi->rc.gf_rate_correction_factor = rate_correction_factor; - else - cpi->rc.rate_correction_factor = rate_correction_factor; - } + set_rate_correction_factor(cpi, rate_correction_factor); } int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, int active_best_quality, int active_worst_quality) { int q = active_worst_quality; - - int i; int last_error = INT_MAX; - int target_bits_per_mb; - int bits_per_mb_at_this_q; - double correction_factor; - - // Select the appropriate correction factor based upon type of frame. - if (cpi->common.frame_type == KEY_FRAME) { - correction_factor = cpi->rc.key_frame_rate_correction_factor; - } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) - correction_factor = cpi->rc.gf_rate_correction_factor; - else - correction_factor = cpi->rc.rate_correction_factor; - } + int i, target_bits_per_mb, bits_per_mb_at_this_q; + const double correction_factor = get_rate_correction_factor(cpi); // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. @@ -855,7 +846,6 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { // Update the Golden frame usage counts. if (cpi->refresh_golden_frame) { // this frame refreshes means next frames don't unless specified by user - cpi->refresh_golden_frame = 0; cpi->rc.frames_since_golden = 0; if (!cpi->rc.source_alt_ref_pending) @@ -876,36 +866,35 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; // Update rate control heuristics - cpi->rc.projected_frame_size = (bytes_used << 3); + rc->projected_frame_size = (bytes_used << 3); // Post encode loop adjustment of Q prediction. - vp9_rc_update_rate_correction_factors( - cpi, (cpi->sf.recode_loop || + vp9_rc_update_rate_correction_factors(cpi, (cpi->sf.recode_loop || cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. if (cm->frame_type == KEY_FRAME) { - cpi->rc.last_q[KEY_FRAME] = cm->base_qindex; - cpi->rc.avg_frame_qindex[KEY_FRAME] = - (2 + 3 * cpi->rc.avg_frame_qindex[KEY_FRAME] + cm->base_qindex) >> 2; - } else if (!cpi->rc.is_src_frame_alt_ref && + rc->last_q[KEY_FRAME] = cm->base_qindex; + rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO( + 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); + } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - cpi->rc.last_q[2] = cm->base_qindex; - cpi->rc.avg_frame_qindex[2] = - (2 + 3 * cpi->rc.avg_frame_qindex[2] + cm->base_qindex) >> 2; + rc->last_q[2] = cm->base_qindex; + rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( + 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); } else { - cpi->rc.last_q[INTER_FRAME] = cm->base_qindex; - cpi->rc.avg_frame_qindex[INTER_FRAME] = - (2 + 3 * cpi->rc.avg_frame_qindex[INTER_FRAME] + - cm->base_qindex) >> 2; - cpi->rc.ni_frames++; - cpi->rc.tot_q += vp9_convert_qindex_to_q(cm->base_qindex); - cpi->rc.avg_q = cpi->rc.tot_q / (double)cpi->rc.ni_frames; + rc->last_q[INTER_FRAME] = cm->base_qindex; + rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO( + 3 * rc->avg_frame_qindex[INTER_FRAME] + cm->base_qindex, 2); + rc->ni_frames++; + rc->tot_q += vp9_convert_qindex_to_q(cm->base_qindex); + rc->avg_q = rc->tot_q / (double)rc->ni_frames; // Calculate the average Q for normal inter frames (not key or GFU frames). - cpi->rc.ni_tot_qi += cm->base_qindex; - cpi->rc.ni_av_qi = cpi->rc.ni_tot_qi / cpi->rc.ni_frames; + rc->ni_tot_qi += cm->base_qindex; + rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames; } // Keep record of last boosted (KF/KF/ARF) Q value. @@ -913,38 +902,34 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // If all mbs in this group are skipped only update if the Q value is // better than that already stored. // This is used to help set quality in forced key frames to reduce popping - if ((cm->base_qindex < cpi->rc.last_boosted_qindex) || + if ((cm->base_qindex < rc->last_boosted_qindex) || ((cpi->static_mb_pct < 100) && ((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)))) { - cpi->rc.last_boosted_qindex = cm->base_qindex; + (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) { + rc->last_boosted_qindex = cm->base_qindex; } - vp9_update_buffer_level(cpi, cpi->rc.projected_frame_size); + vp9_update_buffer_level(cpi, rc->projected_frame_size); // Rolling monitors of whether we are over or underspending used to help // regulate min and Max Q in two pass. if (cm->frame_type != KEY_FRAME) { - cpi->rc.rolling_target_bits = - ((cpi->rc.rolling_target_bits * 3) + - cpi->rc.this_frame_target + 2) / 4; - cpi->rc.rolling_actual_bits = - ((cpi->rc.rolling_actual_bits * 3) + - cpi->rc.projected_frame_size + 2) / 4; - cpi->rc.long_rolling_target_bits = - ((cpi->rc.long_rolling_target_bits * 31) + - cpi->rc.this_frame_target + 16) / 32; - cpi->rc.long_rolling_actual_bits = - ((cpi->rc.long_rolling_actual_bits * 31) + - cpi->rc.projected_frame_size + 16) / 32; + rc->rolling_target_bits = ROUND_POWER_OF_TWO( + rc->rolling_target_bits * 3 + rc->this_frame_target, 2); + rc->rolling_actual_bits = ROUND_POWER_OF_TWO( + rc->rolling_actual_bits * 3 + rc->projected_frame_size, 2); + rc->long_rolling_target_bits = ROUND_POWER_OF_TWO( + rc->long_rolling_target_bits * 31 + rc->this_frame_target, 5); + rc->long_rolling_actual_bits = ROUND_POWER_OF_TWO( + rc->long_rolling_actual_bits * 31 + rc->projected_frame_size, 5); } // Actual bits spent - cpi->rc.total_actual_bits += cpi->rc.projected_frame_size; + rc->total_actual_bits += rc->projected_frame_size; // Debug stats - cpi->rc.total_target_vs_actual += (cpi->rc.this_frame_target - - cpi->rc.projected_frame_size); + rc->total_target_vs_actual += (rc->this_frame_target - + rc->projected_frame_size); #ifndef DISABLE_RC_LONG_TERM_MEM // Update bits left to the kf and gf groups to account for overshoot or @@ -962,8 +947,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } #endif - if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame - && (cm->frame_type != KEY_FRAME)) + if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame && + (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); else @@ -971,10 +956,10 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { update_golden_frame_stats(cpi); if (cm->frame_type == KEY_FRAME) - cpi->rc.frames_since_key = 0; + rc->frames_since_key = 0; if (cm->show_frame) { - cpi->rc.frames_since_key++; - cpi->rc.frames_to_key--; + rc->frames_since_key++; + rc->frames_to_key--; } } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 97dc1e0ff..81d47de92 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2124,10 +2124,10 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, cpi->common.show_frame && block_size < cpi->sf.max_partition_size); - int_mv pred_mv[3] = { - mbmi->ref_mvs[ref_frame][0], mbmi->ref_mvs[ref_frame][1], - x->pred_mv[ref_frame] - }; + int_mv pred_mv[3]; + pred_mv[0] = mbmi->ref_mvs[ref_frame][0]; + pred_mv[1] = mbmi->ref_mvs[ref_frame][1]; + pred_mv[2] = x->pred_mv[ref_frame]; // Get the sad for each candidate reference mv for (i = 0; i < num_mv_refs; i++) { @@ -2276,14 +2276,14 @@ static void setup_pred_block(const MACROBLOCKD *xd, } } -static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, - int idx, MV_REFERENCE_FRAME frame_type, - BLOCK_SIZE block_size, - int mi_row, int mi_col, - int_mv frame_nearest_mv[MAX_REF_FRAMES], - int_mv frame_near_mv[MAX_REF_FRAMES], - struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { +void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo *const tile, + int idx, MV_REFERENCE_FRAME frame_type, + BLOCK_SIZE block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { VP9_COMMON *cm = &cpi->common; YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; MACROBLOCKD *const xd = &x->e_mbd; @@ -2355,9 +2355,10 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref); - int_mv pred_mv[3] = { - mbmi->ref_mvs[ref][0], mbmi->ref_mvs[ref][1], x->pred_mv[ref] - }; + int_mv pred_mv[3]; + pred_mv[0] = mbmi->ref_mvs[ref][0]; + pred_mv[1] = mbmi->ref_mvs[ref][1]; + pred_mv[2] = x->pred_mv[ref]; if (scaled_ref_frame) { int i; @@ -3174,17 +3175,29 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), - ref_frame, block_size, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); + vp9_setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), + ref_frame, block_size, mi_row, mi_col, + frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + cpi->ref_frame_mask = 0; + for (ref_frame = LAST_FRAME; + ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { + int i; + for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { + if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) { + cpi->ref_frame_mask |= (1 << ref_frame); + break; + } + } + } + for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3234,8 +3247,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } // Skip if the current reference frame has been masked off - if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && - (cpi->ref_frame_mask & (1 << ref_frame))) + if (cpi->ref_frame_mask & (1 << ref_frame) && this_mode != NEWMV) continue; // Test best rd so far against threshold for trying this mode. @@ -3640,11 +3652,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - // If we are using reference masking and the set mask flag is set then - // create the reference frame mask. - if (cpi->sf.reference_masking && cpi->set_ref_frame_mask) - cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame[0]); - // Flag all modes that have a distortion thats > 2x the best we found at // this level. for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) { @@ -3796,15 +3803,27 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), - ref_frame, block_size, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - yv12_mb); + vp9_setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), + ref_frame, block_size, mi_row, mi_col, + frame_mv[NEARESTMV], frame_mv[NEARMV], + yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + cpi->ref_frame_mask = 0; + for (ref_frame = LAST_FRAME; + ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { + int i; + for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { + if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) { + cpi->ref_frame_mask |= (1 << ref_frame); + break; + } + } + } + for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3852,11 +3871,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, continue; } - // Skip if the current reference frame has been masked off - if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && - (cpi->ref_frame_mask & (1 << ref_frame))) - continue; - // Test best rd so far against threshold for trying this mode. if ((best_rd < ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] * @@ -4366,11 +4380,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - // If we are using reference masking and the set mask flag is set then - // create the reference frame mask. - if (cpi->sf.reference_masking && cpi->set_ref_frame_mask) - cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame[0]); - if (best_rd == INT64_MAX && bsize < BLOCK_8X8) { *returnrate = INT_MAX; *returndistortion = INT_MAX; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index f0e8849c1..5732c2b2d 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -27,6 +27,15 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); +void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo *const tile, + int idx, MV_REFERENCE_FRAME frame_type, + BLOCK_SIZE block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[4][MAX_MB_PLANE]); + void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *r, int64_t *d, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index bd1150b98..b1c029cba 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -74,7 +74,6 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm -VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_intrin_ssse3.c ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index a7d2e1d6a..a03f7befc 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -25,7 +25,7 @@ typedef vpx_codec_stream_info_t vp9_stream_info_t; /* Structures for handling memory allocations */ typedef enum { - VP9_SEG_ALG_PRIV = 256, + VP9_SEG_ALG_PRIV = 256, VP9_SEG_MAX } mem_seg_id_t; #define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) @@ -107,12 +107,11 @@ static void vp9_finalize_mmaps(vpx_codec_alg_priv_t *ctx) { static vpx_codec_err_t vp9_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { - vpx_codec_err_t res = VPX_CODEC_OK; + vpx_codec_err_t res = VPX_CODEC_OK; - /* This function only allocates space for the vpx_codec_alg_priv_t - * structure. More memory may be required at the time the stream - * information becomes known. - */ + // This function only allocates space for the vpx_codec_alg_priv_t + // structure. More memory may be required at the time the stream + // information becomes known. if (!ctx->priv) { vpx_codec_mmap_t mmap; @@ -122,12 +121,10 @@ static vpx_codec_err_t vp9_init(vpx_codec_ctx_t *ctx, mmap.flags = vp9_mem_req_segs[0].flags; res = vpx_mmap_alloc(&mmap); - if (!res) { vp9_init_ctx(ctx, &mmap); ctx->priv->alg_priv->defer_alloc = 1; - /*post processing level initialized to do nothing */ } } @@ -147,8 +144,7 @@ static vpx_codec_err_t vp9_destroy(vpx_codec_alg_priv_t *ctx) { return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_peek_si(const uint8_t *data, - unsigned int data_sz, +static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si) { if (data_sz <= 8) return VPX_CODEC_UNSUP_BITSTREAM; if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; @@ -213,13 +209,9 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, vpx_codec_stream_info_t *si) { - unsigned int sz; - - if (si->sz >= sizeof(vp9_stream_info_t)) - sz = sizeof(vp9_stream_info_t); - else - sz = sizeof(vpx_codec_stream_info_t); - + const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) + ? sizeof(vp9_stream_info_t) + : sizeof(vpx_codec_stream_info_t); memcpy(si, &ctx->si, sz); si->sz = sz; @@ -227,24 +219,17 @@ static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t -update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) { - vpx_codec_err_t res; - - if ((res = error->error_code)) - ctx->base.err_detail = error->has_detail - ? error->detail - : NULL; +static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, + const struct vpx_internal_error_info *error) { + if (error->error_code) + ctx->base.err_detail = error->has_detail ? error->detail : NULL; - return res; + return error->error_code; } -static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, - const uint8_t **data, - unsigned int data_sz, - void *user_priv, - long deadline) { +static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, + const uint8_t **data, unsigned int data_sz, + void *user_priv, int64_t deadline) { vpx_codec_err_t res = VPX_CODEC_OK; ctx->img_avail = 0; @@ -304,13 +289,11 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, oxcf.inv_tile_order = ctx->invert_tile_order; optr = vp9_create_decompressor(&oxcf); - /* If postprocessing was enabled by the application and a - * configuration has not been provided, default it. - */ - if (!ctx->postproc_cfg_set - && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { - ctx->postproc_cfg.post_proc_flag = - VP8_DEBLOCK | VP8_DEMACROBLOCK; + // If postprocessing was enabled by the application and a + // configuration has not been provided, default it. + if (!ctx->postproc_cfg_set && + (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { + ctx->postproc_cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; ctx->postproc_cfg.deblocking_level = 4; ctx->postproc_cfg.noise_level = 0; } @@ -354,25 +337,20 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { flags.post_proc_flag = #if CONFIG_POSTPROC_VISUALIZER - ((ctx->dbg_color_ref_frame_flag != 0) ? - VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) - | ((ctx->dbg_color_mb_modes_flag != 0) ? - VP9D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_color_b_modes_flag != 0) ? - VP9D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_display_mv_flag != 0) ? - VP9D_DEBUG_DRAW_MV : 0) - | + (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | + (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) | #endif ctx->postproc_cfg.post_proc_flag; - flags.deblocking_level = ctx->postproc_cfg.deblocking_level; - flags.noise_level = ctx->postproc_cfg.noise_level; + flags.deblocking_level = ctx->postproc_cfg.deblocking_level; + flags.noise_level = ctx->postproc_cfg.noise_level; #if CONFIG_POSTPROC_VISUALIZER flags.display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; - flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; - flags.display_mv_flag = ctx->dbg_display_mv_flag; + flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; + flags.display_mv_flag = ctx->dbg_display_mv_flag; #endif } @@ -391,10 +369,8 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, return res; } -static void parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], - int *count) { +static void parse_superframe_index(const uint8_t *data, size_t data_sz, + uint32_t sizes[8], int *count) { uint8_t marker; assert(data_sz); @@ -527,11 +503,11 @@ static vpx_codec_err_t vp9_set_frame_buffers( return VPX_CODEC_ERROR; } -static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx, - vpx_codec_mmap_t *mmap, - vpx_codec_iter_t *iter) { - vpx_codec_err_t res; - const mem_req_t *seg_iter = *iter; +static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx, + vpx_codec_mmap_t *mmap, + vpx_codec_iter_t *iter) { + vpx_codec_err_t res; + const mem_req_t *seg_iter = *iter; /* Get address of next segment request */ do { @@ -560,7 +536,7 @@ static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx, return res; } -static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx, +static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap) { vpx_codec_err_t res = VPX_CODEC_MEM_ERROR; int i, done; @@ -596,8 +572,7 @@ static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx, return res; } -static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, +static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -606,7 +581,6 @@ static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(ctx->pbi, (VP9_REFFRAME)frame->frame_type, &sd); } else { @@ -614,8 +588,7 @@ static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, +static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); @@ -632,8 +605,7 @@ static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, +static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); @@ -648,8 +620,7 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, - int ctr_id, +static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); @@ -666,8 +637,7 @@ static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, #endif } -static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, - int ctrl_id, +static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, va_list args) { #if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC int data = va_arg(args, int); @@ -688,8 +658,7 @@ static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { + int ctrl_id, va_list args) { int *update_info = va_arg(args, int *); VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; @@ -704,8 +673,7 @@ static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { + int ctrl_id, va_list args) { int *corrupted = va_arg(args, int *); if (corrupted) { @@ -721,8 +689,7 @@ static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, } static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { + int ctrl_id, va_list args) { int *const display_size = va_arg(args, int *); if (display_size) { |