diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_dct.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeintra.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 106 |
4 files changed, 89 insertions, 46 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index e2f3e2677..9c2203dea 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -105,7 +105,6 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { int16_t out[4 * 4]; int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; int i, j; int16_t temp_in[4], temp_out[4]; @@ -137,7 +136,7 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output, // column transform for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) - temp_in[j] = input[j * short_pitch + i] << 4; + temp_in[j] = input[j * pitch + i] << 4; if (i == 0 && temp_in[0]) temp_in[0] += 1; fwdc(temp_in, temp_out); @@ -308,7 +307,6 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { int16_t out[64]; int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; int i, j; int16_t temp_in[8], temp_out[8]; @@ -339,7 +337,7 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, // column transform for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) - temp_in[j] = input[j * short_pitch + i] << 2; + temp_in[j] = input[j * pitch + i] << 2; fwdc(temp_in, temp_out); for (j = 0; j < 8; ++j) outptr[j * 8 + i] = temp_out[j]; @@ -697,7 +695,6 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type) { int16_t out[256]; int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; int i, j; int16_t temp_in[16], temp_out[16]; @@ -728,7 +725,7 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output, // column transform for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) - temp_in[j] = input[j * short_pitch + i] << 2; + temp_in[j] = input[j * pitch + i] << 2; fwdc(temp_in, temp_out); for (j = 0; j < 16; ++j) outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index ef64db1db..43bb4640c 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -54,9 +54,9 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { tx_type = get_tx_type_4x4(&x->e_mbd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type); + vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b) ; @@ -149,10 +149,10 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { tx_type = get_tx_type_8x8(xd, &xd->block[ib]); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type); + vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); x->quantize_b_8x8(x->block + idx, xd->block + idx); vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, - 32, tx_type); + 16, tx_type); } else { x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); @@ -164,9 +164,9 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { be = &x->block[ib + iblock[i]]; tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type); + vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(be, be + 1, b, b + 1); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 61516ddec..a753bf40f 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -174,7 +174,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) { BLOCK *b = &x->block[i]; TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(b->src_diff, b->coeff, 32, tx_type); + vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) { x->fwd_txm8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); @@ -209,7 +209,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { BLOCK *b = &x->block[i]; tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(b->src_diff, b->coeff, 32, tx_type); + vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type); } else { x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); @@ -219,7 +219,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) { BLOCK *b = &x->block[i]; tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 32, tx_type); + vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type); } else { x->fwd_txm8x8(&x->block[i].src_diff[0], &x->block[i + 2].coeff[0], 32); @@ -247,7 +247,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) { TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]); vp9_clear_system_state(); if (tx_type != DCT_DCT) { - vp9_short_fht16x16(b->src_diff, b->coeff, 32, tx_type); + vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type); } else { x->fwd_txm16x16(&x->block[0].src_diff[0], &x->block[0].coeff[0], 32); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index d26b5ae7b..6e1122f3e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -402,6 +402,12 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT *const a1 = a + + sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); + ENTROPY_CONTEXT *const l1 = l + + sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); +#endif switch (tx_size) { case TX_4X4: @@ -416,6 +422,10 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, } break; case TX_8X8: +#if CONFIG_CNVCONTEXT + a_ec = (a[0] + a[1]) != 0; + l_ec = (l[0] + l[1]) != 0; +#endif scan = vp9_default_zig_zag1d_8x8; seg_eob = 64; break; @@ -425,12 +435,27 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, if (type == PLANE_TYPE_UV) { const int uv_idx = ib - 16; qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx; +#if CONFIG_CNVCONTEXT + a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; + } else { + a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; +#endif } break; case TX_32X32: scan = vp9_default_zig_zag1d_32x32; seg_eob = 1024; qcoeff_ptr = xd->sb_coeff_data.qcoeff; +#if CONFIG_CNVCONTEXT + a_ec = a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]; + l_ec = l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]; + a_ec = a_ec != 0; + l_ec = l_ec != 0; +#endif break; default: abort(); @@ -459,6 +484,22 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, // is eob first coefficient; pt = (c > 0); *a = *l = pt; +#if CONFIG_CNVCONTEXT + if (tx_size >= TX_8X8) { + a[1] = l[1] = pt; + if (tx_size >= TX_16X16) { + if (type == PLANE_TYPE_UV) { + a1[0] = a1[1] = l1[0] = l1[1] = pt; + } else { + a[2] = a[3] = l[2] = l[3] = pt; + if (tx_size >= TX_32X32) { + a1[0] = a1[1] = a1[2] = a1[3] = pt; + l1[0] = l1[1] = l1[2] = l1[3] = pt; + } + } + } + } +#endif return cost; } @@ -701,15 +742,15 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { MACROBLOCKD * const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT *ta, *tl; if (backup) { ta = (ENTROPY_CONTEXT *) &t_above, tl = (ENTROPY_CONTEXT *) &t_left; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); } else { ta = (ENTROPY_CONTEXT *) xd->above_context; tl = (ENTROPY_CONTEXT *) xd->left_context; @@ -1013,7 +1054,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, b->bmi.as_mode.first = mode; tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); @@ -1046,7 +1087,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, // inverse transform if (best_tx_type != DCT_DCT) - vp9_short_iht4x4(best_dqcoeff, b->diff, 32, best_tx_type); + vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type); else xd->inv_txm4x4(best_dqcoeff, b->diff, 32); @@ -1279,8 +1320,9 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int distortion = 0, rate = 0; BLOCK *be = x->block + ib; BLOCKD *b = xd->block + ib; - ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0; - ENTROPY_CONTEXT tl0, tl1, bestl0 = 0, bestl1 = 0; + ENTROPY_CONTEXT_PLANES ta, tl; + ENTROPY_CONTEXT *ta0, *ta1, besta0 = 0, besta1 = 0; + ENTROPY_CONTEXT *tl0, *tl1, bestl0 = 0, bestl1 = 0; /* * The predictor buffer is a 2d buffer with a stride of 16. Create @@ -1309,7 +1351,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { TX_TYPE tx_type = get_tx_type_8x8(xd, b); if (tx_type != DCT_DCT) - vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type); + vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); else x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); @@ -1317,23 +1359,29 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // compute quantization mse of 8x8 block distortion = vp9_block_error_c((x->block + idx)->coeff, (xd->block + idx)->dqcoeff, 64); - ta0 = a[vp9_block2above[TX_8X8][idx]]; - tl0 = l[vp9_block2left[TX_8X8][idx]]; + + vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_8X8][idx]; + tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_8X8][idx]; + ta1 = ta0 + 1; + tl1 = tl0 + 1; rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl0, TX_8X8); + ta0, tl0, TX_8X8); rate += rate_t; - ta1 = ta0; - tl1 = tl0; } else { static const int iblock[4] = {0, 1, 4, 5}; TX_TYPE tx_type; int i; - ta0 = a[vp9_block2above[TX_4X4][ib]]; - ta1 = a[vp9_block2above[TX_4X4][ib + 1]]; - tl0 = l[vp9_block2left[TX_4X4][ib]]; - tl1 = l[vp9_block2left[TX_4X4][ib + 4]]; + vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); + ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_4X4][ib]; + tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_4X4][ib]; + ta1 = ta0 + 1; + tl1 = tl0 + 1; distortion = 0; rate_t = 0; for (i = 0; i < 4; ++i) { @@ -1342,7 +1390,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, be = &x->block[ib + iblock[i]]; tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type); + vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(be, b, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); @@ -1354,15 +1402,13 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, } distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, - // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0, - &ta0, &tl0, + i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); if (do_two) { + i++; rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC, - // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0, - &ta0, &tl0, + i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); - i++; } } b = &xd->block[ib]; @@ -1376,10 +1422,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, *bestrate = rate; *bestratey = rate_t; *bestdistortion = distortion; - besta0 = ta0; - besta1 = ta1; - bestl0 = tl0; - bestl1 = tl1; + besta0 = *ta0; + besta1 = *ta1; + bestl0 = *tl0; + bestl1 = *tl1; best_rd = this_rd; *best_mode = mode; copy_predictor_8x8(best_predictor, b->predictor); @@ -1532,12 +1578,12 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { int b; int cost = 0; MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT *ta, *tl; if (backup) { - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2); ta = (ENTROPY_CONTEXT *) &t_above; tl = (ENTROPY_CONTEXT *) &t_left; |