diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_blockd.h | 53 | ||||
-rw-r--r-- | vp9/common/vp9_invtrans.c | 107 | ||||
-rw-r--r-- | vp9/common/vp9_mbpitch.c | 5 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 11 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 321 | ||||
-rw-r--r-- | vp9/decoder/vp9_dequantize.c | 11 | ||||
-rw-r--r-- | vp9/decoder/vp9_dequantize.h | 5 | ||||
-rw-r--r-- | vp9/decoder/vp9_detokenize.c | 109 | ||||
-rw-r--r-- | vp9/decoder/vp9_idct_blk.c | 89 | ||||
-rw-r--r-- | vp9/encoder/vp9_asm_enc_offsets.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 11 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeintra.c | 64 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeintra.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 55 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.c | 142 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.h | 14 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 214 | ||||
-rw-r--r-- | vp9/encoder/vp9_tokenize.c | 118 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_encodeopt.asm | 114 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_x86_csystemdependent.c | 18 |
20 files changed, 745 insertions, 719 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 3a5824ada..a147ec747 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -249,8 +249,6 @@ typedef struct { } MODE_INFO; typedef struct blockd { - int16_t *qcoeff; - int16_t *dqcoeff; uint8_t *predictor; int16_t *diff; int16_t *dequant; @@ -284,15 +282,28 @@ struct scale_factors { #endif }; +enum { MAX_MB_PLANE = 3 }; + +struct mb_plane { + DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[256]); +}; + +#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n)) + +#define MB_SUBBLOCK_FIELD(x, field, i) (\ + ((i) < 16) ? BLOCK_OFFSET((x)->plane[0].field, (i), 16) : \ + ((i) < 20) ? BLOCK_OFFSET((x)->plane[1].field, ((i) - 16), 16) : \ + BLOCK_OFFSET((x)->plane[2].field, ((i) - 20), 16)) + typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks - DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]); - DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]); #if CONFIG_CODE_NONZEROCOUNT DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]); #endif + struct mb_plane plane[MAX_MB_PLANE]; /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */ BLOCKD block[24]; @@ -372,8 +383,8 @@ typedef struct macroblockd { void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd); void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride, - struct macroblockd *xd); + uint8_t *pre, uint8_t *dst, int stride, + uint16_t *eobs); struct subpix_fn_table subpix; @@ -669,4 +680,32 @@ static int get_nzc_used(TX_SIZE tx_size) { return (tx_size >= TX_16X16); } #endif + +struct plane_block_idx { + int plane; + int block; +}; + +// TODO(jkoleszar): returning a struct so it can be used in a const context, +// expect to refactor this further later. +static INLINE struct plane_block_idx plane_block_idx(int y_blocks, + int b_idx) { + const int v_offset = y_blocks * 5 / 4; + struct plane_block_idx res; + + if (b_idx < y_blocks) { + res.plane = 0; + res.block = b_idx; + } else if (b_idx < v_offset) { + res.plane = 1; + res.block = b_idx - y_blocks; + } else { + assert(b_idx < y_blocks * 3 / 2); + res.plane = 2; + res.block = b_idx - v_offset; + } + return res; +} + + #endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index a03a66e33..0573b7df4 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -26,9 +26,12 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { for (i = 0; i < 16; i++) { TX_TYPE tx_type = get_tx_type_4x4(xd, i); if (tx_type != DCT_DCT) { - vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), + xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + vp9_inverse_transform_b_4x4(xd, + xd->plane[0].eobs[i], + BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), xd->block[i].diff, 32); } } @@ -37,8 +40,14 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) { int i; - for (i = 16; i < 24; i++) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + for (i = 16; i < 20; i++) { + vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[i - 16], + BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16), + xd->block[i].diff, 16); + } + for (i = 20; i < 24; i++) { + vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[i - 20], + BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16), xd->block[i].diff, 16); } } @@ -60,19 +69,20 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { for (i = 0; i < 9; i += 8) { TX_TYPE tx_type = get_tx_type_8x8(xd, i); if (tx_type != DCT_DCT) { - vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), + xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), &blockd[i].diff[0], 32); } } for (i = 2; i < 11; i += 8) { TX_TYPE tx_type = get_tx_type_8x8(xd, i); if (tx_type != DCT_DCT) { - vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff, - 16, tx_type); + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16), + xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0], + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16), &blockd[i].diff[0], 32); } } @@ -82,8 +92,12 @@ void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd) { int i; BLOCKD *blockd = xd->block; - for (i = 16; i < 24; i += 4) { - vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], + for (i = 16; i < 20; i += 4) { + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16), + &blockd[i].diff[0], 16); + } + for (i = 20; i < 24; i += 4) { + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16), &blockd[i].diff[0], 16); } } @@ -102,9 +116,10 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) { BLOCKD *bd = &xd->block[0]; TX_TYPE tx_type = get_tx_type_16x16(xd, 0); if (tx_type != DCT_DCT) { - vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type); + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), + bd->diff, 16, tx_type); } else { - vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0], + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), &xd->block[0].diff[0], 32); } } @@ -115,7 +130,7 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { } void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) { - vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64); + vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), xd->diff, 64); } void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) { @@ -126,11 +141,11 @@ void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 32 * 16, 64); } else { - vp9_short_iht16x16(xd->dqcoeff + n * 256, + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type); } } @@ -144,10 +159,10 @@ void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 32 * 8, 64); } else { - vp9_short_iht8x8(xd->dqcoeff + n * 64, + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type); } } @@ -161,19 +176,20 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n], + BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 32, 64); } else { - vp9_short_iht4x4(xd->dqcoeff + n * 16, + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type); } } } void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024, + vp9_inverse_transform_b_16x16(xd->plane[1].dqcoeff, xd->diff + 1024, 32); - vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280, + vp9_inverse_transform_b_16x16(xd->plane[2].dqcoeff, xd->diff + 1280, 32); } @@ -183,10 +199,10 @@ void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64), xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8, 32); - vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64), xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8, 32); } @@ -198,12 +214,12 @@ void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) { for (n = 0; n < 16; n++) { const int x_idx = n & 3, y_idx = n >> 2; - vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n], - xd->dqcoeff + 1024 + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n], + BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16), xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4, 32); - vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n], - xd->dqcoeff + 1280 + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n], + BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16), xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4, 32); } @@ -215,7 +231,7 @@ void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - vp9_short_idct32x32(xd->dqcoeff + n * 1024, + vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024), xd->diff + x_idx * 32 + y_idx * 32 * 64, 128); } } @@ -228,11 +244,11 @@ void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 64 * 16, 128); } else { - vp9_short_iht16x16(xd->dqcoeff + n * 256, + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type); } } @@ -246,10 +262,10 @@ void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 64 * 8, 128); } else { - vp9_short_iht8x8(xd->dqcoeff + n * 64, + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type); } } @@ -263,19 +279,20 @@ void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n], + BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 64, 128); } else { - vp9_short_iht4x4(xd->dqcoeff + n * 16, + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type); } } } void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) { - vp9_short_idct32x32(xd->dqcoeff + 4096, + vp9_short_idct32x32(xd->plane[1].dqcoeff, xd->diff + 4096, 64); - vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024, + vp9_short_idct32x32(xd->plane[2].dqcoeff, xd->diff + 4096 + 1024, 64); } @@ -285,9 +302,9 @@ void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16; - vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256), xd->diff + 4096 + off, 64); - vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256), xd->diff + 4096 + 1024 + off, 64); } } @@ -298,9 +315,9 @@ void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) { for (n = 0; n < 16; n++) { const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8; - vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64), xd->diff + 4096 + off, 64); - vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64), xd->diff + 4096 + 1024 + off, 64); } } @@ -311,11 +328,11 @@ void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) { for (n = 0; n < 64; n++) { const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4; - vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n], - xd->dqcoeff + 4096 + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n], + BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16), xd->diff + 4096 + off, 64); - vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n], - xd->dqcoeff + 4096 + 1024 + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n], + BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16), xd->diff + 4096 + 1024 + off, 64); } } diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c index 85ba82dd3..b357c9ac9 100644 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@ -99,11 +99,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) { blockd[to].predictor = &mb->predictor[from]; } } - - for (r = 0; r < 24; r++) { - blockd[r].qcoeff = &mb->qcoeff[r * 16]; - blockd[r].dqcoeff = &mb->dqcoeff[r * 16]; - } } void vp9_build_block_doffsets(MACROBLOCKD *mb) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 8b6efc384..cf95524e0 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -29,9 +29,6 @@ forward_decls vp9_common_forward_decls prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_uv_block_8x8 - prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_16x16 @@ -44,15 +41,12 @@ specialize vp9_dequant_idct_add prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block -prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs" specialize vp9_dequant_idct_add_uv_block prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_32x32 -prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_uv_block_16x16 - # # RECON # @@ -606,8 +600,7 @@ prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" specialize vp9_subtract_b mmx sse2 prototype int vp9_mbuverror "struct macroblock *mb" -specialize vp9_mbuverror mmx sse2 -vp9_mbuverror_sse2=vp9_mbuverror_xmm +specialize vp9_mbuverror prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" specialize vp9_subtract_b mmx sse2 diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 7277b5f96..b8ec07001 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -245,19 +245,23 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, } #endif if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff, + vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, 16, xd->dst.y_stride, - xd->eobs[0]); + xd->plane[0].eobs[0]); } else { - vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, + vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, - 16, xd->dst.y_stride, xd->eobs[0]); + 16, xd->dst.y_stride, xd->plane[0].eobs[0]); } - vp9_dequant_idct_add_uv_block_8x8( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + + vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, 8, + xd->dst.uv_stride, xd->plane[1].eobs[0]); + + vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8, + xd->dst.uv_stride, xd->plane[2].eobs[0]); } static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, @@ -281,7 +285,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; int idx = (ib & 0x02) ? (ib + 2) : ib; - int16_t *q = xd->block[idx].qcoeff; + int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16); int16_t *dq = xd->block[0].dequant; uint8_t *pre = xd->block[ib].predictor; uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; @@ -294,14 +298,14 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, tx_type = get_tx_type_8x8(xd, ib); if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride, - xd->eobs[idx]); + xd->plane[0].eobs[idx]); } else { vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, - xd->eobs[idx]); + xd->plane[0].eobs[idx]); } } } else { - vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, + vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, @@ -319,23 +323,33 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, + xd->plane[1].eobs[i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, + xd->plane[2].eobs[i]); } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->plane[1].eobs); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->plane[2].eobs); } else { - vp9_dequant_idct_add_uv_block_8x8 - (xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, 8, + xd->dst.uv_stride, xd->plane[1].eobs[0]); + + vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8, + xd->dst.uv_stride, xd->plane[2].eobs[0]); } #if 0 // def DEC_DEBUG if (dec_debug) { @@ -378,24 +392,31 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, b = &xd->block[ib + iblock[j]]; tx_type = get_tx_type_4x4(xd, ib + iblock[j]); if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + vp9_ht_dequant_idct_add_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, - b->dst_stride, xd->eobs[ib + iblock[j]]); + b->dst_stride, + xd->plane[0].eobs[ib + iblock[j]]); } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, - xd->eobs[ib + iblock[j]]); + xd->plane[0].eobs[ib + iblock[j]]); } } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, + xd->plane[1].eobs[i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 8, b->dst_stride, + xd->plane[2].eobs[i]); } } else if (mode == B_PRED) { for (i = 0; i < 16; i++) { @@ -410,13 +431,16 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra4x4_predict(xd, b, b_mode, b->predictor); tx_type = get_tx_type_4x4(xd, i); if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + vp9_ht_dequant_idct_add_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, - xd->eobs[i]); + xd->plane[0].eobs[i]); } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride, + xd->plane[0].eobs[i]); } } #if CONFIG_NEWBINTRAMODES @@ -424,27 +448,25 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); #endif vp9_build_intra_predictors_mbuv(xd); - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->plane[1].eobs); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->plane[2].eobs); } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) { - xd->itxm_add_y_block(xd->qcoeff, + xd->itxm_add_y_block(xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd); - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->plane[1].eobs); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->plane[2].eobs); } else { #if 0 // def DEC_DEBUG if (dec_debug) { @@ -467,22 +489,24 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, BLOCKD *b = &xd->block[i]; tx_type = get_tx_type_4x4(xd, i); if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + vp9_ht_dequant_idct_add_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, - b->dst_stride, xd->eobs[i]); + b->dst_stride, xd->plane[0].eobs[i]); } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride, + xd->plane[0].eobs[i]); } } - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->plane[1].eobs); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->plane[2].eobs); } } @@ -490,11 +514,6 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { const int y_count = y_size * y_size; const int uv_size = y_size / 2; const int uv_count = uv_size * uv_size; - - const int u_qcoeff_offset = (16 * 16) * y_count; - const int v_qcoeff_offset = u_qcoeff_offset + (16 * 16) * uv_count; - const int u_eob_offset = 16 * y_count; - const int v_eob_offset = u_eob_offset + 16 * uv_count; int n; for (n = 0; n < y_count; n++) { @@ -504,20 +523,20 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { const TX_TYPE tx_type = get_tx_type_16x16(mb, (y_idx * (4 * y_size) + x_idx) * 4); if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_16x16(mb->qcoeff + n * 16 * 16, + vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), mb->block[0].dequant , mb->dst.y_buffer + y_offset, mb->dst.y_buffer + y_offset, mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n * 16]); + mb->plane[0].eobs[n * 16]); } else { vp9_ht_dequant_idct_add_16x16_c(tx_type, - mb->qcoeff + n * 16 * 16, + BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), mb->block[0].dequant, mb->dst.y_buffer + y_offset, mb->dst.y_buffer + y_offset, mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n * 16]); + mb->plane[0].eobs[n * 16]); } } @@ -525,54 +544,49 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { const int x_idx = n % uv_size; const int y_idx = n / uv_size; const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16); - vp9_dequant_idct_add_16x16(mb->qcoeff + u_qcoeff_offset + n * 16 * 16, + vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256), mb->block[16].dequant, mb->dst.u_buffer + uv_offset, mb->dst.u_buffer + uv_offset, mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[u_eob_offset + n * 16]); - vp9_dequant_idct_add_16x16(mb->qcoeff + v_qcoeff_offset + n * 16 * 16, + mb->plane[1].eobs[n * 16]); + vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256), mb->block[20].dequant, mb->dst.v_buffer + uv_offset, mb->dst.v_buffer + uv_offset, mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[v_eob_offset + n * 16]); + mb->plane[2].eobs[n * 16]); } } -static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) { +static INLINE void decode_sb_8x8(MACROBLOCKD *xd, int y_size) { const int y_count = y_size * y_size; const int uv_size = y_size / 2; const int uv_count = uv_size * uv_size; - - const int u_qcoeff_offset = (8 * 8) * y_count; - const int v_qcoeff_offset = u_qcoeff_offset + (8 * 8) * uv_count; - const int u_eob_offset = 4 * y_count; - const int v_eob_offset = u_eob_offset + 4 * uv_count; int n; // luma for (n = 0; n < y_count; n++) { const int x_idx = n % y_size; const int y_idx = n / y_size; - const int y_offset = (y_idx * 8) * mb->dst.y_stride + (x_idx * 8); - const TX_TYPE tx_type = get_tx_type_8x8(mb, + const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8); + const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * (2 * y_size) + x_idx) * 2); if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_8x8_c(mb->qcoeff + n * 8 * 8, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n * 4]); + vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, xd->dst.y_stride, + xd->plane[0].eobs[n * 4]); } else { vp9_ht_dequant_idct_add_8x8_c(tx_type, - mb->qcoeff + n * 8 * 8, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n * 4]); + BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, xd->dst.y_stride, + xd->plane[0].eobs[n * 4]); } } @@ -580,73 +594,67 @@ static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) { for (n = 0; n < uv_count; n++) { const int x_idx = n % uv_size; const int y_idx = n / uv_size; - const int uv_offset = (y_idx * 8) * mb->dst.uv_stride + (x_idx * 8); - vp9_dequant_idct_add_8x8_c(mb->qcoeff + u_qcoeff_offset + n * 8 * 8, - mb->block[16].dequant, - mb->dst.u_buffer + uv_offset, - mb->dst.u_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[u_eob_offset + n * 4]); - vp9_dequant_idct_add_8x8_c(mb->qcoeff + v_qcoeff_offset + n * 8 * 8, - mb->block[20].dequant, - mb->dst.v_buffer + uv_offset, - mb->dst.v_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[v_eob_offset + n * 4]); + const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8); + vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64), + xd->block[16].dequant, + xd->dst.u_buffer + uv_offset, + xd->dst.u_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[1].eobs[n * 4]); + vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64), + xd->block[20].dequant, + xd->dst.v_buffer + uv_offset, + xd->dst.v_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[2].eobs[n * 4]); } } -static void decode_sb_4x4(MACROBLOCKD *mb, int y_size) { +static void decode_sb_4x4(MACROBLOCKD *xd, int y_size) { const int y_count = y_size * y_size; const int uv_size = y_size / 2; const int uv_count = uv_size * uv_size; - - const int u_qcoeff_offset = (4 * 4) * y_count; - const int v_qcoeff_offset = u_qcoeff_offset + (4 * 4) * uv_count; - const int u_eob_offset = y_count; - const int v_eob_offset = u_eob_offset + uv_count; int n; for (n = 0; n < y_count; n++) { const int x_idx = n % y_size; const int y_idx = n / y_size; - const int y_offset = (y_idx * 4) * mb->dst.y_stride + (x_idx * 4); - const TX_TYPE tx_type = get_tx_type_4x4(mb, y_idx * y_size + x_idx); + const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4); + const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * y_size + x_idx); if (tx_type == DCT_DCT) { - mb->itxm_add(mb->qcoeff + n * 4 * 4, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, xd->dst.y_stride, + xd->plane[0].eobs[n]); } else { vp9_ht_dequant_idct_add_c(tx_type, - mb->qcoeff + n * 4 * 4, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n]); + BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, + xd->dst.y_stride, + xd->plane[0].eobs[n]); } } for (n = 0; n < uv_count; n++) { const int x_idx = n % uv_size; const int y_idx = n / uv_size; - const int uv_offset = (y_idx * 4) * mb->dst.uv_stride + (x_idx * 4); - mb->itxm_add(mb->qcoeff + u_qcoeff_offset + n * 4 * 4, - mb->block[16].dequant, - mb->dst.u_buffer + uv_offset, - mb->dst.u_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[u_eob_offset + n]); - mb->itxm_add(mb->qcoeff + v_qcoeff_offset + n * 4 * 4, - mb->block[20].dequant, - mb->dst.v_buffer + uv_offset, - mb->dst.v_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[v_eob_offset + n]); + const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4); + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16), + xd->block[16].dequant, + xd->dst.u_buffer + uv_offset, + xd->dst.u_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[n]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16), + xd->block[20].dequant, + xd->dst.v_buffer + uv_offset, + xd->dst.v_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[n]); } } @@ -698,18 +706,18 @@ static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32; - vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024, + vp9_dequant_idct_add_32x32(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 1024), xd->block[0].dequant, xd->dst.y_buffer + y_offset, xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]); + xd->dst.y_stride, xd->dst.y_stride, xd->plane[0].eobs[n * 64]); } - vp9_dequant_idct_add_32x32(xd->qcoeff + 4096, + vp9_dequant_idct_add_32x32(xd->plane[1].qcoeff, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]); - vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024, + xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[0]); + vp9_dequant_idct_add_32x32(xd->plane[2].qcoeff, xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]); + xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[0]); break; case TX_16X16: decode_sb_16x16(xd, 4); @@ -776,15 +784,18 @@ static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, } else { switch (xd->mode_info_context->mbmi.txfm_size) { case TX_32X32: - vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant, + vp9_dequant_idct_add_32x32(xd->plane[0].qcoeff, xd->block[0].dequant, xd->dst.y_buffer, xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_stride, - xd->eobs[0]); - vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, xd); + xd->plane[0].eobs[0]); + vp9_dequant_idct_add_16x16(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->dst.u_buffer, xd->dst.u_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[1].eobs[0]); + vp9_dequant_idct_add_16x16(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->dst.v_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[2].eobs[0]); break; case TX_16X16: decode_sb_16x16(xd, 2); @@ -1852,7 +1863,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { vp9_build_block_doffsets(xd); // clear out the coeff buffer - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + vpx_memset(xd->plane[0].qcoeff, 0, sizeof(xd->plane[0].qcoeff)); + vpx_memset(xd->plane[1].qcoeff, 0, sizeof(xd->plane[1].qcoeff)); + vpx_memset(xd->plane[2].qcoeff, 0, sizeof(xd->plane[2].qcoeff)); // Read the mb_no_coeff_skip flag pc->mb_no_coeff_skip = vp9_read_bit(&header_bc); diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 9aebcdcfc..c0d1e2adb 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -388,14 +388,3 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, } } } - -void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, - uint8_t *dstu, - uint8_t *dstv, - int stride, - MACROBLOCKD *xd) { - vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, - xd->eobs[64]); - vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, - xd->eobs[80]); -} diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 933108dae..bb72bb294 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -40,10 +40,9 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, unsigned char *pre, - unsigned char *dst_u, - unsigned char *dst_v, + unsigned char *dst, int stride, - struct macroblockd *xd); + uint16_t *eobs); void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index e55826379..a4ada2b7e 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -389,19 +389,32 @@ static INLINE int decode_sb(VP9D_COMP* const pbi, const int seg_eob = get_eob(xd, segment_id, eob_max); int i, eobtotal = 0; + assert(count == offset * 3 / 2); + // luma blocks for (i = 0; i < offset; i += inc) { const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob, - xd->qcoeff + i * 16, tx_size); - xd->eobs[i] = c; + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + tx_size); + xd->plane[0].eobs[i] = c; eobtotal += c; } // chroma blocks - for (i = offset; i < count; i += inc) { + for (i = offset; i < offset * 5 / 4; i += inc) { + const int b = i - offset; const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, - xd->qcoeff + i * 16, tx_size); - xd->eobs[i] = c; + BLOCK_OFFSET(xd->plane[1].qcoeff, b, 16), + tx_size); + xd->plane[1].eobs[b] = c; + eobtotal += c; + } + for (i = offset * 5 / 4; i < count; i += inc) { + const int b = i - offset * 5 / 4; + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, + BLOCK_OFFSET(xd->plane[2].qcoeff, b, 16), + tx_size); + xd->plane[2].eobs[b] = c; eobtotal += c; } @@ -415,20 +428,24 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, case TX_32X32: { // 32x32 luma block const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; + int eobtotal = 0, seg_eob; int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, - get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32); - xd->eobs[0] = c; + get_eob(xd, segment_id, 1024), + xd->plane[0].qcoeff, TX_32X32); + xd->plane[0].eobs[0] = c; eobtotal += c; // 16x16 chroma blocks seg_eob = get_eob(xd, segment_id, 256); - for (i = 64; i < 96; i += 16) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, - xd->qcoeff + i * 16, TX_16X16); - xd->eobs[i] = c; - eobtotal += c; - } + + c = decode_coefs(pbi, xd, bc, 64, PLANE_TYPE_UV, seg_eob, + xd->plane[1].qcoeff, TX_16X16); + xd->plane[1].eobs[0] = c; + eobtotal += c; + c = decode_coefs(pbi, xd, bc, 80, PLANE_TYPE_UV, seg_eob, + xd->plane[2].qcoeff, TX_16X16); + xd->plane[2].eobs[0] = c; + eobtotal += c; return eobtotal; } case TX_16X16: @@ -465,22 +482,26 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; + int eobtotal = 0, seg_eob; // Luma block int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, - get_eob(xd, segment_id, 256), xd->qcoeff, TX_16X16); - xd->eobs[0] = c; + get_eob(xd, segment_id, 256), + xd->plane[0].qcoeff, TX_16X16); + xd->plane[0].eobs[0] = c; eobtotal += c; // 8x8 chroma blocks seg_eob = get_eob(xd, segment_id, 64); - for (i = 16; i < 24; i += 4) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_8X8); - xd->eobs[i] = c; - eobtotal += c; - } + + c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV, + seg_eob, xd->plane[1].qcoeff, TX_8X8); + xd->plane[1].eobs[0] = c; + eobtotal += c; + c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV, + seg_eob, xd->plane[2].qcoeff, TX_8X8); + xd->plane[2].eobs[0] = c; + eobtotal += c; return eobtotal; } @@ -493,9 +514,10 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, // luma blocks int seg_eob = get_eob(xd, segment_id, 64); for (i = 0; i < 16; i += 4) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, - seg_eob, xd->block[i].qcoeff, TX_8X8); - xd->eobs[i] = c; + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob, + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + TX_8X8); + xd->plane[0].eobs[i] = c; eobtotal += c; } @@ -504,19 +526,31 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, xd->mode_info_context->mbmi.mode == SPLITMV) { // use 4x4 transform for U, V components in I8X8/splitmv prediction mode seg_eob = get_eob(xd, segment_id, 16); - for (i = 16; i < 24; i++) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_4X4); - xd->eobs[i] = c; + for (i = 16; i < 20; i++) { + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, + BLOCK_OFFSET(xd->plane[1].qcoeff, i - 16, 16), + TX_4X4); + xd->plane[1].eobs[i - 16] = c; eobtotal += c; } - } else { - for (i = 16; i < 24; i += 4) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_8X8); - xd->eobs[i] = c; + for (i = 20; i < 24; i++) { + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, + BLOCK_OFFSET(xd->plane[2].qcoeff, i - 20, 16), + TX_4X4); + xd->plane[2].eobs[i - 20] = c; eobtotal += c; } + } else { + int c; + + c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV, seg_eob, + xd->plane[1].qcoeff, TX_8X8); + xd->plane[1].eobs[0] = c; + eobtotal += c; + c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV, seg_eob, + xd->plane[2].qcoeff, TX_8X8); + xd->plane[2].eobs[0] = c; + eobtotal += c; } return eobtotal; @@ -525,9 +559,10 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, BOOL_DECODER* const bc, PLANE_TYPE type, int i, int seg_eob) { + const struct plane_block_idx pb_idx = plane_block_idx(16, i); const int c = decode_coefs(dx, xd, bc, i, type, seg_eob, - xd->block[i].qcoeff, TX_4X4); - xd->eobs[i] = c; + BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), TX_4X4); + xd->plane[pb_idx.plane].eobs[pb_idx.block] = c; return c; } diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index d74b61919..bcf7dfdb9 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -20,7 +20,8 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, xd->eobs[i * 4 + j]); + vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, + xd->plane[0].eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; @@ -32,35 +33,20 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, } void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dstu, - uint8_t *dstv, int stride, - MACROBLOCKD *xd) { + uint8_t *pre, uint8_t *dst, + int stride, uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride, - xd->eobs[16 + i * 2 + j]); - q += 16; - pre += 4; - dstu += 4; - } - - pre += 32 - 8; - dstu += 4 * stride - 8; - } - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride, - xd->eobs[20 + i * 2 + j]); - q += 16; - pre += 4; - dstv += 4; + vp9_dequant_idct_add(q, dq, pre, dst, 8, stride, eobs[i * 2 + j]); + q += 16; + pre += 4; + dst += 4; } pre += 32 - 8; - dstv += 4 * stride - 8; + dst += 4 * stride - 8; } } @@ -71,28 +57,17 @@ void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, uint8_t *origdest = dst; uint8_t *origpred = pre; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->eobs[0]); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, + xd->plane[0].eobs[0]); vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, - origdest + 8, 16, stride, xd->eobs[4]); + origdest + 8, 16, stride, + xd->plane[0].eobs[4]); vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16, origdest + 8 * stride, 16, stride, - xd->eobs[8]); + xd->plane[0].eobs[8]); vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8, origdest + 8 * stride + 8, 16, stride, - xd->eobs[12]); -} - -void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dstu, - uint8_t *dstv, - int stride, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]); - - q += 64; - pre += 64; - - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]); + xd->plane[0].eobs[12]); } void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, @@ -104,7 +79,7 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride, - xd->eobs[i * 4 + j]); + xd->plane[0].eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; @@ -117,36 +92,22 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, uint8_t *pre, - uint8_t *dstu, - uint8_t *dstv, + uint8_t *dst, int stride, - MACROBLOCKD *xd) { + uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride, - xd->eobs[16 + i * 2 + j]); - q += 16; - pre += 4; - dstu += 4; - } - - pre += 32 - 8; - dstu += 4 * stride - 8; - } - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride, - xd->eobs[20 + i * 2 + j]); - q += 16; - pre += 4; - dstv += 4; + vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 8, stride, + eobs[i * 2 + j]); + q += 16; + pre += 4; + dst += 4; } - pre += 32 - 8; - dstv += 4 * stride - 8; + pre += 32 - 8; + dst += 4 * stride - 8; } } diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c index e174a894a..1a770dcf7 100644 --- a/vp9/encoder/vp9_asm_enc_offsets.c +++ b/vp9/encoder/vp9_asm_enc_offsets.c @@ -29,9 +29,7 @@ DEFINE(vp9_block_zbin_extra, offsetof(BLOCK, zbin_extra)); DEFINE(vp9_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost)); DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift)); -DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff)); DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant)); -DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff)); END diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 491ea62b5..b2021d7a1 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -171,10 +171,13 @@ struct macroblock { void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch); void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); - void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx); - void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2); - void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type); - void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type); + void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, int y_blocks); + void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2, + int y_blocks); + void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, + int y_blocks); + void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, + int y_blocks); }; #endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index eddacb872..bf9410522 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -16,6 +16,8 @@ #include "vp9/common/vp9_invtrans.h" #include "vp9/encoder/vp9_encodeintra.h" +static void encode_intra4x4block(MACROBLOCK *x, int ib); + int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; (void) cpi; @@ -31,18 +33,21 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { for (i = 0; i < 16; i++) { x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED; - vp9_encode_intra4x4block(x, i); + encode_intra4x4block(x, i); } } return vp9_get_mb_ss(x->src_diff); } -void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { +static void encode_intra4x4block(MACROBLOCK *x, int ib) { BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; + MACROBLOCKD * const xd = &x->e_mbd; TX_TYPE tx_type; + assert(ib < 16); + #if CONFIG_NEWBINTRAMODES b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b); #endif @@ -54,12 +59,14 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { if (tx_type != DCT_DCT) { vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(x, ib, tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), + b->diff, 16, tx_type); } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], - b->dqcoeff, b->diff, 32); + x->quantize_b_4x4(x, ib, 16); + vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib], + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), + b->diff, 32); } vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -69,7 +76,7 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) { int i; for (i = 0; i < 16; i++) - vp9_encode_intra4x4block(mb, i); + encode_intra4x4block(mb, i); } void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -151,41 +158,47 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { int idx = (ib & 0x02) ? (ib + 2) : ib; + int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16); + assert(idx < 16); tx_type = get_tx_type_8x8(xd, ib); if (tx_type != DCT_DCT) { vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); - x->quantize_b_8x8(x, idx, tx_type); - vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, + x->quantize_b_8x8(x, idx, tx_type, 16); + vp9_short_iht8x8(dqcoeff, xd->block[ib].diff, 16, tx_type); } else { x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT); - vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); + x->quantize_b_8x8(x, idx, DCT_DCT, 16); + vp9_short_idct8x8(dqcoeff, xd->block[ib].diff, 32); } } else { for (i = 0; i < 4; i++) { + int idx = ib + iblock[i]; + int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16); + + assert(idx < 16); b = &xd->block[ib + iblock[i]]; be = &x->block[ib + iblock[i]]; tx_type = get_tx_type_4x4(xd, ib + iblock[i]); if (tx_type != DCT_DCT) { vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); + vp9_short_iht4x4(dqcoeff, b->diff, 16, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1); - vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], - b->dqcoeff, b->diff, 32); - vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1], - (b + 1)->dqcoeff, (b + 1)->diff, 32); + x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16); + vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]], + dqcoeff, b->diff, 32); + vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i] + 1], + dqcoeff + 16, (b + 1)->diff, 32); i++; } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, ib + iblock[i]); - vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], - b->dqcoeff, b->diff, 32); + x->quantize_b_4x4(x, ib + iblock[i], 16); + vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]], + dqcoeff, b->diff, 32); } } } @@ -206,17 +219,22 @@ void vp9_encode_intra8x8mby(MACROBLOCK *x) { } static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { + MACROBLOCKD * const xd = &x->e_mbd; BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; + int16_t * const dqcoeff = MB_SUBBLOCK_FIELD(xd, dqcoeff, ib); + const int plane = ib < 20 ? 1 : 2; + const int block = ib < 20 ? ib - 16 : ib - 20; + assert(ib >= 16 && ib < 24); vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor); vp9_subtract_b(be, b, 8); x->fwd_txm4x4(be->src_diff, be->coeff, 16); - x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], - b->dqcoeff, b->diff, 16); + x->quantize_b_4x4(x, ib, 16); + vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block], + dqcoeff, b->diff, 16); vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index 0b19b5652..6576c94d2 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -17,7 +17,6 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra4x4mby(MACROBLOCK *mb); -void vp9_encode_intra4x4block(MACROBLOCK *x, int ib); void vp9_encode_intra8x8mby(MACROBLOCK *x); void vp9_encode_intra8x8mbuv(MACROBLOCK *x); void vp9_encode_intra8x8(MACROBLOCK *x, int ib); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 27015773f..f0c215d90 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -544,15 +544,16 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, int ib, PLANE_TYPE type, const int16_t *dequant_ptr, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int tx_size) { + int tx_size, int y_blocks) { const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); const int16_t *coeff_ptr = mb->coeff + ib * 16; - int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16; - int eob = xd->eobs[ib], final_eob, sz = 0; + int16_t *qcoeff_ptr; + int16_t *dqcoeff_ptr; + int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0; const int i0 = 0; int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; @@ -582,6 +583,9 @@ static void optimize_b(VP9_COMMON *const cm, nzc0 = nzc1 = nzc; #endif + assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); + dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16); + qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16); switch (tx_size) { default: case TX_4X4: { @@ -641,6 +645,7 @@ static void optimize_b(VP9_COMMON *const cm, #endif break; } + assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; @@ -838,7 +843,7 @@ static void optimize_b(VP9_COMMON *const cm, } final_eob++; - xd->eobs[ib] = final_eob; + xd->plane[pb_idx.plane].eobs[pb_idx.block] = final_eob; *a = *l = (final_eob > 0); #if CONFIG_CODE_NONZEROCOUNT assert(final_nzc == final_nzc_exp); @@ -864,7 +869,7 @@ void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 16; b++) { optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b], TX_4X4); + tl + vp9_block2left[TX_4X4][b], TX_4X4, 16); } } @@ -886,7 +891,7 @@ void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 16; b < 24; b++) { optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], - tl + vp9_block2left[TX_4X4][b], TX_4X4); + tl + vp9_block2left[TX_4X4][b], TX_4X4, 16); } } @@ -915,7 +920,7 @@ void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, - &above_ec, &left_ec, TX_8X8); + &above_ec, &left_ec, TX_8X8, 16); a[1] = a[0] = above_ec; l[1] = l[0] = left_ec; } @@ -935,7 +940,7 @@ void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, - &above_ec, &left_ec, TX_8X8); + &above_ec, &left_ec, TX_8X8, 16); } } @@ -955,7 +960,7 @@ void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0; tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0; optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - &ta, &tl, TX_16X16); + &ta, &tl, TX_16X16, 16); } static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -973,7 +978,7 @@ void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - &ta, &tl, TX_32X32); + &ta, &tl, TX_32X32, 64); } void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -992,7 +997,7 @@ void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { const int x_idx = n & 1, y_idx = n >> 1; optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16); + ta + x_idx, tl + y_idx, TX_16X16, 64); } } @@ -1016,7 +1021,7 @@ void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { const int x_idx = n & 3, y_idx = n >> 2; optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8); + ta + x_idx, tl + y_idx, TX_8X8, 64); } } @@ -1032,7 +1037,7 @@ void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { const int x_idx = n & 7, y_idx = n >> 3; optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4); + ta + x_idx, tl + y_idx, TX_4X4, 64); } } @@ -1051,7 +1056,7 @@ void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16); + &above_ec, &left_ec, TX_16X16, 64); } } @@ -1071,7 +1076,7 @@ void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { above_ec = (a[0] + a[1]) != 0; left_ec = (l[0] + l[1]) != 0; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8); + &above_ec, &left_ec, TX_8X8, 64); a[0] = a[1] = above_ec; l[0] = l[1] = left_ec; } @@ -1091,7 +1096,7 @@ void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { a = ta + vp9_block2above_sb[TX_4X4][b]; l = tl + vp9_block2left_sb[TX_4X4][b]; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4); + a, l, TX_4X4, 64); } } @@ -1115,7 +1120,7 @@ void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { const int x_idx = n & 1, y_idx = n >> 1; optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_32X32); + ta + x_idx, tl + y_idx, TX_32X32, 256); } } @@ -1143,7 +1148,7 @@ void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { const int x_idx = n & 3, y_idx = n >> 2; optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16); + ta + x_idx, tl + y_idx, TX_16X16, 256); } } @@ -1179,7 +1184,7 @@ void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { const int x_idx = n & 7, y_idx = n >> 3; optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8); + ta + x_idx, tl + y_idx, TX_8X8, 256); } } @@ -1199,7 +1204,7 @@ void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { const int x_idx = n & 15, y_idx = n >> 4; optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4); + ta + x_idx, tl + y_idx, TX_4X4, 256); } } @@ -1222,7 +1227,7 @@ void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &a_ec, &l_ec, TX_32X32); + &a_ec, &l_ec, TX_32X32, 256); } } @@ -1244,7 +1249,7 @@ void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16); + &above_ec, &left_ec, TX_16X16, 256); a[0] = a[1] = a1[0] = a1[1] = above_ec; l[0] = l[1] = l1[0] = l1[1] = left_ec; } @@ -1266,7 +1271,7 @@ void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { above_ec = (a[0] + a[1]) != 0; left_ec = (l[0] + l[1]) != 0; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8); + &above_ec, &left_ec, TX_8X8, 256); a[0] = a[1] = above_ec; l[0] = l[1] = left_ec; } @@ -1286,7 +1291,7 @@ void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { a = ta + vp9_block2above_sb64[TX_4X4][b]; l = tl + vp9_block2left_sb64[TX_4X4][b]; optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4); + a, l, TX_4X4, 256); } } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 881fce50f..46e8a4a93 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -21,14 +21,9 @@ extern int enc_debug; #endif -static INLINE int plane_idx(MACROBLOCKD *xd, int b_idx) { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - if (b_idx < (16 << (sb_type * 2))) - return 0; // Y - else if (b_idx < (20 << (sb_type * 2))) - return 16; // U - assert(b_idx < (24 << (sb_type * 2))); - return 20; // V +static INLINE int plane_idx(int plane) { + return plane == 0 ? 0 : + plane == 1 ? 16 : 20; } void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { @@ -39,8 +34,9 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { int zbin; int x, y, z, sz; int16_t *coeff_ptr = mb->coeff + b_idx * 16; - int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16; + // ht is luma-only + int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16); + int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16); int16_t *zbin_boost_ptr = b->zrun_zbin_boost; int16_t *zbin_ptr = b->zbin; int16_t *round_ptr = b->round; @@ -53,7 +49,6 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { int nzc = 0; #endif - assert(plane_idx(xd, b_idx) == 0); switch (tx_type) { case ADST_DCT: pt_scan = vp9_row_scan_4x4; @@ -101,23 +96,26 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { } } - xd->eobs[b_idx] = eob + 1; + xd->plane[0].eobs[b_idx] = eob + 1; #if CONFIG_CODE_NONZEROCOUNT xd->nzcs[b_idx] = nzc; #endif } -void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { +void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; - const int c_idx = plane_idx(xd, b_idx); + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); + const int c_idx = plane_idx(pb_idx.plane); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; int i, rc, eob; int zbin; int x, y, z, sz; int16_t *coeff_ptr = mb->coeff + b_idx * 16; - int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16; + int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); + int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, + pb_idx.block, 16); int16_t *zbin_boost_ptr = b->zrun_zbin_boost; int16_t *zbin_ptr = b->zbin; int16_t *round_ptr = b->round; @@ -129,6 +127,9 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { int nzc = 0; #endif + if (c_idx == 0) assert(pb_idx.plane == 0); + if (c_idx == 16) assert(pb_idx.plane == 1); + if (c_idx == 20) assert(pb_idx.plane == 2); vpx_memset(qcoeff_ptr, 0, 32); vpx_memset(dqcoeff_ptr, 0, 32); @@ -165,7 +166,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { } } - xd->eobs[b_idx] = eob + 1; + xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1; #if CONFIG_CODE_NONZEROCOUNT xd->nzcs[b_idx] = nzc; #endif @@ -179,16 +180,20 @@ void vp9_quantize_mby_4x4(MACROBLOCK *x) { if (tx_type != DCT_DCT) { vp9_ht_quantize_b_4x4(x, i, tx_type); } else { - x->quantize_b_4x4(x, i); + x->quantize_b_4x4(x, i, 16); } } } void vp9_quantize_mbuv_4x4(MACROBLOCK *x) { int i; + const MACROBLOCKD * const xd = &x->e_mbd; + const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type; + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; for (i = 16; i < 24; i++) - x->quantize_b_4x4(x, i); + x->quantize_b_4x4(x, i, 16); + xd->mode_info_context->mbmi.sb_type = real_sb_type; } void vp9_quantize_mb_4x4(MACROBLOCK *x) { @@ -196,11 +201,15 @@ void vp9_quantize_mb_4x4(MACROBLOCK *x) { vp9_quantize_mbuv_4x4(x); } -void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { +void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, + int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; - int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx; - int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx; - const int c_idx = plane_idx(xd, b_idx); + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); + const int c_idx = plane_idx(pb_idx.plane); + int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); + int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, + pb_idx.block, 16); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; const int *pt_scan; @@ -217,6 +226,9 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { break; } + if (c_idx == 0) assert(pb_idx.plane == 0); + if (c_idx == 16) assert(pb_idx.plane == 1); + if (c_idx == 20) assert(pb_idx.plane == 2); vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t)); vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t)); @@ -295,12 +307,12 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { } } } - xd->eobs[b_idx] = eob + 1; + xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1; #if CONFIG_CODE_NONZEROCOUNT xd->nzcs[b_idx] = nzc; #endif } else { - xd->eobs[b_idx] = 0; + xd->plane[pb_idx.plane].eobs[pb_idx.block] = 0; #if CONFIG_CODE_NONZEROCOUNT xd->nzcs[b_idx] = 0; #endif @@ -317,12 +329,15 @@ void vp9_quantize_mby_8x8(MACROBLOCK *x) { #endif for (i = 0; i < 16; i += 4) { TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (i & 8) + ((i & 4) >> 1)); - x->quantize_b_8x8(x, i, tx_type); + x->quantize_b_8x8(x, i, tx_type, 16); } } void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { int i; + const MACROBLOCKD * const xd = &x->e_mbd; + const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type; + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; #if CONFIG_CODE_NONZEROCOUNT for (i = 16; i < 24; i ++) { @@ -330,7 +345,8 @@ void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { } #endif for (i = 16; i < 24; i += 4) - x->quantize_b_8x8(x, i, DCT_DCT); + x->quantize_b_8x8(x, i, DCT_DCT, 16); + xd->mode_info_context->mbmi.sb_type = real_sb_type; } void vp9_quantize_mb_8x8(MACROBLOCK *x) { @@ -346,7 +362,7 @@ void vp9_quantize_mby_16x16(MACROBLOCK *x) { x->e_mbd.nzcs[i] = 0; } #endif - x->quantize_b_16x16(x, 0, tx_type); + x->quantize_b_16x16(x, 0, tx_type, 16); } void vp9_quantize_mb_16x16(MACROBLOCK *x) { @@ -415,9 +431,11 @@ static void quantize(int16_t *zbin_boost_orig_ptr, #endif } -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { +void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, + int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; - const int c_idx = plane_idx(xd, b_idx); + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); + const int c_idx = plane_idx(pb_idx.plane); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; const int *pt_scan; @@ -434,37 +452,44 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { break; } + if (c_idx == 0) assert(pb_idx.plane == 0); + if (c_idx == 16) assert(pb_idx.plane == 1); + if (c_idx == 20) assert(pb_idx.plane == 2); quantize(b->zrun_zbin_boost, mb->coeff + 16 * b_idx, 256, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, - xd->qcoeff + 16 * b_idx, - xd->dqcoeff + 16 * b_idx, + BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), + BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), d->dequant, b->zbin_extra, - &xd->eobs[b_idx], + &xd->plane[pb_idx.plane].eobs[pb_idx.block], #if CONFIG_CODE_NONZEROCOUNT &xd->nzcs[b_idx], #endif pt_scan, 1); } -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) { +void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; - const int c_idx = plane_idx(xd, b_idx); + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); + const int c_idx = plane_idx(pb_idx.plane); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; + if (c_idx == 0) assert(pb_idx.plane == 0); + if (c_idx == 16) assert(pb_idx.plane == 1); + if (c_idx == 20) assert(pb_idx.plane == 2); quantize(b->zrun_zbin_boost, mb->coeff + b_idx * 16, 1024, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, - xd->qcoeff + b_idx * 16, - xd->dqcoeff + b_idx * 16, + BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), + BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), d->dequant, b->zbin_extra, - &xd->eobs[b_idx], + &xd->plane[pb_idx.plane].eobs[pb_idx.block], #if CONFIG_CODE_NONZEROCOUNT &xd->nzcs[b_idx], #endif @@ -472,7 +497,7 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) { } void vp9_quantize_sby_32x32(MACROBLOCK *x) { - vp9_regular_quantize_b_32x32(x, 0); + vp9_regular_quantize_b_32x32(x, 0, 64); } void vp9_quantize_sby_16x16(MACROBLOCK *x) { @@ -481,7 +506,7 @@ void vp9_quantize_sby_16x16(MACROBLOCK *x) { for (n = 0; n < 4; n++) { TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, (16 * (n & 2)) + ((n & 1) * 4)); - x->quantize_b_16x16(x, n * 16, tx_type); + x->quantize_b_16x16(x, n * 16, tx_type, 64); } } @@ -491,7 +516,7 @@ void vp9_quantize_sby_8x8(MACROBLOCK *x) { for (n = 0; n < 16; n++) { TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (4 * (n & 12)) + ((n & 3) * 2)); - x->quantize_b_8x8(x, n * 4, tx_type); + x->quantize_b_8x8(x, n * 4, tx_type, 64); } } @@ -504,35 +529,35 @@ void vp9_quantize_sby_4x4(MACROBLOCK *x) { if (tx_type != DCT_DCT) { vp9_ht_quantize_b_4x4(x, n, tx_type); } else { - x->quantize_b_4x4(x, n); + x->quantize_b_4x4(x, n, 64); } } } void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { - x->quantize_b_16x16(x, 64, DCT_DCT); - x->quantize_b_16x16(x, 80, DCT_DCT); + x->quantize_b_16x16(x, 64, DCT_DCT, 64); + x->quantize_b_16x16(x, 80, DCT_DCT, 64); } void vp9_quantize_sbuv_8x8(MACROBLOCK *x) { int i; for (i = 64; i < 96; i += 4) - x->quantize_b_8x8(x, i, DCT_DCT); + x->quantize_b_8x8(x, i, DCT_DCT, 64); } void vp9_quantize_sbuv_4x4(MACROBLOCK *x) { int i; for (i = 64; i < 96; i++) - x->quantize_b_4x4(x, i); + x->quantize_b_4x4(x, i, 64); } void vp9_quantize_sb64y_32x32(MACROBLOCK *x) { int n; for (n = 0; n < 4; n++) - vp9_regular_quantize_b_32x32(x, n * 64); + vp9_regular_quantize_b_32x32(x, n * 64, 256); } void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { @@ -541,7 +566,7 @@ void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { for (n = 0; n < 16; n++) { TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, (16 * (n & 12)) + ((n & 3) * 4)); - x->quantize_b_16x16(x, n * 16, tx_type); + x->quantize_b_16x16(x, n * 16, tx_type, 256); } } @@ -551,7 +576,7 @@ void vp9_quantize_sb64y_8x8(MACROBLOCK *x) { for (n = 0; n < 64; n++) { TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (4 * (n & 56)) + ((n & 7) * 2)); - x->quantize_b_8x8(x, n * 4, tx_type); + x->quantize_b_8x8(x, n * 4, tx_type, 256); } } @@ -564,44 +589,45 @@ void vp9_quantize_sb64y_4x4(MACROBLOCK *x) { if (tx_type != DCT_DCT) { vp9_ht_quantize_b_4x4(x, n, tx_type); } else { - x->quantize_b_4x4(x, n); + x->quantize_b_4x4(x, n, 256); } } } void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) { - vp9_regular_quantize_b_32x32(x, 256); - vp9_regular_quantize_b_32x32(x, 320); + vp9_regular_quantize_b_32x32(x, 256, 256); + vp9_regular_quantize_b_32x32(x, 320, 256); } void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) { int i; for (i = 256; i < 384; i += 16) - x->quantize_b_16x16(x, i, DCT_DCT); + x->quantize_b_16x16(x, i, DCT_DCT, 256); } void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) { int i; for (i = 256; i < 384; i += 4) - x->quantize_b_8x8(x, i, DCT_DCT); + x->quantize_b_8x8(x, i, DCT_DCT, 256); } void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) { int i; for (i = 256; i < 384; i++) - x->quantize_b_4x4(x, i); + x->quantize_b_4x4(x, i, 256); } /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of * these two C functions if corresponding optimized routine is not available. * NEON optimized version implements currently the fast quantization for pair * of blocks. */ -void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2) { - vp9_regular_quantize_b_4x4(x, b_idx1); - vp9_regular_quantize_b_4x4(x, b_idx2); +void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2, + int y_blocks) { + vp9_regular_quantize_b_4x4(x, b_idx1, y_blocks); + vp9_regular_quantize_b_4x4(x, b_idx2, y_blocks); } static void invert_quant(int16_t *quant, uint8_t *shift, int d) { diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 6ba6cbdd9..0f706a285 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -27,11 +27,15 @@ #endif void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type); -void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx); -void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2); -void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type); -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type); -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx); +void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks); +void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2, + int y_blocks); +void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, + int y_blocks); +void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, + int y_blocks); +void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, + int y_blocks); void vp9_quantize_mb_4x4(MACROBLOCK *x); void vp9_quantize_mb_8x8(MACROBLOCK *x); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 3e44b21fc..35e204aff 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -348,35 +348,36 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { } int vp9_mbblock_error_c(MACROBLOCK *mb) { + MACROBLOCKD * const xd = &mb->e_mbd; BLOCK *be; - BLOCKD *bd; - int i, j; - int berror, error = 0; + int i; + int error = 0; for (i = 0; i < 16; i++) { be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - berror = 0; - for (j = 0; j < 16; j++) { - int this_diff = be->coeff[j] - bd->dqcoeff[j]; - berror += this_diff * this_diff; - } - error += berror; + error += vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16); } return error; } int vp9_mbuverror_c(MACROBLOCK *mb) { + MACROBLOCKD * const xd = &mb->e_mbd; BLOCK *be; - BLOCKD *bd; int i, error = 0; - for (i = 16; i < 24; i++) { + for (i = 16; i < 20; i++) { be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - - error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16); + error += vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16), + 16); + } + for (i = 20; i < 24; i++) { + be = &mb->block[i]; + error += vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16), + 16); } return error; @@ -430,15 +431,18 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int ib, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - TX_SIZE tx_size) { + TX_SIZE tx_size, + int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; - const int eob = xd->eobs[ib]; int c = 0; int cost = 0, pad; const int *scan, *nb; - const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); + const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block]; + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); const int ref = mbmi->ref_frame != INTRA_FRAME; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; @@ -460,6 +464,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, uint8_t token_cache[1024]; // Check for consistency of tx_size with mode info + assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); if (type == PLANE_TYPE_Y_WITH_DC) { assert(xd->mode_info_context->mbmi.txfm_size == tx_size); } else { @@ -562,6 +567,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, abort(); break; } + assert(eob <= seg_eob); VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); nb = vp9_get_coef_neighbors_handle(scan, &pad); @@ -644,7 +650,7 @@ static int rdcost_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *mb) { cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], - TX_4X4); + TX_4X4, 16); return cost; } @@ -680,7 +686,7 @@ static int rdcost_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *mb) { cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], - TX_8X8); + TX_8X8, 16); return cost; } @@ -710,7 +716,7 @@ static int rdcost_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *mb) { vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); + return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16, 16); } static void macro_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *mb, @@ -858,6 +864,26 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, return error > INT_MAX ? INT_MAX : (int)error; } +static int vp9_sb_uv_block_error_c(int16_t *coeff, + int16_t *dqcoeff0, int16_t *dqcoeff1, + int block_size, int shift) { + int i; + int64_t error = 0; + + for (i = 0; i < block_size / 2; i++) { + unsigned int this_diff = coeff[i] - dqcoeff0[i]; + error += this_diff * this_diff; + } + coeff += block_size / 2; + for (i = 0; i < block_size / 2; i++) { + unsigned int this_diff = coeff[i] - dqcoeff1[i]; + error += this_diff * this_diff; + } + error >>= shift; + + return error > INT_MAX ? INT_MAX : (int)error; +} + static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { int cost = 0, b; MACROBLOCKD *const xd = &x->e_mbd; @@ -871,7 +897,7 @@ static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 64; b++) cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above_sb[TX_4X4][b], - tl + vp9_block2left_sb[TX_4X4][b], TX_4X4); + tl + vp9_block2left_sb[TX_4X4][b], TX_4X4, 64); return cost; } @@ -884,7 +910,7 @@ static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_4x4(x); vp9_quantize_sby_4x4(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_4x4(cm, x); *skippable = vp9_sby_is_skippable_4x4(xd); } @@ -902,7 +928,7 @@ static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 64; b += 4) cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above_sb[TX_8X8][b], - tl + vp9_block2left_sb[TX_8X8][b], TX_8X8); + tl + vp9_block2left_sb[TX_8X8][b], TX_8X8, 64); return cost; } @@ -915,7 +941,7 @@ static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_8x8(x); vp9_quantize_sby_8x8(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_8x8(cm, x); *skippable = vp9_sby_is_skippable_8x8(xd); } @@ -933,7 +959,7 @@ static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 64; b += 16) cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above_sb[TX_16X16][b], - tl + vp9_block2left_sb[TX_16X16][b], TX_16X16); + tl + vp9_block2left_sb[TX_16X16][b], TX_16X16, 64); return cost; } @@ -946,7 +972,7 @@ static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_16x16(x); vp9_quantize_sby_16x16(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_16x16(cm, x); *skippable = vp9_sby_is_skippable_16x16(xd); } @@ -960,7 +986,7 @@ static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { vpx_memcpy(&t_above, xd->above_context, sizeof(t_above)); vpx_memcpy(&t_left, xd->left_context, sizeof(t_left)); - return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); + return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32, 64); } static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, @@ -971,7 +997,7 @@ static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 0); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 0); *rate = rdcost_sby_32x32(cm, x); *skippable = vp9_sby_is_skippable_32x32(xd); } @@ -1009,7 +1035,7 @@ static int rdcost_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 256; b++) cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above_sb64[TX_4X4][b], - tl + vp9_block2left_sb64[TX_4X4][b], TX_4X4); + tl + vp9_block2left_sb64[TX_4X4][b], TX_4X4, 256); return cost; } @@ -1022,7 +1048,7 @@ static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_4x4(x); vp9_quantize_sb64y_4x4(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_4x4(cm, x); *skippable = vp9_sb64y_is_skippable_4x4(xd); } @@ -1040,7 +1066,7 @@ static int rdcost_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 256; b += 4) cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above_sb64[TX_8X8][b], - tl + vp9_block2left_sb64[TX_8X8][b], TX_8X8); + tl + vp9_block2left_sb64[TX_8X8][b], TX_8X8, 256); return cost; } @@ -1053,7 +1079,7 @@ static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_8x8(x); vp9_quantize_sb64y_8x8(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_8x8(cm, x); *skippable = vp9_sb64y_is_skippable_8x8(xd); } @@ -1071,7 +1097,7 @@ static int rdcost_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 256; b += 16) cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above_sb64[TX_16X16][b], - tl + vp9_block2left_sb64[TX_16X16][b], TX_16X16); + tl + vp9_block2left_sb64[TX_16X16][b], TX_16X16, 256); return cost; } @@ -1085,7 +1111,7 @@ static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_16x16(x); vp9_quantize_sb64y_16x16(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_16x16(cm, x); *skippable = vp9_sb64y_is_skippable_16x16(xd); } @@ -1103,7 +1129,7 @@ static int rdcost_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { for (b = 0; b < 256; b += 64) cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above_sb64[TX_32X32][b], - tl + vp9_block2left_sb64[TX_32X32][b], TX_32X32); + tl + vp9_block2left_sb64[TX_32X32][b], TX_32X32, 256); return cost; } @@ -1117,7 +1143,7 @@ static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_32x32(x); vp9_quantize_sb64y_32x32(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 0); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 0); *rate = rdcost_sb64y_32x32(cm, x); *skippable = vp9_sb64y_is_skippable_32x32(xd); } @@ -1163,8 +1189,8 @@ static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) { d[29] = p[29]; } -static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, - BLOCKD *b, B_PREDICTION_MODE *best_mode, +static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, + B_PREDICTION_MODE *best_mode, int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, @@ -1175,6 +1201,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, int rate = 0; int distortion; VP9_COMMON *const cm = &cpi->common; + BLOCK *be = x->block + ib; + BLOCKD *b = xd->block + ib; ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; @@ -1188,6 +1216,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4); DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); + assert(ib < 16); #if CONFIG_NEWBINTRAMODES b->bmi.as_mode.context = vp9_find_bpred_context(xd, b); #endif @@ -1224,16 +1253,18 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, vp9_ht_quantize_b_4x4(x, be - x->block, tx_type); } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, be - x->block); + x->quantize_b_4x4(x, be - x->block, 16); } tempa = ta; templ = tl; ratey = cost_coeffs(cm, x, b - xd->block, - PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); + PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16); rate += ratey; - distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; + distortion = vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), + 16) >> 2; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); @@ -1247,7 +1278,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, *a = tempa; *l = templ; copy_predictor(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); + vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32); } } b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode); @@ -1304,7 +1335,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, #endif total_rd += rd_pick_intra4x4block( - cpi, mb, mb->block + i, xd->block + i, &best_mode, + cpi, mb, i, &best_mode, bmode_costs, ta + vp9_block2above[TX_4X4][i], tl + vp9_block2left[TX_4X4][i], &r, &ry, &d); @@ -1504,6 +1535,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // note the input and output index mapping int idx = (ib & 0x02) ? (ib + 2) : ib; + assert(ib < 16); for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t this_rd; int rate_t = 0; @@ -1522,11 +1554,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); else x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x, idx, tx_type); + x->quantize_b_8x8(x, idx, tx_type, 16); // compute quantization mse of 8x8 block distortion = vp9_block_error_c((x->block + idx)->coeff, - (xd->block + idx)->dqcoeff, 64); + BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -1537,7 +1569,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, tl1 = tl0 + 1; rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, - ta0, tl0, TX_8X8); + ta0, tl0, TX_8X8, 16); rate += rate_t; } else { @@ -1563,21 +1595,23 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, } else if (!(i & 1) && get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1); + x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16); do_two = 1; } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, ib + iblock[i]); + x->quantize_b_4x4(x, ib + iblock[i], 16); } - distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); + distortion += vp9_block_error_c(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16), + 16 << do_two); rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, - TX_4X4); + TX_4X4, 16); if (do_two) { i++; rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, - TX_4X4); + TX_4X4, 16); } } b = &xd->block[ib]; @@ -1598,8 +1632,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, best_rd = this_rd; *best_mode = mode; copy_predictor_8x8(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 64); - vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64); + vpx_memcpy(best_dqcoeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 64); + vpx_memcpy(best_dqcoeff + 32, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16) + 64, 64); } } b->bmi.as_mode.first = (*best_mode); @@ -1758,7 +1794,7 @@ static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], - TX_4X4); + TX_4X4, 16); return cost; } @@ -1798,7 +1834,7 @@ static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { for (b = 16; b < 24; b += 4) cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], - tl + vp9_block2left[TX_8X8][b], TX_8X8); + tl + vp9_block2left[TX_8X8][b], TX_8X8, 16); return cost; } @@ -1837,7 +1873,7 @@ static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int backup) { for (b = 16; b < 24; b += 4) cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], - tl + vp9_block2left[TX_8X8][b], TX_16X16); + tl + vp9_block2left[TX_8X8][b], TX_16X16, 64); return cost; } @@ -1851,8 +1887,9 @@ static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, vp9_quantize_sbuv_16x16(x); *rate = rd_cost_sbuv_16x16(cm, x, backup); - *distortion = vp9_sb_block_error_c(x->coeff + 1024, - xd->dqcoeff + 1024, 512, 2); + *distortion = vp9_sb_uv_block_error_c(x->coeff + 1024, + xd->plane[1].dqcoeff, + xd->plane[2].dqcoeff, 512, 2); *skip = vp9_sbuv_is_skippable_16x16(xd); } @@ -2113,7 +2150,7 @@ static int rd_cost_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, for (b = 16; b < 24; b += 4) cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], - tl + vp9_block2left[TX_8X8][b], TX_32X32); + tl + vp9_block2left[TX_8X8][b], TX_32X32, 256); return cost; } @@ -2127,8 +2164,9 @@ static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, vp9_quantize_sb64uv_32x32(x); *rate = rd_cost_sb64uv_32x32(cm, x, backup); - *distortion = vp9_sb_block_error_c(x->coeff + 4096, - xd->dqcoeff + 4096, 2048, 0); + *distortion = vp9_sb_uv_block_error_c(x->coeff + 4096, + xd->plane[1].dqcoeff, + xd->plane[2].dqcoeff, 2048, 0); *skip = vp9_sb64uv_is_skippable_32x32(xd); } @@ -2465,12 +2503,13 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, vp9_subtract_b(be, bd, 16); x->fwd_txm4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(x, i); - thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); + x->quantize_b_4x4(x, i, 16); + thisdistortion = vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16); *distortion += thisdistortion; *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][i], - tl + vp9_block2left[TX_4X4][i], TX_4X4); + tl + vp9_block2left[TX_4X4][i], TX_4X4, 16); } } *distortion >>= 2; @@ -2508,11 +2547,12 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; int which_mv; - int idx = (ib & 8) + ((ib & 2) << 1); - BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx]; + const int idx = (ib & 8) + ((ib & 2) << 1); + BLOCKD *bd = &xd->block[ib]; BLOCK *be = &x->block[ib], *be2 = &x->block[idx]; int thisdistortion; + assert(idx < 16); for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre; @@ -2531,66 +2571,70 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { if (otherrd) { x->fwd_txm8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); + x->quantize_b_8x8(x, idx, DCT_DCT, 16); + thisdistortion = vp9_block_error_c(be2->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); otherdist += thisdistortion; xd->mode_info_context->mbmi.txfm_size = TX_8X8; othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_8X8][idx], tlcp + vp9_block2left[TX_8X8][idx], - TX_8X8); + TX_8X8, 16); xd->mode_info_context->mbmi.txfm_size = TX_4X4; } for (j = 0; j < 4; j += 2) { bd = &xd->block[ib + iblock[j]]; be = &x->block[ib + iblock[j]]; x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); - thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); + x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16); + thisdistortion = vp9_block_error_c(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32); *distortion += thisdistortion; *labelyrate += cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][ib + iblock[j]], tl + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); + TX_4X4, 16); *labelyrate += cost_coeffs(cm, x, ib + iblock[j] + 1, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1], tl + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); + TX_4X4, 16); } } else /* 8x8 */ { if (otherrd) { for (j = 0; j < 4; j += 2) { - BLOCKD *bd = &xd->block[ib + iblock[j]]; BLOCK *be = &x->block[ib + iblock[j]]; x->fwd_txm8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); - thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); + x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16); + thisdistortion = vp9_block_error_c(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32); otherdist += thisdistortion; xd->mode_info_context->mbmi.txfm_size = TX_4X4; othercost += cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_4X4][ib + iblock[j]], tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); + TX_4X4, 16); othercost += cost_coeffs(cm, x, ib + iblock[j] + 1, PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1], tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); + TX_4X4, 16); xd->mode_info_context->mbmi.txfm_size = TX_8X8; } } x->fwd_txm8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); + x->quantize_b_8x8(x, idx, DCT_DCT, 16); + thisdistortion = vp9_block_error_c(be2->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); *distortion += thisdistortion; *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][idx], - tl + vp9_block2left[TX_8X8][idx], TX_8X8); + tl + vp9_block2left[TX_8X8][idx], TX_8X8, + 16); } } } @@ -2850,13 +2894,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) { for (j = 0; j < 16; j++) if (labels[j] == i) - best_eobs[j] = x->e_mbd.eobs[j]; + best_eobs[j] = x->e_mbd.plane[0].eobs[j]; } else { for (j = 0; j < 4; j++) { int ib = vp9_i8x8_block[j], idx = j * 4; if (labels[ib] == i) - best_eobs[idx] = x->e_mbd.eobs[idx]; + best_eobs[idx] = x->e_mbd.plane[0].eobs[idx]; } } if (other_rd < best_other_rd) @@ -3131,7 +3175,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int; if (mbmi->second_ref_frame > 0) bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int; - x->e_mbd.eobs[i] = bsi.eobs[i]; + x->e_mbd.plane[0].eobs[i] = bsi.eobs[i]; } *returntotrate = bsi.r; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 8f9e9da69..158a0bd33 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -117,13 +117,16 @@ static void tokenize_b(VP9_COMP *cpi, TOKENEXTRA **tp, PLANE_TYPE type, TX_SIZE tx_size, + int y_blocks, int dry_run) { MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ int c = 0; - const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ - int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib; + const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); + const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block]; + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); int seg_eob, default_eob, pad; const int segment_id = mbmi->segment_id; const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; @@ -140,6 +143,7 @@ static void tokenize_b(VP9_COMP *cpi, assert(xd->nzcs[ib] == 0); #endif + assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); if (sb_type == BLOCK_SIZE_SB64X64) { a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb64[tx_size][ib]; @@ -338,7 +342,7 @@ int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 16; i++) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } @@ -347,8 +351,10 @@ int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) { int skip = 1; int i; - for (i = 16; i < 24; i++) - skip &= (!xd->eobs[i]); + for (i = 0; i < 4; i++) + skip &= (!xd->plane[1].eobs[i]); + for (i = 0; i < 4; i++) + skip &= (!xd->plane[2].eobs[i]); return skip; } @@ -362,13 +368,13 @@ int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 16; i += 4) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) { - return (!xd->eobs[16]) & (!xd->eobs[20]); + return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]); } static int mb_is_skippable_8x8(MACROBLOCKD *xd) { @@ -382,7 +388,7 @@ static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) { } int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->eobs[0]); + return (!xd->plane[0].eobs[0]); } static int mb_is_skippable_16x16(MACROBLOCKD *xd) { @@ -390,11 +396,11 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) { } int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { - return (!xd->eobs[0]); + return (!xd->plane[0].eobs[0]); } int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->eobs[64]) & (!xd->eobs[80]); + return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]); } static int sb_is_skippable_32x32(MACROBLOCKD *xd) { @@ -407,7 +413,7 @@ int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 64; i += 16) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } @@ -421,7 +427,7 @@ int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 64; i += 4) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } @@ -430,8 +436,10 @@ int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd) { int skip = 1; int i = 0; - for (i = 64; i < 96; i += 4) - skip &= (!xd->eobs[i]); + for (i = 0; i < 16; i += 4) + skip &= (!xd->plane[1].eobs[i]); + for (i = 0; i < 16; i += 4) + skip &= (!xd->plane[2].eobs[i]); return skip; } @@ -445,7 +453,7 @@ int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 64; i++) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } @@ -454,8 +462,10 @@ int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd) { int skip = 1; int i = 0; - for (i = 64; i < 96; i++) - skip &= (!xd->eobs[i]); + for (i = 0; i < 16; i++) + skip &= (!xd->plane[1].eobs[i]); + for (i = 0; i < 16; i++) + skip &= (!xd->plane[2].eobs[i]); return skip; } @@ -511,34 +521,34 @@ void vp9_tokenize_sb(VP9_COMP *cpi, switch (mbmi->txfm_size) { case TX_32X32: tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, dry_run); + TX_32X32, 64, dry_run); for (b = 64; b < 96; b += 16) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); + TX_16X16, 64, dry_run); break; case TX_16X16: for (b = 0; b < 64; b += 16) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_16X16, dry_run); + TX_16X16, 64, dry_run); for (b = 64; b < 96; b += 16) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); + TX_16X16, 64, dry_run); break; case TX_8X8: for (b = 0; b < 64; b += 4) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_8X8, dry_run); + TX_8X8, 64, dry_run); for (b = 64; b < 96; b += 4) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_8X8, dry_run); + TX_8X8, 64, dry_run); break; case TX_4X4: for (b = 0; b < 64; b++) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_4X4, dry_run); + TX_4X4, 64, dry_run); for (b = 64; b < 96; b++) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_4X4, dry_run); + TX_4X4, 64, dry_run); break; default: assert(0); } @@ -552,13 +562,13 @@ int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 256; i += 64) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) { - return (!xd->eobs[256]) & (!xd->eobs[320]); + return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]); } static int sb64_is_skippable_32x32(MACROBLOCKD *xd) { @@ -570,7 +580,7 @@ int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 256; i += 16) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } @@ -579,8 +589,10 @@ int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd) { int skip = 1; int i = 0; - for (i = 256; i < 384; i += 16) - skip &= (!xd->eobs[i]); + for (i = 0; i < 64; i += 16) + skip &= (!xd->plane[1].eobs[i]); + for (i = 0; i < 64; i += 16) + skip &= (!xd->plane[2].eobs[i]); return skip; } @@ -594,7 +606,7 @@ int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 256; i += 4) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } @@ -603,8 +615,10 @@ int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd) { int skip = 1; int i = 0; - for (i = 256; i < 384; i += 4) - skip &= (!xd->eobs[i]); + for (i = 0; i < 64; i += 4) + skip &= (!xd->plane[1].eobs[i]); + for (i = 0; i < 64; i += 4) + skip &= (!xd->plane[2].eobs[i]); return skip; } @@ -618,7 +632,7 @@ int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd) { int i = 0; for (i = 0; i < 256; i++) - skip &= (!xd->eobs[i]); + skip &= (!xd->plane[0].eobs[i]); return skip; } @@ -627,8 +641,10 @@ int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd) { int skip = 1; int i = 0; - for (i = 256; i < 384; i++) - skip &= (!xd->eobs[i]); + for (i = 0; i < 64; i++) + skip &= (!xd->plane[1].eobs[i]); + for (i = 0; i < 64; i++) + skip &= (!xd->plane[2].eobs[i]); return skip; } @@ -685,34 +701,34 @@ void vp9_tokenize_sb64(VP9_COMP *cpi, case TX_32X32: for (b = 0; b < 256; b += 64) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, dry_run); + TX_32X32, 256, dry_run); for (b = 256; b < 384; b += 64) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_32X32, dry_run); + TX_32X32, 256, dry_run); break; case TX_16X16: for (b = 0; b < 256; b += 16) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_16X16, dry_run); + TX_16X16, 256, dry_run); for (b = 256; b < 384; b += 16) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); + TX_16X16, 256, dry_run); break; case TX_8X8: for (b = 0; b < 256; b += 4) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_8X8, dry_run); + TX_8X8, 256, dry_run); for (b = 256; b < 384; b += 4) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_8X8, dry_run); + TX_8X8, 256, dry_run); break; case TX_4X4: for (b = 0; b < 256; b++) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_4X4, dry_run); + TX_4X4, 256, dry_run); for (b = 256; b < 384; b++) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_4X4, dry_run); + TX_4X4, 256, dry_run); break; default: assert(0); } @@ -780,29 +796,29 @@ void vp9_tokenize_mb(VP9_COMP *cpi, cpi->skip_false_count[mb_skip_context] += skip_inc; if (tx_size == TX_16X16) { - tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); + tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, 16, dry_run); for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, 16, dry_run); } } else if (tx_size == TX_8X8) { for (b = 0; b < 16; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, 16, dry_run); } if (xd->mode_info_context->mbmi.mode == I8X8_PRED || xd->mode_info_context->mbmi.mode == SPLITMV) { for (b = 16; b < 24; b++) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, 16, dry_run); } } else { for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, 16, dry_run); } } } else { for (b = 0; b < 16; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, 16, dry_run); for (b = 16; b < 24; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, 16, dry_run); } if (dry_run) *t = t_backup; diff --git a/vp9/encoder/x86/vp9_encodeopt.asm b/vp9/encoder/x86/vp9_encodeopt.asm index 90c793d4f..51314a7a8 100644 --- a/vp9/encoder/x86/vp9_encodeopt.asm +++ b/vp9/encoder/x86/vp9_encodeopt.asm @@ -260,117 +260,3 @@ sym(vp9_mbblock_error_xmm_impl): UNSHADOW_ARGS pop rbp ret - - -;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_mmx_impl) PRIVATE -sym(vp9_mbuverror_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor mm7, mm7 - -.mbuverror_loop_mmx: - - movq mm1, [rsi] - movq mm2, [rdi] - - psubw mm1, mm2 - pmaddwd mm1, mm1 - - - movq mm3, [rsi+8] - movq mm4, [rdi+8] - - psubw mm3, mm4 - pmaddwd mm3, mm3 - - - paddd mm7, mm1 - paddd mm7, mm3 - - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop_mmx - - movq mm0, mm7 - psrlq mm7, 32 - - paddd mm0, mm7 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_xmm_impl) PRIVATE -sym(vp9_mbuverror_xmm_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor xmm3, xmm3 - -.mbuverror_loop: - - movdqa xmm1, [rsi] - movdqa xmm2, [rdi] - - psubw xmm1, xmm2 - pmaddwd xmm1, xmm1 - - paddd xmm3, xmm1 - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop - - pxor xmm0, xmm0 - movdqa xmm1, xmm3 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - paddd xmm1, xmm2 - - movdqa xmm2, xmm1 - - psrldq xmm1, 8 - paddd xmm1, xmm2 - - movq rax, xmm1 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c index 2bf32c569..9557af119 100644 --- a/vp9/encoder/x86/vp9_x86_csystemdependent.c +++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c @@ -26,17 +26,10 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) { int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr); int vp9_mbblock_error_mmx(MACROBLOCK *mb) { short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; + short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff; return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr); } -int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_mmx(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_mmx_impl(s_ptr, d_ptr); -} - void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch); @@ -54,17 +47,10 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) { int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr); int vp9_mbblock_error_xmm(MACROBLOCK *mb) { short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; + short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff; return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr); } -int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_xmm(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_xmm_impl(s_ptr, d_ptr); -} - void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch); |