From 47b6030dda54dd6422b5e01774608581b515575a Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Thu, 31 Oct 2013 13:52:08 -0700 Subject: Reducing the number of foreach_transformed_block() calls. The change doesn't affect the bitstream. It changes the order of function calls and affects how we reconstruct intra- and inter-blocks. Speed up is about 1...1.5%. For intra-blocks: Before: for each transform block read tokens for each transform block do prediction for each transform block do inverse transform Now: for each transform block read tokens do prediction do inverse transform For inter-blocks: Before: for each transform block read tokens for each transform block do inverse transform Now: for each transform block read tokens do inverse transform Change-Id: I12a79bf1aa5a18c351b8010369bd3ff1deae1570 --- vp9/decoder/vp9_decodframe.c | 98 +++++++++++++++++++++++++++----------------- vp9/decoder/vp9_detokenize.c | 38 ++++------------- vp9/decoder/vp9_detokenize.h | 6 +-- 3 files changed, 72 insertions(+), 70 deletions(-) (limited to 'vp9/decoder') diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index bf3a10159..e3a2b7710 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -244,9 +244,8 @@ static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) { aligned_mi_cols)); } -static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - MACROBLOCKD* const xd = arg; +static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t* const qcoeff = BLOCK_OFFSET(pd->qcoeff, block); const int stride = pd->dst.stride; @@ -292,9 +291,19 @@ static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, } } -static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - MACROBLOCKD* const xd = arg; +struct 
intra_args { + VP9_COMMON *cm; + MACROBLOCKD *xd; + vp9_reader *r; +}; + +static void predict_and_reconstruct_intra_block(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct intra_args *const args = arg; + VP9_COMMON *const cm = args->cm; + MACROBLOCKD *const xd = args->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; MODE_INFO *const mi = xd->mi_8x8[0]; const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, @@ -313,25 +322,30 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, b_width_log2(plane_bsize), tx_size, mode, dst, pd->dst.stride, dst, pd->dst.stride); - if (!mi->mbmi.skip_coeff) - decode_block(plane, block, plane_bsize, tx_size, arg); + if (!mi->mbmi.skip_coeff) { + vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size, + args->r); + inverse_transform_block(xd, plane, block, plane_bsize, tx_size); + } } -static int decode_tokens(VP9_COMMON *const cm, MACROBLOCKD *const xd, - BLOCK_SIZE bsize, vp9_reader *r) { - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - - if (mbmi->skip_coeff) { - reset_skip_context(xd, bsize); - return -1; - } else { - if (cm->seg.enabled) - setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id, - cm->base_qindex)); - - // TODO(dkovalev) if (!vp9_reader_has_error(r)) - return vp9_decode_tokens(cm, xd, &cm->seg, r, bsize); - } +struct inter_args { + VP9_COMMON *cm; + MACROBLOCKD *xd; + vp9_reader *r; + int *eobtotal; +}; + +static void reconstruct_inter_block(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct inter_args *args = arg; + VP9_COMMON *const cm = args->cm; + MACROBLOCKD *const xd = args->xd; + + *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block, + plane_bsize, tx_size, args->r); + inverse_transform_block(xd, plane, block, plane_bsize, tx_size); } static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, @@ -385,7 +399,6 @@ static 
void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r, BLOCK_SIZE bsize) { const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; - int eobtotal; set_offsets(cm, xd, tile, bsize, mi_row, mi_col); vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); @@ -395,32 +408,41 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, // Has to be called after set_offsets mbmi = &xd->mi_8x8[0]->mbmi; - eobtotal = decode_tokens(cm, xd, bsize, r); - if (!is_inter_block(mbmi)) { - // Intra reconstruction - foreach_transformed_block(xd, bsize, decode_block_intra, xd); + if (mbmi->skip_coeff) { + reset_skip_context(xd, bsize); } else { - // Inter reconstruction - const int decode_blocks = (eobtotal > 0); - - if (!less8x8) { - assert(mbmi->sb_type == bsize); - if (eobtotal == 0) - mbmi->skip_coeff = 1; // skip loopfilter - } + if (cm->seg.enabled) + setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id, + cm->base_qindex)); + } + if (!is_inter_block(mbmi)) { + struct intra_args arg = { cm, xd, r }; + foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block, + &arg); + } else { + // Setup set_ref(cm, xd, 0, mi_row, mi_col); if (has_second_ref(mbmi)) set_ref(cm, xd, 1, mi_row, mi_col); xd->subpix.filter_x = xd->subpix.filter_y = vp9_get_filter_kernel(mbmi->interp_filter); + + // Prediction vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - if (decode_blocks) - foreach_transformed_block(xd, bsize, decode_block, xd); + // Reconstruction + if (!mbmi->skip_coeff) { + int eobtotal = 0; + struct inter_args arg = { cm, xd, r, &eobtotal }; + foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); + if (!less8x8 && eobtotal == 0) + mbmi->skip_coeff = 1; // skip loopfilter + } } + xd->corrupted |= vp9_reader_has_error(r); } diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 0d0f0dfe0..6ecce2867 100644 --- a/vp9/decoder/vp9_detokenize.c +++ 
b/vp9/decoder/vp9_detokenize.c @@ -210,45 +210,25 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, return c; } -struct decode_block_args { - VP9_COMMON *cm; - MACROBLOCKD *xd; - struct segmentation *seg; - vp9_reader *r; - int *eobtotal; -}; - -static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *argv) { - const struct decode_block_args* const arg = argv; - - // find the maximum eob for this transform size, adjusted by segment - MACROBLOCKD *xd = arg->xd; - const struct segmentation *seg = arg->seg; - struct macroblockd_plane* pd = &xd->plane[plane]; - const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; - const int seg_eob = get_tx_eob(seg, segment_id, tx_size); +int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, vp9_reader *r) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id, + tx_size); int aoff, loff, eob, pt; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); pt = get_entropy_context(tx_size, pd->above_context + aoff, pd->left_context + loff); - eob = decode_coefs(arg->cm, xd, arg->r, block, + eob = decode_coefs(cm, xd, r, block, pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block), tx_size, pd->dequant, pt); set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff); pd->eobs[block] = eob; - *arg->eobtotal += eob; + return eob; } -int vp9_decode_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, - struct segmentation *seg, - vp9_reader *r, BLOCK_SIZE bsize) { - int eobtotal = 0; - struct decode_block_args args = {cm, xd, seg, r, &eobtotal}; - foreach_transformed_block(xd, bsize, decode_block, &args); - return eobtotal; -} + diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index 0fb4c3cc9..94dd8e46e 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -15,8 +15,8 @@ 
#include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_dboolhuff.h" -int vp9_decode_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, - struct segmentation *seg, - vp9_reader *r, BLOCK_SIZE bsize); +int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, vp9_reader *r); #endif // VP9_DECODER_VP9_DETOKENIZE_H_ -- cgit v1.2.3