From b575394e215ea46c9885992d85c3047de5171f4c Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Wed, 12 Dec 2012 15:49:39 -0800 Subject: Improved vp9_ihtllm_c As suggested by Yaowu, we can use eob to reduce the complexity of the vp9_ihtllm_c function. For the 1080p test clip used, the decoder performance improved by 17%. Change-Id: I32486f2f06f9b8f60467d2a574209aa3a3daa435 --- vp9/decoder/vp9_decodframe.c | 19 ++++++++------- vp9/decoder/vp9_dequantize.c | 56 +++++++++++++++++++++++++------------------- vp9/decoder/vp9_dequantize.h | 7 +++--- 3 files changed, 47 insertions(+), 35 deletions(-) (limited to 'vp9/decoder') diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index f95a83afa..b18ef8b02 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -248,7 +248,8 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff, xd->block[0].dequant, xd->predictor, - xd->dst.y_buffer, 16, xd->dst.y_stride); + xd->dst.y_buffer, 16, xd->dst.y_stride, + xd->eobs[0]); } else { vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, @@ -294,7 +295,8 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, } tx_type = get_tx_type_8x8(xd, &xd->block[ib]); if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride); + vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride, + xd->eobs[idx]); } else { vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[idx]); @@ -393,7 +395,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, - b->dst_stride); + b->dst_stride, b->eob); } else { vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride); @@ -438,7 +440,8 @@ static void decode_4x4(VP9D_COMP *pbi, 
MACROBLOCKD *xd, if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + *(b->base_dst) + b->dst, 16, b->dst_stride, + b->eob); } else { vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride); @@ -500,7 +503,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, - b->dst_stride); + b->dst_stride, b->eob); } else { vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride); @@ -553,7 +556,7 @@ static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, tx_type, xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->dst.y_stride); + xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob); } else { vp9_dequant_idct_add_16x16( xd->qcoeff, xd->block[0].dequant, @@ -591,7 +594,7 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 1) * 8, xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride + x_idx * 16 + (i & 1) * 8, - stride, stride); + stride, stride, b->eob); } else { vp9_dequant_idct_add_8x8_c( q, dq, @@ -647,7 +650,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 3) * 4, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride); + xd->dst.y_stride, xd->dst.y_stride, b->eob); } else { vp9_dequant_idct_add_c( b->qcoeff, b->dequant, diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 79114d58c..39a2de14b 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -13,7 +13,6 @@ #include "vp9/decoder/vp9_dequantize.h" #include "vpx_mem/vpx_mem.h" #include 
"vp9/decoder/vp9_onyxd_int.h" - static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride, int width, int height) { int r, c; @@ -74,7 +73,7 @@ void vp9_dequantize_b_c(BLOCKD *d) { void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, - int pitch, int stride) { + int pitch, int stride, uint16_t eobs) { int16_t output[16]; int16_t *diff_ptr = output; int i; @@ -83,7 +82,7 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, input[i] = dq[i] * input[i]; } - vp9_ihtllm(input, output, 4 << 1, tx_type, 4); + vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs); vpx_memset(input, 0, 32); @@ -93,21 +92,25 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, - int pitch, int stride) { + int pitch, int stride, uint16_t eobs) { int16_t output[64]; int16_t *diff_ptr = output; int i; + if (eobs == 0) { + /* All 0 DCT coefficient */ + vp9_copy_mem8x8(pred, pitch, dest, stride); + } else if (eobs > 0) { + input[0] = dq[0] * input[0]; + for (i = 1; i < 64; i++) { + input[i] = dq[1] * input[i]; + } - input[0] = dq[0] * input[0]; - for (i = 1; i < 64; i++) { - input[i] = dq[1] * input[i]; - } - - vp9_ihtllm(input, output, 16, tx_type, 8); + vp9_ihtllm(input, output, 16, tx_type, 8, eobs); - vpx_memset(input, 0, 128); + vpx_memset(input, 0, 128); - add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); + add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); + } } void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, @@ -269,26 +272,31 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride) { + uint8_t *dest, int pitch, int stride, + uint16_t eobs) { 
int16_t output[256]; int16_t *diff_ptr = output; int i; + if (eobs == 0) { + /* All 0 DCT coefficient */ + vp9_copy_mem16x16(pred, pitch, dest, stride); + } else if (eobs > 0) { + input[0]= input[0] * dq[0]; - input[0]= input[0] * dq[0]; - - // recover quantizer for 4 4x4 blocks - for (i = 1; i < 256; i++) - input[i] = input[i] * dq[1]; + // recover quantizer for 4 4x4 blocks + for (i = 1; i < 256; i++) + input[i] = input[i] * dq[1]; - // inverse hybrid transform - vp9_ihtllm(input, output, 32, tx_type, 16); + // inverse hybrid transform + vp9_ihtllm(input, output, 32, tx_type, 16, eobs); - // the idct halves ( >> 1) the pitch - // vp9_short_idct16x16_c(input, output, 32); + // the idct halves ( >> 1) the pitch + // vp9_short_idct16x16_c(input, output, 32); - vpx_memset(input, 0, 512); + vpx_memset(input, 0, 512); - add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); + add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); + } } void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 8a6bf2b26..f348b21b0 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -58,16 +58,17 @@ typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, const short *dq, void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, const short *dq, unsigned char *pred, unsigned char *dest, - int pitch, int stride); + int pitch, int stride, uint16_t eobs); void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, const short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride); + unsigned char *dest, int pitch, int stride, + uint16_t eobs); void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, const short *dq, unsigned char *pred, unsigned char *dest, - int pitch, int stride); + int pitch, int stride, uint16_t eobs); #if CONFIG_SUPERBLOCKS void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq, -- 
cgit v1.2.3 From 6247b239bca25cadb5fb16b1bc6f4c77e764c4ff Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 13 Dec 2012 23:53:11 -0800 Subject: reset segment map on key frame This is to fix a decoder crash when the decoder skips a number of frames to continue decoding from a later key frame. Change-Id: I3ba116eba6c3440e0528a21f53745f694302e4ad --- vp9/decoder/vp9_decodframe.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'vp9/decoder') diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index b18ef8b02..87f52df7f 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -38,7 +38,7 @@ #define COEFCOUNT_TESTING -// #define DEC_DEBUG +//#define DEC_DEBUG #ifdef DEC_DEBUG int dec_debug = 0; #endif @@ -978,8 +978,8 @@ decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd, } #endif #ifdef DEC_DEBUG - dec_debug = (pbi->common.current_video_frame == 46 && - mb_row == 5 && mb_col == 2); + dec_debug = (pbi->common.current_video_frame == 1 && + mb_row == 2 && mb_col == 8); if (dec_debug) #if CONFIG_SUPERBLOCKS printf("Enter Debug %d %d sb %d\n", mb_row, mb_col, @@ -1139,7 +1139,10 @@ static void init_frame(VP9D_COMP *pbi) { MACROBLOCKD *const xd = &pbi->mb; if (pc->frame_type == KEY_FRAME) { - /* Various keyframe initializations */ + + if (pc->last_frame_seg_map) + vpx_memset(pc->last_frame_seg_map, 0, (pc->mb_rows * pc->mb_cols)); + vp9_init_mv_probs(pc); vp9_init_mbmode_probs(pc); @@ -1182,6 +1185,7 @@ static void init_frame(VP9D_COMP *pbi) { vp9_update_mode_info_border(pc, pc->mip); vp9_update_mode_info_in_image(pc, pc->mi); + } else { if (!pc->use_bilinear_mc_filter) @@ -1423,6 +1427,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->ref_pred_probs[0] = 120; pc->ref_pred_probs[1] = 80; pc->ref_pred_probs[2] = 40; + } else { for (i = 0; i < PREDICTION_PROBS; i++) { if (vp9_read_bit(&header_bc)) -- cgit v1.2.3 From
5ebe94f9f148693a34bc861ef97a7fff6fc38251 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Sun, 23 Dec 2012 07:20:10 -0800 Subject: Build fixes to merge vp9-preview into master Various fixups to resolve issues when building vp9-preview under the more stringent checks placed on the experimental branch. Change-Id: I21749de83552e1e75c799003f849e6a0f1a35b07 --- vp9/decoder/vp9_dboolhuff.h | 2 +- vp9/decoder/vp9_decodframe.c | 4 +++- vp9/decoder/vp9_onyxd_int.h | 2 +- vp9/decoder/x86/vp9_idct_blk_mmx.c | 2 +- vp9/decoder/x86/vp9_idct_blk_sse2.c | 2 +- vp9/decoder/x86/vp9_x86_dsystemdependent.c | 2 +- 6 files changed, 8 insertions(+), 6 deletions(-) (limited to 'vp9/decoder') diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index a1c0c7956..635bd5b7d 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -13,7 +13,7 @@ #define VP9_DECODER_VP9_DBOOLHUFF_H_ #include #include -#include "vpx_ports/config.h" +#include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 87f52df7f..12feca6c4 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -927,9 +927,9 @@ decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd, mb_col = 0; for (sb_col = 0; sb_col < sb_cols; sb_col++) { +#if CONFIG_SUPERBLOCKS MODE_INFO *mi = xd->mode_info_context; -#if CONFIG_SUPERBLOCKS mi->mbmi.encoded_as_sb = vp9_read(bc, pc->sb_coded); #endif @@ -942,7 +942,9 @@ decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd, xd->mb_index = i; +#if CONFIG_SUPERBLOCKS mi = xd->mode_info_context; +#endif if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) { // MB lies outside frame, skip on to next mb_row += dy; diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index e4f3228c7..49e13f7f4 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -11,7 +11,7 @@ 
#ifndef VP9_DECODER_VP9_ONYXD_INT_H_ #define VP9_DECODER_VP9_ONYXD_INT_H_ -#include "vpx_ports/config.h" +#include "./vpx_config.h" #include "vp9/common/vp9_onyxd.h" #include "vp9/decoder/vp9_treereader.h" #include "vp9/common/vp9_onyxc_int.h" diff --git a/vp9/decoder/x86/vp9_idct_blk_mmx.c b/vp9/decoder/x86/vp9_idct_blk_mmx.c index df3485233..8279eaa4a 100644 --- a/vp9/decoder/x86/vp9_idct_blk_mmx.c +++ b/vp9/decoder/x86/vp9_idct_blk_mmx.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_ports/config.h" +#include "./vpx_config.h" #include "vp9/common/vp9_blockd.h" #include "vp9/decoder/vp9_dequantize.h" #include "vp9/decoder/x86/vp9_idct_mmx.h" diff --git a/vp9/decoder/x86/vp9_idct_blk_sse2.c b/vp9/decoder/x86/vp9_idct_blk_sse2.c index 6c1fd1439..badd97f73 100644 --- a/vp9/decoder/x86/vp9_idct_blk_sse2.c +++ b/vp9/decoder/x86/vp9_idct_blk_sse2.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_ports/config.h" +#include "./vpx_config.h" #include "vp9/common/vp9_blockd.h" #include "vp9/decoder/vp9_dequantize.h" diff --git a/vp9/decoder/x86/vp9_x86_dsystemdependent.c b/vp9/decoder/x86/vp9_x86_dsystemdependent.c index d1cc53fce..51ee8ec31 100644 --- a/vp9/decoder/x86/vp9_x86_dsystemdependent.c +++ b/vp9/decoder/x86/vp9_x86_dsystemdependent.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_ports/config.h" +#include "./vpx_config.h" #include "vpx_ports/x86.h" #include "vp9/decoder/vp9_onyxd_int.h" -- cgit v1.2.3