diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_blockd.h | 4 | ||||
-rw-r--r-- | vp9/common/vp9_findnearmv.c | 4 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 19 | ||||
-rw-r--r-- | vp9/decoder/vp9_dequantize.c | 40 | ||||
-rw-r--r-- | vp9/decoder/vp9_dequantize.h | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_idct_blk.c | 77 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2_intrinsics.c (renamed from vp9/encoder/x86/vp9_dct_sse2.c) | 0 | ||||
-rw-r--r-- | vp9/vp9cx.mk | 6 |
9 files changed, 60 insertions, 94 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index c7429d553..b35c1c246 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -380,9 +380,7 @@ typedef struct macroblockd { void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch); void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch); void (*itxm_add)(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *output, int pitch, int stride); - void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr, - uint8_t *dst_ptr, int pitch, int stride); + uint8_t *pred, uint8_t *output, int pitch, int stride, int eob); void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd); void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index f713212a7..77b79395e 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -190,7 +190,11 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, col_offset = this_mv.as_mv.col >> 3; offset = ref_y_stride * row_offset + col_offset; score = 0; +#if !CONFIG_ABOVESPREFMV if (xd->up_available) { +#else + if (xd->up_available && xd->left_available) { +#endif vp9_sub_pixel_variance16x2(above_ref + offset, ref_y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 3e4e25114..2bd26c83e 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -38,7 +38,7 @@ specialize vp9_dequant_idct_add_16x16 prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_8x8 -prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride" +prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 8e9e5ad7d..d4212a52e 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -126,7 +126,6 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1; xd->inv_txm4x4 = vp9_short_idct4x4llm; xd->itxm_add = vp9_dequant_idct_add; - xd->dc_only_itxm_add = vp9_dc_only_idct_add; xd->itxm_add_y_block = vp9_dequant_idct_add_y_block; xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; if (xd->lossless) { @@ -134,7 +133,6 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8; xd->inv_txm4x4 = vp9_short_inv_walsh4x4_x8; xd->itxm_add = vp9_dequant_idct_add_lossless_c; - xd->dc_only_itxm_add = vp9_dc_only_inv_walsh_add_c; xd->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; } @@ -297,11 +295,11 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor); xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor); xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, @@ -351,17 +349,18 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, b->dst_stride, xd->eobs[ib + iblock[j]]); } else { xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + *(b->base_dst) + b->dst, 16, b->dst_stride, + xd->eobs[ib + iblock[j]]); } } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride); + *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); } } else if (mode == B_PRED) { for (i = 0; i < 16; i++) { @@ -384,7 +383,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->eobs[i]); } else { xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } } if (!xd->mode_info_context->mbmi.mb_skip_coeff) { @@ -440,7 +439,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, b->dst_stride, xd->eobs[i]); } else { xd->itxm_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } } xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, @@ -549,7 +548,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 3) * 4, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride); + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]); } } } else { diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index c2d42ea4a..46e5656bd 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -81,17 +81,24 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, } void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride) { + uint8_t *dest, int pitch, int stride, int eob) { int i; int16_t output[16]; - for (i = 0; i < 16; i++) - input[i] *= dq[i]; + if (eob > 1) { + for (i = 0; i < 16; i++) + input[i] *= dq[i]; - // the idct halves ( >> 1) the pitch - vp9_short_idct4x4llm_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); - add_residual(output, pred, pitch, dest, stride, 4, 4); + // the idct halves ( >> 1) the pitch + vp9_short_idct4x4llm_c(input, output, 4 << 1); + + vpx_memset(input, 0, 32); + + add_residual(output, pred, pitch, dest, stride, 4, 4); + } else { + vp9_dc_only_idct_add(input[0]*dq[0], pred, dest, pitch, stride); + ((int *)input)[0] = 0; + } } void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, @@ -112,16 +119,23 @@ void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred, void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, - int pitch, int stride) { + int pitch, int stride, int eob) { int i; int16_t output[16]; - for (i = 0; i < 16; i++) - input[i] *= dq[i]; + if (eob > 1) { + for (i = 0; i < 16; i++) + input[i] *= dq[i]; - vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); - add_residual(output, pred, pitch, dest, stride, 4, 4); + vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); + + vpx_memset(input, 0, 32); + + add_residual(output, pred, pitch, dest, stride, 4, 4); + } else { + vp9_dc_only_inv_walsh_add(input[0]*dq[0], pred, dest, pitch, stride); + ((int *)input)[0] = 0; + } } void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 2e7c07357..bde27bb7a 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -18,7 +18,7 @@ void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *output, - int pitch, int stride); + int pitch, int stride, int eob); void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, unsigned char *pred, diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 0ec5036e4..b17955b1c 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -21,13 +21,7 @@ void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - if (xd->eobs[i * 4 + j] > 1) { - xd->itxm_add(q, dq, dst, dst, stride, stride); - } else { - xd->dc_only_itxm_add(q[0]*dq[0], dst, dst, stride, stride); - ((int *)q)[0] = 0; - } - + xd->itxm_add(q, dq, dst, dst, stride, stride, xd->eobs[i * 4 + j]); q += 16; dst += 4; } @@ -44,13 +38,7 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - if (xd->eobs[i * 4 + j] > 1) - vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride); - else { - vp9_dc_only_idct_add(q[0]*dq[0], pre, dst, 16, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, xd->eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; @@ -69,13 +57,8 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (xd->eobs[16 + i * 2 + j] > 1) - vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride); - else { - vp9_dc_only_idct_add(q[0]*dq[0], pre, dstu, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride, + xd->eobs[16 + i * 2 + j]); q += 16; pre += 4; dstu += 4; @@ -87,13 +70,8 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (xd->eobs[20 + i * 2 + j] > 1) - vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride); - else { - vp9_dc_only_idct_add(q[0]*dq[0], pre, dstv, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride, + xd->eobs[20 + i * 2 + j]); q += 16; pre += 4; dstv += 4; @@ -113,13 +91,7 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (xd->eobs[16 + i * 2 + j] > 1) { - xd->itxm_add(q, dq, dstu, dstu, stride, stride); - } else { - xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride); - ((int *)q)[0] = 0; - } - + xd->itxm_add(q, dq, dstu, dstu, stride, stride, xd->eobs[16 + i * 2 + j]); q += 16; dstu += 4; } @@ -129,13 +101,7 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (xd->eobs[20 + i * 2 + j] > 1) { - xd->itxm_add(q, dq, dstv, dstv, stride, stride); - } else { - xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride); - ((int *)q)[0] = 0; - } - + xd->itxm_add(q, dq, dstv, dstv, stride, stride, xd->eobs[20 + i * 2 + j]); q += 16; dstv += 4; } @@ -216,13 +182,8 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - if (xd->eobs[i * 4 + j] > 1) - vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride); - else { - vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dst, 16, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride, + xd->eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; @@ -243,13 +204,8 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (xd->eobs[16 + i * 2 + j] > 1) - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride); - else { - vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstu, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride, + xd->eobs[16 + i * 2 + j]); q += 16; pre += 4; dstu += 4; @@ -261,13 +217,8 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - if (xd->eobs[20 + i * 2 + j] > 1) - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride); - else { - vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstv, 8, stride); - ((int *)q)[0] = 0; - } - + vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride, + xd->eobs[20 + i * 2 + j]); q += 16; pre += 4; dstv += 4; diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c index ff884d999..ff884d999 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 22fa565e5..43dba1373 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -110,10 +110,10 @@ VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2_intrinsics.c ifeq ($(HAVE_SSE2),yes) -vp9/encoder/x86/vp9_dct_sse2.c.d: CFLAGS += -msse2 -vp9/encoder/x86/vp9_dct_sse2.c.o: CFLAGS += -msse2 +vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.d: CFLAGS += -msse2 +vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.o: CFLAGS += -msse2 endif |