summaryrefslogtreecommitdiff
path: root/vp9/decoder
diff options
context:
space:
mode:
author: Ronald S. Bultje <rbultje@google.com> 2013-02-13 12:28:19 -0800
committer: Ronald S. Bultje <rbultje@google.com> 2013-02-13 12:28:19 -0800
commit: 42d6be8080c8201f3a1844357c551cefed9d5f99 (patch)
tree: 79757c7eeb76e697f85da549ffde250141ecb8f0 /vp9/decoder
parent: 491d095214bb05fc28a7490d1bc690a73cb019e2 (diff)
download: libvpx-42d6be8080c8201f3a1844357c551cefed9d5f99.tar
libvpx-42d6be8080c8201f3a1844357c551cefed9d5f99.tar.gz
libvpx-42d6be8080c8201f3a1844357c551cefed9d5f99.tar.bz2
libvpx-42d6be8080c8201f3a1844357c551cefed9d5f99.zip
Remove 2nd-order transform for first-order DC coefficients.
Since the addition of the larger-scale transforms (16x16, 32x32), 2nd-order transforms no longer give a benefit at macroblock sizes. At superblock sizes, the 2nd-order transform was never used over the larger transforms. Future work should test whether there is a benefit for that use case.

Change-Id: I90cadfc42befaf201de3eb0c4f7330c56e33330a
Diffstat (limited to 'vp9/decoder')
-rw-r--r-- vp9/decoder/vp9_decodframe.c | 34
-rw-r--r-- vp9/decoder/vp9_dequantize.h | 12
-rw-r--r-- vp9/decoder/vp9_idct_blk.c | 45
3 files changed, 77 insertions, 14 deletions
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 3324186ef..9f4db6bf7 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -304,7 +304,8 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
0, xd->eobs[idx]);
}
}
- } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
+ } else if (xd->mode_info_context->mbmi.mode == SPLITMV ||
+ get_2nd_order_usage(xd) == 0) {
assert(get_2nd_order_usage(xd) == 0);
vp9_dequant_idct_add_y_block_8x8(xd->qcoeff,
xd->block[0].dequant,
@@ -450,7 +451,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.v_buffer,
xd->dst.uv_stride,
xd->eobs + 16);
- } else if (mode == SPLITMV) {
+ } else if (mode == SPLITMV || get_2nd_order_usage(xd) == 0) {
assert(get_2nd_order_usage(xd) == 0);
pbi->idct_add_y_block(xd->qcoeff,
xd->block[0].dequant,
@@ -595,13 +596,8 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ x_idx * 16 + (i & 1) * 8,
stride, stride, 0, b->eob);
}
- vp9_dequant_idct_add_uv_block_8x8_inplace_c(
- xd->qcoeff + 16 * 16, xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd->eobs + 16, xd);
}
- } else {
+ } else if (get_2nd_order_usage(xd) == 1) {
vp9_dequantize_b_2x2(b);
vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8);
((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct
@@ -616,12 +612,17 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd);
- vp9_dequant_idct_add_uv_block_8x8_inplace_c(
- xd->qcoeff + 16 * 16, xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd->eobs + 16, xd);
+ } else {
+ vp9_dequant_idct_add_y_block_8x8_inplace_c(
+ xd->qcoeff, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->eobs, xd);
}
+ vp9_dequant_idct_add_uv_block_8x8_inplace_c(
+ xd->qcoeff + 16 * 16, xd->block[16].dequant,
+ xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.uv_stride, xd->eobs + 16, xd);
};
static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
@@ -653,7 +654,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.y_stride, xd->dst.y_stride);
}
}
- } else {
+ } else if (get_2nd_order_usage(xd) == 1) {
vp9_dequantize_b(b);
if (xd->eobs[24] > 1) {
vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
@@ -673,6 +674,11 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd);
+ } else {
+ vp9_dequant_idct_add_y_block_4x4_inplace_c(
+ xd->qcoeff, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->eobs, xd);
}
vp9_dequant_idct_add_uv_block_4x4_inplace_c(
xd->qcoeff + 16 * 16, xd->block[16].dequant,
diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h
index 2a0ae80e8..2edbd6a3a 100644
--- a/vp9/decoder/vp9_dequantize.h
+++ b/vp9/decoder/vp9_dequantize.h
@@ -77,6 +77,12 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq
const int16_t *dc,
MACROBLOCKD *xd);
+void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dst,
+ int stride,
+ uint16_t *eobs,
+ MACROBLOCKD *xd);
+
void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
unsigned char *dst,
int stride,
@@ -84,6 +90,12 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq
const int16_t *dc,
MACROBLOCKD *xd);
+void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dst,
+ int stride,
+ uint16_t *eobs,
+ MACROBLOCKD *xd);
+
void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
unsigned char *dstu,
unsigned char *dstv,
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index 152527cff..b350e4d68 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -64,6 +64,31 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
}
}
+void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q,
+ const int16_t *dq,
+ uint8_t *dst,
+ int stride,
+ uint16_t *eobs,
+ MACROBLOCKD *xd) {
+ int i, j;
+
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ if (*eobs++ > 1) {
+ vp9_dequant_idct_add_c(q, dq, dst, dst, stride, stride);
+ } else {
+ vp9_dc_only_idct_add_c(q[0]*dq[0], dst, dst, stride, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ dst += 4;
+ }
+
+ dst += 4 * stride - 16;
+ }
+}
+
void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
uint8_t *pre,
uint8_t *dst,
@@ -221,6 +246,26 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q,
xd->eobs[12]);
}
+void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q,
+ const int16_t *dq,
+ uint8_t *dst,
+ int stride,
+ uint16_t *eobs,
+ MACROBLOCKD *xd) {
+ vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 0, xd->eobs[0]);
+
+ vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8,
+ dst + 8, stride, stride, 0, xd->eobs[4]);
+
+ vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride,
+ dst + 8 * stride, stride, stride, 0,
+ xd->eobs[8]);
+
+ vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8,
+ dst + 8 * stride + 8, stride, stride, 0,
+ xd->eobs[12]);
+}
+
void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
uint8_t *pre,
uint8_t *dst,