summaryrefslogtreecommitdiff
path: root/vp9/decoder
diff options
context:
space:
mode:
author Jingning Han <jingning@google.com> 2015-07-07 15:32:27 -0700
committer Jingning Han <jingning@google.com> 2015-07-08 09:26:02 -0700
commit 7e0d0de2113fcfa255803814f5c8872ddf490f65 (patch)
tree 31cc2df36ac00d949b97d4c237930baa8d84f59b /vp9/decoder
parent 76ccba9ec88fd421814a21f8049a16e71ae9d119 (diff)
downloadlibvpx-7e0d0de2113fcfa255803814f5c8872ddf490f65.tar
libvpx-7e0d0de2113fcfa255803814f5c8872ddf490f65.tar.gz
libvpx-7e0d0de2113fcfa255803814f5c8872ddf490f65.tar.bz2
libvpx-7e0d0de2113fcfa255803814f5c8872ddf490f65.zip
Refactor inverse_transform_block argument list
Replace the block index with the transform type in the argument list. This saves an extra fetch of the prediction mode. For pedestrian area 1080p coded at 5 Mbps with a single tile, the average decoding speed goes up from 80.55 fps (before the refactoring series) to 81.13 fps.

Change-Id: Icbebf84ce63c19c0c92f3690ed201f6c3eab7881
Diffstat (limited to 'vp9/decoder')
-rw-r--r--vp9/decoder/vp9_decodeframe.c131
1 files changed, 102 insertions, 29 deletions
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 699f18794..6d2a92b1e 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -182,35 +182,119 @@ static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) {
}
}
-static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
- TX_SIZE tx_size, uint8_t *dst, int stride,
- int eob) {
+static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
+ const TX_SIZE tx_size,
+ uint8_t *dst, int stride,
+ int eob) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ if (eob > 0) {
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (xd->lossless) {
+ vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
+ } else {
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_8X8:
+ vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_16X16:
+ vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_32X32:
+ vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ }
+ }
+ } else {
+ if (xd->lossless) {
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_idct4x4_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ vp9_idct8x8_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ vp9_idct16x16_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ return;
+ }
+ }
+ }
+#else
+ if (xd->lossless) {
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_idct4x4_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ vp9_idct8x8_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ vp9_idct16x16_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ return;
+ }
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (eob == 1) {
+ dqcoeff[0] = 0;
+ } else {
+ if (tx_size <= TX_16X16 && eob <= 10)
+ memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
+ else if (tx_size == TX_32X32 && eob <= 34)
+ memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
+ else
+ memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
+ }
+ }
+}
+
+static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
+ const TX_TYPE tx_type,
+ const TX_SIZE tx_size,
+ uint8_t *dst, int stride,
+ int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
if (eob > 0) {
- TX_TYPE tx_type = DCT_DCT;
tran_low_t *const dqcoeff = pd->dqcoeff;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (xd->lossless) {
- tx_type = DCT_DCT;
vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
} else {
- const PLANE_TYPE plane_type = pd->plane_type;
switch (tx_size) {
case TX_4X4:
- tx_type = get_tx_type_4x4(plane_type, xd, block);
vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
break;
case TX_8X8:
- tx_type = get_tx_type(plane_type, xd);
vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
break;
case TX_16X16:
- tx_type = get_tx_type(plane_type, xd);
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
break;
case TX_32X32:
- tx_type = DCT_DCT;
vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
break;
default:
@@ -219,25 +303,19 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
}
} else {
if (xd->lossless) {
- tx_type = DCT_DCT;
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
} else {
- const PLANE_TYPE plane_type = pd->plane_type;
switch (tx_size) {
case TX_4X4:
- tx_type = get_tx_type_4x4(plane_type, xd, block);
vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_8X8:
- tx_type = get_tx_type(plane_type, xd);
vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_16X16:
- tx_type = get_tx_type(plane_type, xd);
vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_32X32:
- tx_type = DCT_DCT;
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
break;
default:
@@ -248,25 +326,19 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
}
#else
if (xd->lossless) {
- tx_type = DCT_DCT;
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
} else {
- const PLANE_TYPE plane_type = pd->plane_type;
switch (tx_size) {
case TX_4X4:
- tx_type = get_tx_type_4x4(plane_type, xd, block);
vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_8X8:
- tx_type = get_tx_type(plane_type, xd);
vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_16X16:
- tx_type = get_tx_type(plane_type, xd);
vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
break;
case TX_32X32:
- tx_type = DCT_DCT;
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
break;
default:
@@ -315,14 +387,15 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
x, y, plane);
if (!mi->mbmi.skip) {
+ const TX_TYPE tx_type = (plane || xd->lossless) ?
+ DCT_DCT : intra_mode_to_tx_type_lookup[mode];
const scan_order *sc = (plane || xd->lossless) ?
- &vp9_default_scan_orders[tx_size] :
- &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]];
+ &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type];
const int eob = vp9_decode_block_tokens(xd, plane, sc,
plane_bsize, x, y, tx_size,
args->r, args->seg_id);
- inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
- eob);
+ inverse_transform_block_intra(xd, plane, tx_type, tx_size,
+ dst, pd->dst.stride, eob);
}
}
@@ -344,9 +417,9 @@ static void reconstruct_inter_block(int plane, int block,
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
eob = vp9_decode_block_tokens(xd, plane, sc, plane_bsize,
x, y, tx_size, args->r, args->seg_id);
- inverse_transform_block(xd, plane, block, tx_size,
- &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
- pd->dst.stride, eob);
+ inverse_transform_block_inter(xd, plane, tx_size,
+ &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
+ pd->dst.stride, eob);
*args->eobtotal += eob;
}