summary | refs | log | tree | commit | diff
path: root/vp9/common
diff options
context:
space:
mode:
author: Scott LaVarnway <slavarnway@google.com> 2012-12-12 15:49:39 -0800
committer: Scott LaVarnway <slavarnway@google.com> 2012-12-12 15:49:39 -0800
commit: b575394e215ea46c9885992d85c3047de5171f4c (patch)
tree: 218751930146b341518524af1e2c13e0e7be4cb6 /vp9/common
parent: b192d99f7367871908551ca940e06b7cb94e760b (diff)
download:
libvpx-b575394e215ea46c9885992d85c3047de5171f4c.tar
libvpx-b575394e215ea46c9885992d85c3047de5171f4c.tar.gz
libvpx-b575394e215ea46c9885992d85c3047de5171f4c.tar.bz2
libvpx-b575394e215ea46c9885992d85c3047de5171f4c.zip
Improved vp9_ihtllm_c
As suggested by Yaowu, we can use eob to reduce the complexity of the vp9_ihtllm_c function. For the 1080p test clip used, the decoder performance improved by 17%.

Change-Id: I32486f2f06f9b8f60467d2a574209aa3a3daa435
Diffstat (limited to 'vp9/common')
-rw-r--r-- vp9/common/vp9_idctllm.c 22
-rw-r--r-- vp9/common/vp9_invtrans.c 10
-rw-r--r-- vp9/common/vp9_rtcd_defs.sh 2
3 files changed, 25 insertions, 9 deletions
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index 9622dfdee..897514ee1 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -404,8 +404,9 @@ void vp9_ihtllm_float_c(const int16_t *input, int16_t *output, int pitch,
#define HORIZONTAL_SHIFT 17 // 15
#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
- TX_TYPE tx_type, int tx_dim) {
+ TX_TYPE tx_type, int tx_dim, uint16_t eobs) {
int i, j, k;
+ int nz_dim;
int16_t imbuf[256];
const int16_t *ip = input;
@@ -444,12 +445,25 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
break;
}
+ nz_dim = tx_dim;
+ if(tx_dim > 4) {
+ if(eobs < 36) {
+ vpx_memset(im, 0, 512);
+ nz_dim = 8;
+ if(eobs < 3) {
+ nz_dim = 2;
+ } else if(eobs < 10) {
+ nz_dim = 4;
+ }
+ }
+ }
+
/* vertical transformation */
for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
+ for (i = 0; i < nz_dim; i++) {
int temp = 0;
- for (k = 0; k < tx_dim; k++) {
+ for (k = 0; k < nz_dim; k++) {
temp += ptv[k] * ip[(k * tx_dim)];
}
@@ -470,7 +484,7 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
for (i = 0; i < tx_dim; i++) {
int temp = 0;
- for (k = 0; k < tx_dim; k++) {
+ for (k = 0; k < nz_dim; k++) {
temp += im[k] * pthc[k];
}
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index c78f1ad3c..eff919865 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -52,7 +52,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,
- tx_type, 4);
+ tx_type, 4, xd->block[i].eob);
} else {
vp9_inverse_transform_b_4x4(xd, i, 32);
}
@@ -91,7 +91,8 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
for (i = 0; i < 9; i += 8) {
TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8);
+ vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+ xd->block[i].eob);
} else {
vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
&blockd[i].diff[0], 32);
@@ -100,7 +101,8 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
for (i = 2; i < 11; i += 8) {
TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8);
+ vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+ xd->block[i + 2].eob);
} else {
vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
&blockd[i].diff[0], 32);
@@ -132,7 +134,7 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
BLOCKD *bd = &xd->block[0];
TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16);
+ vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16, bd->eob);
} else {
vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
&xd->block[0].diff[0], 32);
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 5b7af100b..e8981ce5e 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -361,7 +361,7 @@ specialize vp9_short_idct16x16
prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch"
specialize vp9_short_idct10_16x16
-prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim"
+prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim, short eobs"
specialize vp9_ihtllm
#