summary | refs | log | tree | commit | diff
path: root/vp9/decoder/vp9_dequantize.c
diff options
context:
space:
mode:
author: Scott LaVarnway <slavarnway@google.com>, 2012-12-12 15:49:39 -0800
committer: Scott LaVarnway <slavarnway@google.com>, 2012-12-12 15:49:39 -0800
commit: b575394e215ea46c9885992d85c3047de5171f4c (patch)
tree: 218751930146b341518524af1e2c13e0e7be4cb6 /vp9/decoder/vp9_dequantize.c
parent: b192d99f7367871908551ca940e06b7cb94e760b (diff)
download: libvpx-b575394e215ea46c9885992d85c3047de5171f4c.tar
libvpx-b575394e215ea46c9885992d85c3047de5171f4c.tar.gz
libvpx-b575394e215ea46c9885992d85c3047de5171f4c.tar.bz2
libvpx-b575394e215ea46c9885992d85c3047de5171f4c.zip
Improved vp9_ihtllm_c
As suggested by Yaowu, we can use eob to reduce the complexity of the vp9_ihtllm_c function. For the 1080p test clip used, the decoder performance improved by 17%.

Change-Id: I32486f2f06f9b8f60467d2a574209aa3a3daa435
Diffstat (limited to 'vp9/decoder/vp9_dequantize.c')
-rw-r--r-- vp9/decoder/vp9_dequantize.c | 56
1 files changed, 32 insertions, 24 deletions
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index 79114d58c..39a2de14b 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -13,7 +13,6 @@
#include "vp9/decoder/vp9_dequantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/decoder/vp9_onyxd_int.h"
-
static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
uint8_t *dest, int stride, int width, int height) {
int r, c;
@@ -74,7 +73,7 @@ void vp9_dequantize_b_c(BLOCKD *d) {
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
- int pitch, int stride) {
+ int pitch, int stride, uint16_t eobs) {
int16_t output[16];
int16_t *diff_ptr = output;
int i;
@@ -83,7 +82,7 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
input[i] = dq[i] * input[i];
}
- vp9_ihtllm(input, output, 4 << 1, tx_type, 4);
+ vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs);
vpx_memset(input, 0, 32);
@@ -93,21 +92,25 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
- int pitch, int stride) {
+ int pitch, int stride, uint16_t eobs) {
int16_t output[64];
int16_t *diff_ptr = output;
int i;
+ if (eobs == 0) {
+ /* All 0 DCT coefficient */
+ vp9_copy_mem8x8(pred, pitch, dest, stride);
+ } else if (eobs > 0) {
+ input[0] = dq[0] * input[0];
+ for (i = 1; i < 64; i++) {
+ input[i] = dq[1] * input[i];
+ }
- input[0] = dq[0] * input[0];
- for (i = 1; i < 64; i++) {
- input[i] = dq[1] * input[i];
- }
-
- vp9_ihtllm(input, output, 16, tx_type, 8);
+ vp9_ihtllm(input, output, 16, tx_type, 8, eobs);
- vpx_memset(input, 0, 128);
+ vpx_memset(input, 0, 128);
- add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ }
}
void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
@@ -269,26 +272,31 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride) {
+ uint8_t *dest, int pitch, int stride,
+ uint16_t eobs) {
int16_t output[256];
int16_t *diff_ptr = output;
int i;
+ if (eobs == 0) {
+ /* All 0 DCT coefficient */
+ vp9_copy_mem16x16(pred, pitch, dest, stride);
+ } else if (eobs > 0) {
+ input[0]= input[0] * dq[0];
- input[0]= input[0] * dq[0];
-
- // recover quantizer for 4 4x4 blocks
- for (i = 1; i < 256; i++)
- input[i] = input[i] * dq[1];
+ // recover quantizer for 4 4x4 blocks
+ for (i = 1; i < 256; i++)
+ input[i] = input[i] * dq[1];
- // inverse hybrid transform
- vp9_ihtllm(input, output, 32, tx_type, 16);
+ // inverse hybrid transform
+ vp9_ihtllm(input, output, 32, tx_type, 16, eobs);
- // the idct halves ( >> 1) the pitch
- // vp9_short_idct16x16_c(input, output, 32);
+ // the idct halves ( >> 1) the pitch
+ // vp9_short_idct16x16_c(input, output, 32);
- vpx_memset(input, 0, 512);
+ vpx_memset(input, 0, 512);
- add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ }
}
void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,