summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
authorYunqing Wang <yunqingwang@google.com>2012-11-16 12:23:06 -0800
committerGerrit Code Review <gerrit@gerrit.golo.chromium.org>2012-11-16 12:23:06 -0800
commit4c7c15ee6996eea1c5911e2418554d5c64978c30 (patch)
tree2100d9bf2d66f40893a14f2c3fab4a447ca48502 /vp9
parent7bb2afa1da5327b66b054195fbb777edc0fe94fa (diff)
parente60478d46d9a692e2e7b90b35355660682bfe58b (diff)
downloadlibvpx-4c7c15ee6996eea1c5911e2418554d5c64978c30.tar
libvpx-4c7c15ee6996eea1c5911e2418554d5c64978c30.tar.gz
libvpx-4c7c15ee6996eea1c5911e2418554d5c64978c30.tar.bz2
libvpx-4c7c15ee6996eea1c5911e2418554d5c64978c30.zip
Merge "Optimize 8x8 dequant and idct" into experimental
Diffstat (limited to 'vp9')
-rw-r--r--vp9/common/generic/systemdependent.c3
-rw-r--r--vp9/common/idct.h6
-rw-r--r--vp9/common/idctllm.c121
-rw-r--r--vp9/common/rtcd_defs.sh5
-rw-r--r--vp9/decoder/decodframe.c3
-rw-r--r--vp9/decoder/dequantize.c336
-rw-r--r--vp9/decoder/idct_blk.c66
7 files changed, 303 insertions, 237 deletions
diff --git a/vp9/common/generic/systemdependent.c b/vp9/common/generic/systemdependent.c
index 749e3d358..51dfaea5f 100644
--- a/vp9/common/generic/systemdependent.c
+++ b/vp9/common/generic/systemdependent.c
@@ -29,10 +29,11 @@ void vp9_machine_specific_config(VP9_COMMON *ctx) {
rtcd->idct.iwalsh1 = vp9_short_inv_walsh4x4_1_c;
rtcd->idct.iwalsh16 = vp9_short_inv_walsh4x4_c;
rtcd->idct.idct8 = vp9_short_idct8x8_c;
+ rtcd->idct.idct10_8 = vp9_short_idct10_8x8_c;
rtcd->idct.idct1_scalar_add_8x8 = vp9_dc_only_idct_add_8x8_c;
rtcd->idct.ihaar2 = vp9_short_ihaar2x2_c;
rtcd->idct.idct16x16 = vp9_short_idct16x16_c;
- rtcd->idct.idct10_16x16 = vp9_short_idct10_16x16_c;
+ rtcd->idct.idct10_16x16 = vp9_short_idct10_16x16_c;
rtcd->subpix.eighttap16x16 = vp9_eighttap_predict16x16_c;
rtcd->subpix.eighttap8x8 = vp9_eighttap_predict8x8_c;
diff --git a/vp9/common/idct.h b/vp9/common/idct.h
index b8d3121b6..0f0478cd5 100644
--- a/vp9/common/idct.h
+++ b/vp9/common/idct.h
@@ -60,6 +60,11 @@ extern prototype_idct(vp9_idct_idct10_16x16);
#endif
extern prototype_idct(vp9_idct_idct8);
+#ifndef vp9_idct_idct10_8
+#define vp9_idct_idct10_8 vp9_short_idct10_8x8_c
+#endif
+extern prototype_idct(vp9_idct_idct10_8);
+
#ifndef vp9_idct_idct8_1
#define vp9_idct_idct8_1 vp9_short_idct8x8_1_c
#endif
@@ -132,6 +137,7 @@ typedef struct {
vp9_second_order_fn_t iwalsh16;
vp9_idct_fn_t idct8;
+ vp9_idct_fn_t idct10_8;
vp9_idct_fn_t idct8_1;
vp9_idct_scalar_add_fn_t idct1_scalar_add_8x8;
vp9_idct_fn_t ihaar2;
diff --git a/vp9/common/idctllm.c b/vp9/common/idctllm.c
index 3efc094e3..aa5665473 100644
--- a/vp9/common/idctllm.c
+++ b/vp9/common/idctllm.c
@@ -967,6 +967,127 @@ void vp9_short_idct8x8_c(short *coefs, short *block, int pitch) {
}
}
+/* Row IDCT when only first 4 coefficients are non-zero.
+ *
+ * Transforms the 8 ints blk[0..7] in place. Apart from the all-zero test
+ * below, only blk[0..3] feed the butterfly: the values loaded from blk[4..7]
+ * into x1, x2, x5 and x6 are overwritten before they are ever read.
+ * W1, W2, W3, W5, W6 and W7 are defined outside this hunk (presumably the
+ * fixed-point constants shared with the full 8x8 IDCT -- confirm).
+ *
+ * NOTE(review): blk[] may hold negative values; left-shifting a negative int
+ * is undefined in ISO C and right-shifting one is implementation-defined, so
+ * this code relies on the compiler's arithmetic-shift behaviour.
+ */
+static void idctrow10(int *blk) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* shortcut: if blk[1..7] are all zero, every output of the row is simply
+ * blk[0] << 3 and the butterfly below is skipped. */
+ if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) |
+ (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) {
+ blk[0] = blk[1] = blk[2] = blk[3] = blk[4]
+ = blk[5] = blk[6] = blk[7] = blk[0] << 3;
+ return;
+ }
+
+ x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */
+ /* first stage: odd part, built from blk[1] (x4) and blk[3] (x7) only */
+ x5 = W7 * x4;
+ x4 = W1 * x4;
+ x6 = W3 * x7;
+ x7 = -W5 * x7;
+
+ /* second stage: even part from blk[2] (x3), then combine the odd terms */
+ x2 = W6 * x3;
+ x3 = W2 * x3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage: 181/256 is about 1/sqrt(2); the +128 rounds before >> 8 */
+ x7 = x0 + x3;
+ x8 = x0 - x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage: write back; the >> 8 leaves the DC term scaled by 8
+ * (<< 11 then >> 8), consistent with the shortcut above. */
+ blk[0] = (x7 + x1) >> 8;
+ blk[1] = (x3 + x2) >> 8;
+ blk[2] = (x0 + x4) >> 8;
+ blk[3] = (x8 + x6) >> 8;
+ blk[4] = (x8 - x6) >> 8;
+ blk[5] = (x0 - x4) >> 8;
+ blk[6] = (x3 - x2) >> 8;
+ blk[7] = (x7 - x1) >> 8;
+}
+
+/* Column (vertical) IDCT when only first 4 coefficients are non-zero.
+ *
+ * Transforms one column of an 8-wide int array in place: the 8 elements are
+ * blk[8 * 0] .. blk[8 * 7]. Apart from the all-zero test below, only rows
+ * 0..3 of the column feed the butterfly: the values loaded from rows 4..7
+ * into x1, x2, x5 and x6 are overwritten before they are ever read.
+ * W1, W2, W3, W5, W6 and W7 are defined outside this hunk (presumably the
+ * fixed-point constants shared with the full 8x8 IDCT -- confirm).
+ *
+ * NOTE(review): left-shifting a negative int is undefined in ISO C and
+ * right-shifting one is implementation-defined; this code relies on the
+ * compiler's arithmetic-shift behaviour.
+ */
+static void idctcol10(int *blk) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* shortcut: if rows 1..7 of the column are all zero, every output is the
+ * rounded value (blk[0] + 32) >> 6 and the butterfly below is skipped. */
+ if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |
+ (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |
+ (x7 = blk[8 * 3]))) {
+ blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]
+ = blk[8 * 4] = blk[8 * 5] = blk[8 * 6]
+ = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6);
+ return;
+ }
+
+ x0 = (blk[8 * 0] << 8) + 16384; /* bias carried into every final >> 14 */
+
+ /* first stage: odd part, built from rows 1 (x4) and 3 (x7) only; each
+ * product is rounded (+4) and reduced by >> 3 */
+ x5 = (W7 * x4 + 4) >> 3;
+ x4 = (W1 * x4 + 4) >> 3;
+ x6 = (W3 * x7 + 4) >> 3;
+ x7 = (-W5 * x7 + 4) >> 3;
+
+ /* second stage: even part from row 2 (x3), then combine the odd terms */
+ x2 = (W6 * x3 + 4) >> 3;
+ x3 = (W2 * x3 + 4) >> 3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+
+ /* third stage: 181/256 is about 1/sqrt(2); the +128 rounds before >> 8 */
+ x7 = x0 + x3;
+ x8 = x0 - x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+
+ /* fourth stage: write the column back, scaling down by >> 14 */
+ blk[8 * 0] = (x7 + x1) >> 14;
+ blk[8 * 1] = (x3 + x2) >> 14;
+ blk[8 * 2] = (x0 + x4) >> 14;
+ blk[8 * 3] = (x8 + x6) >> 14;
+ blk[8 * 4] = (x8 - x6) >> 14;
+ blk[8 * 5] = (x0 - x4) >> 14;
+ blk[8 * 6] = (x3 - x2) >> 14;
+ blk[8 * 7] = (x7 - x1) >> 14;
+}
+/* 8x8 inverse DCT for blocks whose non-zero coefficients all lie in the
+ * upper-left 4x4 corner.
+ *
+ * coefs: TX_DIM * TX_DIM input coefficients, read row by row (not modified).
+ * block: output; element (i, j) is written to block[i * (pitch >> 1) + j].
+ * pitch: output row stride; it is halved before indexing the short array,
+ *        so it is presumably given in bytes -- confirm against callers.
+ *
+ * TX_DIM is defined outside this hunk; the row/column helpers and the loops
+ * below use a literal 8, so it is presumably 8 -- confirm.
+ */
+void vp9_short_idct10_8x8_c(short *coefs, short *block, int pitch) {
+ int X[TX_DIM * TX_DIM]; /* int working copy of the coefficient block */
+ int i, j;
+ int shortpitch = pitch >> 1; /* output stride in shorts */
+
+ /* Pre-scale: each coefficient has 1 added (2 if negative) and is then
+ * shifted right by 2; the cast applies to the sum, before the shift. */
+ for (i = 0; i < TX_DIM; i++) {
+ for (j = 0; j < TX_DIM; j++) {
+ X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1
+ + (coefs[i * TX_DIM + j] < 0)) >> 2;
+ }
+ }
+
+ /* Do first 4 row idct only since non-zero dct coefficients are all in
+ * upper-left 4x4 area. Rows 4..7 keep their pre-scaled input values and
+ * get no row transform, so the caller must guarantee they are zero. */
+ for (i = 0; i < 4; i++)
+ idctrow10(X + 8 * i);
+
+ /* All 8 columns still need the column pass. */
+ for (i = 0; i < 8; i++)
+ idctcol10(X + i);
+
+ /* Store the result, halving each value (>> 1) on the way out. */
+ for (i = 0; i < TX_DIM; i++) {
+ for (j = 0; j < TX_DIM; j++) {
+ block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1;
+ }
+ }
+}
void vp9_short_ihaar2x2_c(short *input, short *output, int pitch) {
int i;
diff --git a/vp9/common/rtcd_defs.sh b/vp9/common/rtcd_defs.sh
index 2c94c9e8a..c69488ff4 100644
--- a/vp9/common/rtcd_defs.sh
+++ b/vp9/common/rtcd_defs.sh
@@ -57,12 +57,9 @@ specialize vp9_dequant_idct_add_uv_block_8x8
prototype void vp9_dequant_idct_add_16x16 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, unsigned short eobs"
specialize vp9_dequant_idct_add_16x16
-prototype void vp9_dequant_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
+prototype void vp9_dequant_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int dc, unsigned short eobs"
specialize vp9_dequant_idct_add_8x8
-prototype void vp9_dequant_dc_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc"
-specialize vp9_dequant_dc_idct_add_8x8
-
prototype void vp9_dequant_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
specialize vp9_dequant_idct_add
diff --git a/vp9/decoder/decodframe.c b/vp9/decoder/decodframe.c
index a32423b17..af33724ed 100644
--- a/vp9/decoder/decodframe.c
+++ b/vp9/decoder/decodframe.c
@@ -461,7 +461,8 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_ht_dequant_idct_add_8x8_c(tx_type,
q, dq, pre, dst, 16, stride);
} else {
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0,
+ xd->eobs[idx]);
}
q += 64;
} else {
diff --git a/vp9/decoder/dequantize.c b/vp9/decoder/dequantize.c
index 541293e61..949a8dc0a 100644
--- a/vp9/decoder/dequantize.c
+++ b/vp9/decoder/dequantize.c
@@ -19,8 +19,8 @@
extern int dec_debug;
#endif
-static void recon(int16_t *diff, uint8_t *pred, int pitch, uint8_t *dest,
- int stride, int width, int height) {
+static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride, int width, int height) {
int r, c;
for (r = 0; r < height; r++) {
@@ -41,12 +41,34 @@ static void recon(int16_t *diff, uint8_t *pred, int pitch, uint8_t *dest,
}
}
+static void add_constant_residual(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride,
+ int width, int height) {
+ int r, c;
+ /* Add the single value `diff` to every pixel of the width x height block
+ * read from pred, clamp each sum to [0, 255] and store it in dest.
+ * After each row, pred advances by `pitch` and dest by `stride`. */
+ for (r = 0; r < height; r++) {
+ for (c = 0; c < width; c++) {
+ int a = diff + pred[c]; /* computed in int, so it can leave 0..255 */
+
+ if (a < 0)
+ a = 0;
+ else if (a > 255)
+ a = 255;
+
+ dest[c] = (uint8_t) a;
+ }
+
+ dest += stride;
+ pred += pitch;
+ }
+}
+
void vp9_dequantize_b_c(BLOCKD *d) {
int i;
- short *DQ = d->dqcoeff;
- short *Q = d->qcoeff;
- short *DQC = d->dequant;
+ int16_t *DQ = d->dqcoeff;
+ int16_t *Q = d->qcoeff;
+ int16_t *DQC = d->dequant;
for (i = 0; i < 16; i++) {
DQ[i] = Q[i] * DQC[i];
@@ -54,11 +76,11 @@ void vp9_dequantize_b_c(BLOCKD *d) {
}
-void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
- unsigned char *pred, unsigned char *dest,
+void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, int16_t *dq,
+ uint8_t *pred, uint8_t *dest,
int pitch, int stride) {
- short output[16];
- short *diff_ptr = output;
+ int16_t output[16];
+ int16_t *diff_ptr = output;
int i;
for (i = 0; i < 16; i++) {
@@ -69,18 +91,15 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
vpx_memset(input, 0, 32);
- recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
}
-void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
- unsigned char *pred, unsigned char *dest,
+void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, int16_t *dq,
+ uint8_t *pred, uint8_t *dest,
int pitch, int stride) {
- short output[64];
- short *diff_ptr = output;
- int b, r, c;
+ int16_t output[64];
+ int16_t *diff_ptr = output;
int i;
- unsigned char *origdest = dest;
- unsigned char *origpred = pred;
input[0] = dq[0] * input[0];
for (i = 1; i < 64; i++) {
@@ -91,35 +110,13 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
vpx_memset(input, 0, 128);
- for (b = 0; b < 4; b++) {
- for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- int a = diff_ptr[c] + pred[c];
-
- if (a < 0)
- a = 0;
-
- if (a > 255)
- a = 255;
-
- dest[c] = (unsigned char) a;
- }
-
- dest += stride;
- diff_ptr += 8;
- pred += pitch;
- }
- // shift buffer pointers to next 4x4 block in the submacroblock
- diff_ptr = output + (b + 1) / 2 * 4 * 8 + ((b + 1) % 2) * 4;
- dest = origdest + (b + 1) / 2 * 4 * stride + ((b + 1) % 2) * 4;
- pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4;
- }
+ add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
}
-void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
- unsigned char *dest, int pitch, int stride) {
- short output[16];
- short *diff_ptr = output;
+void vp9_dequant_idct_add_c(int16_t *input, int16_t *dq, uint8_t *pred,
+ uint8_t *dest, int pitch, int stride) {
+ int16_t output[16];
+ int16_t *diff_ptr = output;
int i;
for (i = 0; i < 16; i++) {
@@ -131,17 +128,17 @@ void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
vpx_memset(input, 0, 32);
- recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
}
-void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
- unsigned char *dest, int pitch, int stride,
+void vp9_dequant_dc_idct_add_c(int16_t *input, int16_t *dq, uint8_t *pred,
+ uint8_t *dest, int pitch, int stride,
int Dc) {
int i;
- short output[16];
- short *diff_ptr = output;
+ int16_t output[16];
+ int16_t *diff_ptr = output;
- input[0] = (short)Dc;
+ input[0] = (int16_t)Dc;
for (i = 1; i < 16; i++) {
input[i] = dq[i] * input[i];
@@ -152,15 +149,15 @@ void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
vpx_memset(input, 0, 32);
- recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
}
#if CONFIG_LOSSLESS
-void vp9_dequant_idct_add_lossless_c(short *input, short *dq,
- unsigned char *pred, unsigned char *dest,
+void vp9_dequant_idct_add_lossless_c(int16_t *input, int16_t *dq,
+ uint8_t *pred, uint8_t *dest,
int pitch, int stride) {
- short output[16];
- short *diff_ptr = output;
+ int16_t output[16];
+ int16_t *diff_ptr = output;
int i;
for (i = 0; i < 16; i++) {
@@ -171,18 +168,18 @@ void vp9_dequant_idct_add_lossless_c(short *input, short *dq,
vpx_memset(input, 0, 32);
- recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
}
-void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq,
- unsigned char *pred,
- unsigned char *dest,
+void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, int16_t *dq,
+ uint8_t *pred,
+ uint8_t *dest,
int pitch, int stride, int dc) {
int i;
- short output[16];
- short *diff_ptr = output;
+ int16_t output[16];
+ int16_t *diff_ptr = output;
- input[0] = (short)dc;
+ input[0] = (int16_t)dc;
for (i = 1; i < 16; i++) {
input[i] = dq[i] * input[i];
@@ -191,18 +188,18 @@ void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq,
vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1);
vpx_memset(input, 0, 32);
- recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
}
#endif
void vp9_dequantize_b_2x2_c(BLOCKD *d) {
int i;
- short *DQ = d->dqcoeff;
- short *Q = d->qcoeff;
- short *DQC = d->dequant;
+ int16_t *DQ = d->dqcoeff;
+ int16_t *Q = d->qcoeff;
+ int16_t *DQC = d->dequant;
for (i = 0; i < 16; i++) {
- DQ[i] = (short)((Q[i] * DQC[i]));
+ DQ[i] = (int16_t)((Q[i] * DQC[i]));
}
#ifdef DEC_DEBUG
if (dec_debug) {
@@ -216,14 +213,12 @@ void vp9_dequantize_b_2x2_c(BLOCKD *d) {
#endif
}
-void vp9_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
- unsigned char *dest, int pitch, int stride) {
- short output[64];
- short *diff_ptr = output;
- int r, c, b;
+void vp9_dequant_idct_add_8x8_c(int16_t *input, int16_t *dq, uint8_t *pred,
+ uint8_t *dest, int pitch, int stride,
+ int dc, uint16_t eobs) {
+ int16_t output[64];
+ int16_t *diff_ptr = output;
int i;
- unsigned char *origdest = dest;
- unsigned char *origpred = pred;
#ifdef DEC_DEBUG
if (dec_debug) {
@@ -236,101 +231,57 @@ void vp9_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
}
#endif
- input[0] = input[0] * dq[0];
+ /* If dc is 1, then input[0] is already the reconstructed value and does not
+ * need dequantization. Also, when dc is 1, dc is counted in eobs, i.e.
+ * eobs >= 1.
+ */
+ if (!dc)
+ input[0] *= dq[0];
- // recover quantizer for 4 4x4 blocks
- for (i = 1; i < 64; i++) {
- input[i] = input[i] * dq[1];
- }
-#ifdef DEC_DEBUG
- if (dec_debug) {
- int j;
- printf("Input DQ 8x8\n");
- for (j = 0; j < 64; j++) {
- printf("%d ", input[j]);
- if (j % 8 == 7) printf("\n");
- }
- }
-#endif
-
- // the idct halves ( >> 1) the pitch
- vp9_short_idct8x8_c(input, output, 16);
-#ifdef DEC_DEBUG
- if (dec_debug) {
- int j;
- printf("Output 8x8\n");
- for (j = 0; j < 64; j++) {
- printf("%d ", output[j]);
- if (j % 8 == 7) printf("\n");
- }
- }
-#endif
-
- vpx_memset(input, 0, 128);// test what should i put here
-
- for (b = 0; b < 4; b++) {
- for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- int a = diff_ptr[c] + pred[c];
+ /* The calculation can be simplified if there are not many non-zero dct
+ * coefficients. Use eobs to decide what to do.
+ * TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
+ * Combine that with code here.
+ */
+ if (eobs == 0) {
+ /* All 0 DCT coefficient */
+ vp9_copy_mem8x8(pred, pitch, dest, stride);
+ } else if (eobs == 1) {
+ /* DC only DCT coefficient. */
+ int16_t out;
- if (a < 0)
- a = 0;
+ /* Note: the idct1 will need to be modified accordingly whenever
+ * vp9_short_idct8x8_c() is modified. */
+ out = (input[0] + 1 + (input[0] < 0)) >> 2;
+ out = out << 3;
+ out = (out + 32) >> 7;
- if (a > 255)
- a = 255;
+ input[0] = 0;
- dest[c] = (unsigned char) a;
- }
+ add_constant_residual(out, pred, pitch, dest, stride, 8, 8);
+ } else if (eobs <= 10) {
+ input[1] = input[1] * dq[1];
+ input[2] = input[2] * dq[1];
+ input[3] = input[3] * dq[1];
+ input[8] = input[8] * dq[1];
+ input[9] = input[9] * dq[1];
+ input[10] = input[10] * dq[1];
+ input[16] = input[16] * dq[1];
+ input[17] = input[17] * dq[1];
+ input[24] = input[24] * dq[1];
- dest += stride;
- diff_ptr += 8;
- pred += pitch;
- }
- diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4;
- dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4;
- pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4;
- }
-#ifdef DEC_DEBUG
- if (dec_debug) {
- int k, j;
- printf("Final 8x8\n");
- for (j = 0; j < 8; j++) {
- for (k = 0; k < 8; k++) {
- printf("%d ", origdest[k]);
- }
- printf("\n");
- origdest += stride;
- }
- }
-#endif
-}
+ vp9_short_idct10_8x8_c(input, output, 16);
-void vp9_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
- unsigned char *dest, int pitch, int stride,
- int Dc) { // Dc for 1st order T in some rear case
- short output[64];
- short *diff_ptr = output;
- int r, c, b;
- int i;
- unsigned char *origdest = dest;
- unsigned char *origpred = pred;
+ input[0] = input[1] = input[2] = input[3] = 0;
+ input[8] = input[9] = input[10] = 0;
+ input[16] = input[17] = 0;
+ input[24] = 0;
- input[0] = (short)Dc;// Dc is the reconstructed value, do not need dequantization
- // dc value is recovered after dequantization, since dc need not quantization
-#ifdef DEC_DEBUG
- if (dec_debug) {
- int j;
- printf("Input 8x8\n");
- for (j = 0; j < 64; j++) {
- printf("%d ", input[j]);
- if (j % 8 == 7) printf("\n");
+ add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ } else {
+ // recover quantizer for 4 4x4 blocks
+ for (i = 1; i < 64; i++) {
+ input[i] = input[i] * dq[1];
}
- }
-#endif
- for (i = 1; i < 64; i++) {
- input[i] = input[i] * dq[1];
- }
-
#ifdef DEC_DEBUG
if (dec_debug) {
int j;
@@ -342,8 +293,8 @@ void vp9_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
}
#endif
- // the idct halves ( >> 1) the pitch
- vp9_short_idct8x8_c(input, output, 16);
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct8x8_c(input, output, 16);
#ifdef DEC_DEBUG
if (dec_debug) {
int j;
@@ -354,30 +305,11 @@ void vp9_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
}
}
#endif
- vpx_memset(input, 0, 128);
- for (b = 0; b < 4; b++) {
- for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- int a = diff_ptr[c] + pred[c];
+ vpx_memset(input, 0, 128);
- if (a < 0)
- a = 0;
+ add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
- if (a > 255)
- a = 255;
-
- dest[c] = (unsigned char) a;
- }
-
- dest += stride;
- diff_ptr += 8;
- pred += pitch;
- }
- diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4;
- dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4;
- pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4;
- }
#ifdef DEC_DEBUG
if (dec_debug) {
int k, j;
@@ -391,13 +323,14 @@ void vp9_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred,
}
}
#endif
+ }
}
-void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
- unsigned char *pred, unsigned char *dest,
+void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
+ int16_t *dq, uint8_t *pred, uint8_t *dest,
int pitch, int stride) {
- short output[256];
- short *diff_ptr = output;
+ int16_t output[256];
+ int16_t *diff_ptr = output;
int i;
input[0]= input[0] * dq[0];
@@ -414,7 +347,7 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
vpx_memset(input, 0, 512);
- recon(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
}
void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred,
@@ -422,7 +355,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred,
uint16_t eobs) {
int16_t output[256];
int16_t *diff_ptr = output;
- int r, c, i;
+ int i;
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
@@ -433,28 +366,15 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred,
/* DC only DCT coefficient. */
int16_t out;
+ /* Note: the idct1 will need to be modified accordingly whenever
+ * vp9_short_idct16x16_c() is modified. */
out = (input[0] * dq[0] + 2) >> 2;
out = (out + 2) >> 2;
out = (out + 4) >> 3;
input[0] = 0;
- for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
- int a = out + pred[c];
-
- if (a < 0)
- a = 0;
- else if (a > 255)
- a = 255;
-
- dest[c] = (uint8_t) a;
- }
-
- dest += stride;
- pred += pitch;
- }
-
+ add_constant_residual(out, pred, pitch, dest, stride, 16, 16);
} else if (eobs <= 10) {
input[0]= input[0] * dq[0];
input[1] = input[1] * dq[1];
@@ -475,7 +395,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred,
input[32] = input[33] = 0;
input[48] = 0;
- recon(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
} else {
input[0]= input[0] * dq[0];
@@ -488,6 +408,6 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred,
vpx_memset(input, 0, 512);
- recon(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
}
}
diff --git a/vp9/decoder/idct_blk.c b/vp9/decoder/idct_blk.c
index 0b440b476..f18c49b90 100644
--- a/vp9/decoder/idct_blk.c
+++ b/vp9/decoder/idct_blk.c
@@ -177,12 +177,21 @@ void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, short *dq,
int stride, unsigned short *eobs,
short *dc,
MACROBLOCKD *xd) {
- vp9_dequant_dc_idct_add_8x8_c(q, dq, pre, dst, 16, stride, dc[0]);
- vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, dc[1]);
- vp9_dequant_dc_idct_add_8x8_c(&q[128], dq, pre + 8 * 16,
- dst + 8 * stride, 16, stride, dc[4]);
- vp9_dequant_dc_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8,
- dst + 8 * stride + 8, 16, stride, dc[8]);
+ q[0] = dc[0];
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 1, xd->eobs[0]);
+
+ q[64] = dc[1];
+ vp9_dequant_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, 1,
+ xd->eobs[4]);
+
+ q[128] = dc[4];
+ vp9_dequant_idct_add_8x8_c(&q[128], dq, pre + 8 * 16,
+ dst + 8 * stride, 16, stride, 1, xd->eobs[8]);
+
+ q[192] = dc[8];
+ vp9_dequant_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8,
+ dst + 8 * stride + 8, 16, stride, 1,
+ xd->eobs[12]);
}
#if CONFIG_SUPERBLOCKS
@@ -191,13 +200,22 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq,
int stride,
unsigned short *eobs,
short *dc, MACROBLOCKD *xd) {
- vp9_dequant_dc_idct_add_8x8_c(q, dq, dst, dst, stride, stride, dc[0]);
- vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, dst + 8,
- dst + 8, stride, stride, dc[1]);
- vp9_dequant_dc_idct_add_8x8_c(&q[128], dq, dst + 8 * stride,
- dst + 8 * stride, stride, stride, dc[4]);
- vp9_dequant_dc_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8,
- dst + 8 * stride + 8, stride, stride, dc[8]);
+ q[0] = dc[0];
+ vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 1, xd->eobs[0]);
+
+ q[64] = dc[1];
+ vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8,
+ dst + 8, stride, stride, 1, xd->eobs[4]);
+
+ q[128] = dc[4];
+ vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride,
+ dst + 8 * stride, stride, stride, 1,
+ xd->eobs[8]);
+
+ q[192] = dc[8];
+ vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8,
+ dst + 8 * stride + 8, stride, stride, 1,
+ xd->eobs[12]);
}
#endif
@@ -209,13 +227,14 @@ void vp9_dequant_idct_add_y_block_8x8_c(short *q, short *dq,
unsigned char *origdest = dst;
unsigned char *origpred = pre;
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[0]);
vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8,
- origdest + 8, 16, stride);
+ origdest + 8, 16, stride, 0, xd->eobs[4]);
vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16,
- origdest + 8 * stride, 16, stride);
+ origdest + 8 * stride, 16, stride, 0, xd->eobs[8]);
vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8,
- origdest + 8 * stride + 8, 16, stride);
+ origdest + 8 * stride + 8, 16, stride, 0,
+ xd->eobs[12]);
}
void vp9_dequant_idct_add_uv_block_8x8_c(short *q, short *dq,
@@ -224,12 +243,12 @@ void vp9_dequant_idct_add_uv_block_8x8_c(short *q, short *dq,
unsigned char *dstv,
int stride, unsigned short *eobs,
MACROBLOCKD *xd) {
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, 0, xd->eobs[16]);
q += 64;
pre += 64;
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, 0, xd->eobs[20]);
}
#if CONFIG_SUPERBLOCKS
@@ -239,11 +258,12 @@ void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq,
int stride,
unsigned short *eobs,
MACROBLOCKD *xd) {
- vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride);
-
- q += 64;
+ vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, 0,
+ xd->eobs[16]);
- vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride);
+ q += 64;
+ vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, 0,
+ xd->eobs[20]);
}
#endif