From aa90983696548bc808c07bcafa1b6790c3d440af Mon Sep 17 00:00:00 2001 From: Debargha Mukherjee Date: Wed, 25 May 2016 12:24:48 -0700 Subject: Move range checks into WRAPLOW Provides more comprehensive coverage for --enable-coefficient-checking. The intent is to make the --enable-coefficient-checking option consistent with the VP9 spec. Change-Id: I12d0120756d17572ca2b2d7e6a2ab9d8071d8d58 --- vpx_dsp/inv_txfm.c | 1700 ++++++++++++++++++++++++++-------------------------- vpx_dsp/inv_txfm.h | 44 +- 2 files changed, 877 insertions(+), 867 deletions(-) (limited to 'vpx_dsp') diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c index ff7533745..ad91eadbe 100644 --- a/vpx_dsp/inv_txfm.c +++ b/vpx_dsp/inv_txfm.c @@ -35,10 +35,10 @@ void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { c1 = e1 - c1; a1 -= b1; d1 += c1; - op[0] = WRAPLOW(a1, 8); - op[1] = WRAPLOW(b1, 8); - op[2] = WRAPLOW(c1, 8); - op[3] = WRAPLOW(d1, 8); + op[0] = WRAPLOW(a1); + op[1] = WRAPLOW(b1); + op[2] = WRAPLOW(c1); + op[3] = WRAPLOW(d1); ip += 4; op += 4; } @@ -76,8 +76,8 @@ void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { a1 = ip[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; a1 -= e1; - op[0] = WRAPLOW(a1, 8); - op[1] = op[2] = op[3] = WRAPLOW(e1, 8); + op[0] = WRAPLOW(a1); + op[1] = op[2] = op[3] = WRAPLOW(e1); ip = tmp; for (i = 0; i < 4; i++) { @@ -98,18 +98,18 @@ void idct4_c(const tran_low_t *input, tran_low_t *output) { // stage 1 temp1 = (input[0] + input[2]) * cospi_16_64; temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step[0] = WRAPLOW(dct_const_round_shift(temp1)); + step[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); + step[2] = WRAPLOW(dct_const_round_shift(temp1)); + step[3] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 - output[0] = WRAPLOW(step[0] + step[3], 8); - output[1] = WRAPLOW(step[1] + step[2], 8); - output[2] = WRAPLOW(step[1] - step[2], 8); - output[3] = WRAPLOW(step[0] - step[3], 8); + output[0] = WRAPLOW(step[0] + step[3]); + output[1] = WRAPLOW(step[1] + step[2]); + output[2] = WRAPLOW(step[1] - step[2]); + output[3] = WRAPLOW(step[0] - step[3]); } void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { @@ -141,8 +141,8 @@ void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride) { int i; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 4); for (i = 0; i < 4; i++) { @@ -164,48 +164,48 @@ void idct8_c(const tran_low_t *input, tran_low_t *output) { step1[3] = input[6]; temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 temp1 = (step1[0] + step1[2]) * cospi_16_64; temp2 = (step1[0] - step1[2]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); // stage 3 - step1[0] = WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; // stage 4 - output[0] = WRAPLOW(step1[0] + step1[7], 8); - output[1] = WRAPLOW(step1[1] + step1[6], 8); - output[2] = WRAPLOW(step1[2] + step1[5], 8); - output[3] = WRAPLOW(step1[3] + step1[4], 8); - output[4] = WRAPLOW(step1[3] - step1[4], 8); - output[5] = WRAPLOW(step1[2] - step1[5], 8); - output[6] = WRAPLOW(step1[1] - step1[6], 8); - output[7] = WRAPLOW(step1[0] - step1[7], 8); + output[0] = WRAPLOW(step1[0] + step1[7]); + output[1] = WRAPLOW(step1[1] + step1[6]); + output[2] = WRAPLOW(step1[2] + step1[5]); + output[3] = WRAPLOW(step1[3] + step1[4]); + output[4] = WRAPLOW(step1[3] - step1[4]); + output[5] = WRAPLOW(step1[2] - step1[5]); + output[6] = WRAPLOW(step1[1] - step1[6]); + output[7] = WRAPLOW(step1[0] - step1[7]); } void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { @@ -236,8 +236,8 @@ void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { for (i = 0; i < 8; ++i) @@ -277,10 +277,10 @@ void iadst4_c(const tran_low_t *input, tran_low_t *output) { // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. - output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8); - output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8); - output[2] = WRAPLOW(dct_const_round_shift(s2), 8); - output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8); + output[0] = WRAPLOW(dct_const_round_shift(s0 + s3)); + output[1] = WRAPLOW(dct_const_round_shift(s1 + s3)); + output[2] = WRAPLOW(dct_const_round_shift(s2)); + output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3)); } void iadst8_c(const tran_low_t *input, tran_low_t *output) { @@ -311,14 +311,14 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) { s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); - x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); - x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); - x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8); - x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8); - x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8); - x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8); - x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); + x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); + x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); + x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); + x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); + x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); + x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); + x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); // stage 2 s0 = (int)x0; @@ -330,14 +330,14 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) { s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); - x0 = WRAPLOW(s0 + s2, 8); - x1 = WRAPLOW(s1 + s3, 8); - x2 = WRAPLOW(s0 - s2, 8); - x3 = WRAPLOW(s1 - s3, 8); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); + x0 = WRAPLOW(s0 + s2); + x1 = WRAPLOW(s1 + s3); + x2 = WRAPLOW(s0 - s2); + x3 = WRAPLOW(s1 - s3); + x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); + x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); // stage 3 s2 = (int)(cospi_16_64 * (x2 + x3)); @@ -345,19 +345,19 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) { s6 = (int)(cospi_16_64 * (x6 + x7)); s7 = (int)(cospi_16_64 * (x6 - x7)); - x2 = WRAPLOW(dct_const_round_shift(s2), 8); - x3 = WRAPLOW(dct_const_round_shift(s3), 8); - x6 = WRAPLOW(dct_const_round_shift(s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s7), 8); - - output[0] = WRAPLOW(x0, 8); - output[1] = WRAPLOW(-x4, 8); - output[2] = WRAPLOW(x6, 8); - output[3] = WRAPLOW(-x2, 8); - output[4] = WRAPLOW(x3, 8); - output[5] = WRAPLOW(-x7, 8); - output[6] = WRAPLOW(x5, 8); - output[7] = WRAPLOW(-x1, 8); + x2 = WRAPLOW(dct_const_round_shift(s2)); + x3 = WRAPLOW(dct_const_round_shift(s3)); + x6 = WRAPLOW(dct_const_round_shift(s6)); + x7 = WRAPLOW(dct_const_round_shift(s7)); + + output[0] = WRAPLOW(x0); + output[1] = WRAPLOW(-x4); + output[2] = WRAPLOW(x6); + output[3] = WRAPLOW(-x2); + output[4] = WRAPLOW(x3); + output[5] = WRAPLOW(-x7); + output[6] = WRAPLOW(x5); + output[7] = WRAPLOW(-x1); } void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { @@ -420,23 +420,23 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) { temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[8] = WRAPLOW(dct_const_round_shift(temp1)); + step2[15] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); // stage 3 step1[0] = step2[0]; @@ -446,109 +446,109 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) { temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - - step1[8] = WRAPLOW(step2[8] + step2[9], 8); - step1[9] = WRAPLOW(step2[8] - step2[9], 8); - step1[10] = WRAPLOW(-step2[10] + step2[11], 8); - step1[11] = WRAPLOW(step2[10] + step2[11], 8); - step1[12] = WRAPLOW(step2[12] + step2[13], 8); - step1[13] = WRAPLOW(step2[12] - step2[13], 8); - step1[14] = WRAPLOW(-step2[14] + step2[15], 8); - step1[15] = WRAPLOW(step2[14] + step2[15], 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + + step1[8] = WRAPLOW(step2[8] + step2[9]); + step1[9] = WRAPLOW(step2[8] - step2[9]); + step1[10] = WRAPLOW(-step2[10] + step2[11]); + step1[11] = WRAPLOW(step2[10] + step2[11]); + step1[12] = WRAPLOW(step2[12] + step2[13]); + step1[13] = WRAPLOW(step2[12] - step2[13]); + step1[14] = WRAPLOW(-step2[14] + step2[15]); + step1[15] = WRAPLOW(step2[14] + step2[15]); // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], 8); - step1[9] = WRAPLOW(step2[9] + step2[10], 8); - step1[10] = WRAPLOW(step2[9] - step2[10], 8); - step1[11] = WRAPLOW(step2[8] - step2[11], 8); - step1[12] = WRAPLOW(-step2[12] + step2[15], 8); - step1[13] = WRAPLOW(-step2[13] + step2[14], 8); - step1[14] = WRAPLOW(step2[13] + step2[14], 8); - step1[15] = WRAPLOW(step2[12] + step2[15], 8); + step1[8] = WRAPLOW(step2[8] + step2[11]); + step1[9] = WRAPLOW(step2[9] + step2[10]); + step1[10] = WRAPLOW(step2[9] - step2[10]); + step1[11] = WRAPLOW(step2[8] - step2[11]); + step1[12] = WRAPLOW(-step2[12] + step2[15]); + step1[13] = WRAPLOW(-step2[13] + step2[14]); + step1[14] = WRAPLOW(step2[13] + step2[14]); + step1[15] = WRAPLOW(step2[12] + step2[15]); // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], 8); - step2[1] = WRAPLOW(step1[1] + step1[6], 8); - step2[2] = WRAPLOW(step1[2] + step1[5], 8); - step2[3] = WRAPLOW(step1[3] + step1[4], 8); - step2[4] = WRAPLOW(step1[3] - step1[4], 8); - step2[5] = WRAPLOW(step1[2] - step1[5], 8); - step2[6] = WRAPLOW(step1[1] - step1[6], 8); - step2[7] = WRAPLOW(step1[0] - step1[7], 8); + step2[0] = WRAPLOW(step1[0] + step1[7]); + step2[1] = WRAPLOW(step1[1] + step1[6]); + step2[2] = WRAPLOW(step1[2] + step1[5]); + step2[3] = WRAPLOW(step1[3] + step1[4]); + step2[4] = WRAPLOW(step1[3] - step1[4]); + step2[5] = WRAPLOW(step1[2] - step1[5]); + step2[6] = WRAPLOW(step1[1] - step1[6]); + step2[7] = WRAPLOW(step1[0] - step1[7]); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15], 8); - output[1] = WRAPLOW(step2[1] + step2[14], 8); - output[2] = WRAPLOW(step2[2] + step2[13], 8); - output[3] = WRAPLOW(step2[3] + step2[12], 8); - output[4] = WRAPLOW(step2[4] + step2[11], 8); - output[5] = WRAPLOW(step2[5] + step2[10], 8); - output[6] = WRAPLOW(step2[6] + step2[9], 8); - output[7] = WRAPLOW(step2[7] + step2[8], 8); - output[8] = WRAPLOW(step2[7] - step2[8], 8); - output[9] = WRAPLOW(step2[6] - step2[9], 8); - output[10] = WRAPLOW(step2[5] - step2[10], 8); - output[11] = WRAPLOW(step2[4] - step2[11], 8); - output[12] = WRAPLOW(step2[3] - step2[12], 8); - output[13] = WRAPLOW(step2[2] - step2[13], 8); - output[14] = WRAPLOW(step2[1] - step2[14], 8); - output[15] = WRAPLOW(step2[0] - step2[15], 8); + output[0] = WRAPLOW(step2[0] + step2[15]); + output[1] = WRAPLOW(step2[1] + step2[14]); + output[2] = WRAPLOW(step2[2] + step2[13]); + output[3] = WRAPLOW(step2[3] + step2[12]); + output[4] = WRAPLOW(step2[4] + step2[11]); + output[5] = WRAPLOW(step2[5] + step2[10]); + output[6] = WRAPLOW(step2[6] + step2[9]); + output[7] = WRAPLOW(step2[7] + step2[8]); + output[8] = WRAPLOW(step2[7] - step2[8]); + output[9] = WRAPLOW(step2[6] - step2[9]); + output[10] = WRAPLOW(step2[5] - step2[10]); + output[11] = WRAPLOW(step2[4] - step2[11]); + output[12] = WRAPLOW(step2[3] - step2[12]); + output[13] = WRAPLOW(step2[2] - step2[13]); + output[14] = WRAPLOW(step2[1] - step2[14]); + output[15] = WRAPLOW(step2[0] - step2[15]); } void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, @@ -625,22 +625,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - x0 = WRAPLOW(dct_const_round_shift(s0 + s8), 8); - x1 = WRAPLOW(dct_const_round_shift(s1 + s9), 8); - x2 = WRAPLOW(dct_const_round_shift(s2 + s10), 8); - x3 = WRAPLOW(dct_const_round_shift(s3 + s11), 8); - x4 = WRAPLOW(dct_const_round_shift(s4 + s12), 8); - x5 = WRAPLOW(dct_const_round_shift(s5 + s13), 8); - x6 = WRAPLOW(dct_const_round_shift(s6 + s14), 8); - x7 = WRAPLOW(dct_const_round_shift(s7 + s15), 8); - x8 = WRAPLOW(dct_const_round_shift(s0 - s8), 8); - x9 = WRAPLOW(dct_const_round_shift(s1 - s9), 8); - x10 = WRAPLOW(dct_const_round_shift(s2 - s10), 8); - x11 = WRAPLOW(dct_const_round_shift(s3 - s11), 8); - x12 = WRAPLOW(dct_const_round_shift(s4 - s12), 8); - x13 = WRAPLOW(dct_const_round_shift(s5 - s13), 8); - x14 = WRAPLOW(dct_const_round_shift(s6 - s14), 8); - x15 = WRAPLOW(dct_const_round_shift(s7 - s15), 8); + x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); + x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); + x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); + x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); + x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); + x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); + x7 = WRAPLOW(dct_const_round_shift(s7 + s15)); + x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); + x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); + x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); + x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); + x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); + x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); + x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); // stage 2 s0 = x0; @@ -660,22 +660,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - x0 = WRAPLOW(s0 + s4, 8); - x1 = WRAPLOW(s1 + s5, 8); - x2 = WRAPLOW(s2 + s6, 8); - x3 = WRAPLOW(s3 + s7, 8); - x4 = WRAPLOW(s0 - s4, 8); - x5 = WRAPLOW(s1 - s5, 8); - x6 = WRAPLOW(s2 - s6, 8); - x7 = WRAPLOW(s3 - s7, 8); - x8 = WRAPLOW(dct_const_round_shift(s8 + s12), 8); - x9 = WRAPLOW(dct_const_round_shift(s9 + s13), 8); - x10 = WRAPLOW(dct_const_round_shift(s10 + s14), 8); - x11 = WRAPLOW(dct_const_round_shift(s11 + s15), 8); - x12 = WRAPLOW(dct_const_round_shift(s8 - s12), 8); - x13 = WRAPLOW(dct_const_round_shift(s9 - s13), 8); - x14 = WRAPLOW(dct_const_round_shift(s10 - s14), 8); - x15 = WRAPLOW(dct_const_round_shift(s11 - s15), 8); + x0 = WRAPLOW(s0 + s4); + x1 = WRAPLOW(s1 + s5); + x2 = WRAPLOW(s2 + s6); + x3 = WRAPLOW(s3 + s7); + x4 = WRAPLOW(s0 - s4); + x5 = WRAPLOW(s1 - s5); + x6 = WRAPLOW(s2 - s6); + x7 = WRAPLOW(s3 - s7); + x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); + x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); + x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); + x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); + x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); + x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); + x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); // stage 3 s0 = x0; @@ -695,22 +695,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - x0 = WRAPLOW(check_range(s0 + s2), 8); - x1 = WRAPLOW(check_range(s1 + s3), 8); - x2 = WRAPLOW(check_range(s0 - s2), 8); - x3 = WRAPLOW(check_range(s1 - s3), 8); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); - x8 = WRAPLOW(check_range(s8 + s10), 8); - x9 = WRAPLOW(check_range(s9 + s11), 8); - x10 = WRAPLOW(check_range(s8 - s10), 8); - x11 = WRAPLOW(check_range(s9 - s11), 8); - x12 = WRAPLOW(dct_const_round_shift(s12 + s14), 8); - x13 = WRAPLOW(dct_const_round_shift(s13 + s15), 8); - x14 = WRAPLOW(dct_const_round_shift(s12 - s14), 8); - x15 = WRAPLOW(dct_const_round_shift(s13 - s15), 8); + x0 = WRAPLOW(s0 + s2); + x1 = WRAPLOW(s1 + s3); + x2 = WRAPLOW(s0 - s2); + x3 = WRAPLOW(s1 - s3); + x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); + x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); + x8 = WRAPLOW(s8 + s10); + x9 = WRAPLOW(s9 + s11); + x10 = WRAPLOW(s8 - s10); + x11 = WRAPLOW(s9 - s11); + x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); + x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); + x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); // stage 4 s2 = (- cospi_16_64) * (x2 + x3); @@ -722,31 +722,31 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = (- cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); - x2 = WRAPLOW(dct_const_round_shift(s2), 8); - x3 = WRAPLOW(dct_const_round_shift(s3), 8); - x6 = WRAPLOW(dct_const_round_shift(s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s7), 8); - x10 = WRAPLOW(dct_const_round_shift(s10), 8); - x11 = WRAPLOW(dct_const_round_shift(s11), 8); - x14 = WRAPLOW(dct_const_round_shift(s14), 8); - x15 = WRAPLOW(dct_const_round_shift(s15), 8); - - output[0] = WRAPLOW(x0, 8); - output[1] = WRAPLOW(-x8, 8); - output[2] = WRAPLOW(x12, 8); - output[3] = WRAPLOW(-x4, 8); - output[4] = WRAPLOW(x6, 8); - output[5] = WRAPLOW(x14, 8); - output[6] = WRAPLOW(x10, 8); - output[7] = WRAPLOW(x2, 8); - output[8] = WRAPLOW(x3, 8); - output[9] = WRAPLOW(x11, 8); - output[10] = WRAPLOW(x15, 8); - output[11] = WRAPLOW(x7, 8); - output[12] = WRAPLOW(x5, 8); - output[13] = WRAPLOW(-x13, 8); - output[14] = WRAPLOW(x9, 8); - output[15] = WRAPLOW(-x1, 8); + x2 = WRAPLOW(dct_const_round_shift(s2)); + x3 = WRAPLOW(dct_const_round_shift(s3)); + x6 = WRAPLOW(dct_const_round_shift(s6)); + x7 = WRAPLOW(dct_const_round_shift(s7)); + x10 = WRAPLOW(dct_const_round_shift(s10)); + x11 = WRAPLOW(dct_const_round_shift(s11)); + x14 = WRAPLOW(dct_const_round_shift(s14)); + x15 = WRAPLOW(dct_const_round_shift(s15)); + + output[0] = WRAPLOW(x0); + output[1] = WRAPLOW(-x8); + output[2] = WRAPLOW(x12); + output[3] = WRAPLOW(-x4); + output[4] = WRAPLOW(x6); + output[5] = WRAPLOW(x14); + output[6] = WRAPLOW(x10); + output[7] = WRAPLOW(x2); + output[8] = WRAPLOW(x3); + output[9] = WRAPLOW(x11); + output[10] = WRAPLOW(x15); + output[11] = WRAPLOW(x7); + output[12] = WRAPLOW(x5); + output[13] = WRAPLOW(-x13); + output[14] = WRAPLOW(x9); + output[15] = WRAPLOW(-x1); } void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, @@ -779,8 +779,8 @@ void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) @@ -813,43 +813,43 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[31] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[16] = WRAPLOW(dct_const_round_shift(temp1)); + step1[31] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[17] = WRAPLOW(dct_const_round_shift(temp1)); + step1[30] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[19] = WRAPLOW(dct_const_round_shift(temp1)); + step1[28] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[23] = WRAPLOW(dct_const_round_shift(temp1)); + step1[24] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 step2[0] = step1[0]; @@ -863,40 +863,40 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[8] = WRAPLOW(dct_const_round_shift(temp1)); + step2[15] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); - - step2[16] = WRAPLOW(step1[16] + step1[17], 8); - step2[17] = WRAPLOW(step1[16] - step1[17], 8); - step2[18] = WRAPLOW(-step1[18] + step1[19], 8); - step2[19] = WRAPLOW(step1[18] + step1[19], 8); - step2[20] = WRAPLOW(step1[20] + step1[21], 8); - step2[21] = WRAPLOW(step1[20] - step1[21], 8); - step2[22] = WRAPLOW(-step1[22] + step1[23], 8); - step2[23] = WRAPLOW(step1[22] + step1[23], 8); - step2[24] = WRAPLOW(step1[24] + step1[25], 8); - step2[25] = WRAPLOW(step1[24] - step1[25], 8); - step2[26] = WRAPLOW(-step1[26] + step1[27], 8); - step2[27] = WRAPLOW(step1[26] + step1[27], 8); - step2[28] = WRAPLOW(step1[28] + step1[29], 8); - step2[29] = WRAPLOW(step1[28] - step1[29], 8); - step2[30] = WRAPLOW(-step1[30] + step1[31], 8); - step2[31] = WRAPLOW(step1[30] + step1[31], 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); + + step2[16] = WRAPLOW(step1[16] + step1[17]); + step2[17] = WRAPLOW(step1[16] - step1[17]); + step2[18] = WRAPLOW(-step1[18] + step1[19]); + step2[19] = WRAPLOW(step1[18] + step1[19]); + step2[20] = WRAPLOW(step1[20] + step1[21]); + step2[21] = WRAPLOW(step1[20] - step1[21]); + step2[22] = WRAPLOW(-step1[22] + step1[23]); + step2[23] = WRAPLOW(step1[22] + step1[23]); + step2[24] = WRAPLOW(step1[24] + step1[25]); + step2[25] = WRAPLOW(step1[24] - step1[25]); + step2[26] = WRAPLOW(-step1[26] + step1[27]); + step2[27] = WRAPLOW(step1[26] + step1[27]); + step2[28] = WRAPLOW(step1[28] + step1[29]); + step2[29] = WRAPLOW(step1[28] - step1[29]); + step2[30] = WRAPLOW(-step1[30] + step1[31]); + step2[31] = WRAPLOW(step1[30] + step1[31]); // stage 3 step1[0] = step2[0]; @@ -906,42 +906,42 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - - step1[8] = WRAPLOW(step2[8] + step2[9], 8); - step1[9] = WRAPLOW(step2[8] - step2[9], 8); - step1[10] = WRAPLOW(-step2[10] + step2[11], 8); - step1[11] = WRAPLOW(step2[10] + step2[11], 8); - step1[12] = WRAPLOW(step2[12] + step2[13], 8); - step1[13] = WRAPLOW(step2[12] - step2[13], 8); - step1[14] = WRAPLOW(-step2[14] + step2[15], 8); - step1[15] = WRAPLOW(step2[14] + step2[15], 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + + step1[8] = WRAPLOW(step2[8] + step2[9]); + step1[9] = WRAPLOW(step2[8] - step2[9]); + step1[10] = WRAPLOW(-step2[10] + step2[11]); + step1[11] = WRAPLOW(step2[10] + step2[11]); + step1[12] = WRAPLOW(step2[12] + step2[13]); + step1[13] = WRAPLOW(step2[12] - step2[13]); + step1[14] = WRAPLOW(-step2[14] + step2[15]); + step1[15] = WRAPLOW(step2[14] + step2[15]); step1[16] = step2[16]; step1[31] = step2[31]; temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[17] = WRAPLOW(dct_const_round_shift(temp1)); + step1[30] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = WRAPLOW(dct_const_round_shift(temp2)); step1[19] = step2[19]; step1[20] = step2[20]; temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; @@ -950,87 +950,87 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); step2[11] = step1[11]; step2[12] = step1[12]; - step2[16] = WRAPLOW(step1[16] + step1[19], 8); - step2[17] = WRAPLOW(step1[17] + step1[18], 8); - step2[18] = WRAPLOW(step1[17] - step1[18], 8); - step2[19] = WRAPLOW(step1[16] - step1[19], 8); - step2[20] = WRAPLOW(-step1[20] + step1[23], 8); - step2[21] = WRAPLOW(-step1[21] + step1[22], 8); - step2[22] = WRAPLOW(step1[21] + step1[22], 8); - step2[23] = WRAPLOW(step1[20] + step1[23], 8); - - step2[24] = WRAPLOW(step1[24] + step1[27], 8); - step2[25] = WRAPLOW(step1[25] + step1[26], 8); - step2[26] = WRAPLOW(step1[25] - step1[26], 8); - step2[27] = WRAPLOW(step1[24] - step1[27], 8); - step2[28] = WRAPLOW(-step1[28] + step1[31], 8); - step2[29] = WRAPLOW(-step1[29] + step1[30], 8); - step2[30] = WRAPLOW(step1[29] + step1[30], 8); - step2[31] = WRAPLOW(step1[28] + step1[31], 8); + step2[16] = WRAPLOW(step1[16] + step1[19]); + step2[17] = WRAPLOW(step1[17] + step1[18]); + step2[18] = WRAPLOW(step1[17] - step1[18]); + step2[19] = WRAPLOW(step1[16] - step1[19]); + step2[20] = WRAPLOW(-step1[20] + step1[23]); + step2[21] = WRAPLOW(-step1[21] + step1[22]); + step2[22] = WRAPLOW(step1[21] + step1[22]); + step2[23] = WRAPLOW(step1[20] + step1[23]); + + step2[24] = WRAPLOW(step1[24] + step1[27]); + step2[25] = WRAPLOW(step1[25] + step1[26]); + step2[26] = WRAPLOW(step1[25] - step1[26]); + step2[27] = WRAPLOW(step1[24] - step1[27]); + step2[28] = WRAPLOW(-step1[28] + step1[31]); + step2[29] = WRAPLOW(-step1[29] + step1[30]); + step2[30] = WRAPLOW(step1[29] + step1[30]); + step2[31] = WRAPLOW(step1[28] + step1[31]); // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], 8); - step1[9] = WRAPLOW(step2[9] + step2[10], 8); - step1[10] = WRAPLOW(step2[9] - step2[10], 8); - step1[11] = WRAPLOW(step2[8] - step2[11], 8); - step1[12] = WRAPLOW(-step2[12] + step2[15], 8); - step1[13] = WRAPLOW(-step2[13] + step2[14], 8); - step1[14] = WRAPLOW(step2[13] + step2[14], 8); - step1[15] = WRAPLOW(step2[12] + step2[15], 8); + step1[8] = WRAPLOW(step2[8] + step2[11]); + step1[9] = WRAPLOW(step2[9] + step2[10]); + step1[10] = WRAPLOW(step2[9] - step2[10]); + step1[11] = WRAPLOW(step2[8] - step2[11]); + step1[12] = WRAPLOW(-step2[12] + step2[15]); + step1[13] = WRAPLOW(-step2[13] + step2[14]); + step1[14] = WRAPLOW(step2[13] + step2[14]); + step1[15] = WRAPLOW(step2[12] + step2[15]); step1[16] = step2[16]; step1[17] = step2[17]; temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[19] = WRAPLOW(dct_const_round_shift(temp1)); + step1[28] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; @@ -1039,62 +1039,62 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { step1[31] = step2[31]; // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], 8); - step2[1] = WRAPLOW(step1[1] + step1[6], 8); - step2[2] = WRAPLOW(step1[2] + step1[5], 8); - step2[3] = WRAPLOW(step1[3] + step1[4], 8); - step2[4] = WRAPLOW(step1[3] - step1[4], 8); - step2[5] = WRAPLOW(step1[2] - step1[5], 8); - step2[6] = WRAPLOW(step1[1] - step1[6], 8); - step2[7] = WRAPLOW(step1[0] - step1[7], 8); + step2[0] = WRAPLOW(step1[0] + step1[7]); + step2[1] = WRAPLOW(step1[1] + step1[6]); + step2[2] = WRAPLOW(step1[2] + step1[5]); + step2[3] = WRAPLOW(step1[3] + step1[4]); + step2[4] = WRAPLOW(step1[3] - step1[4]); + step2[5] = WRAPLOW(step1[2] - step1[5]); + step2[6] = WRAPLOW(step1[1] - step1[6]); + step2[7] = WRAPLOW(step1[0] - step1[7]); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); step2[14] = step1[14]; step2[15] = step1[15]; - step2[16] = WRAPLOW(step1[16] + step1[23], 8); - step2[17] = WRAPLOW(step1[17] + step1[22], 8); - step2[18] = WRAPLOW(step1[18] + step1[21], 8); - step2[19] = WRAPLOW(step1[19] + step1[20], 8); - step2[20] = WRAPLOW(step1[19] - step1[20], 8); - step2[21] = WRAPLOW(step1[18] - step1[21], 8); - step2[22] = WRAPLOW(step1[17] - step1[22], 8); - step2[23] = WRAPLOW(step1[16] - step1[23], 8); - - step2[24] = WRAPLOW(-step1[24] + step1[31], 8); - step2[25] = WRAPLOW(-step1[25] + step1[30], 8); - step2[26] = WRAPLOW(-step1[26] + step1[29], 8); - step2[27] = WRAPLOW(-step1[27] + step1[28], 8); - step2[28] = WRAPLOW(step1[27] + step1[28], 8); - step2[29] = WRAPLOW(step1[26] + step1[29], 8); - step2[30] = WRAPLOW(step1[25] + step1[30], 8); - step2[31] = WRAPLOW(step1[24] + step1[31], 8); + step2[16] = WRAPLOW(step1[16] + step1[23]); + step2[17] = WRAPLOW(step1[17] + step1[22]); + step2[18] = WRAPLOW(step1[18] + step1[21]); + step2[19] = WRAPLOW(step1[19] + step1[20]); + step2[20] = WRAPLOW(step1[19] - step1[20]); + step2[21] = WRAPLOW(step1[18] - step1[21]); + step2[22] = WRAPLOW(step1[17] - step1[22]); + step2[23] = WRAPLOW(step1[16] - step1[23]); + + step2[24] = WRAPLOW(-step1[24] + step1[31]); + step2[25] = WRAPLOW(-step1[25] + step1[30]); + step2[26] = WRAPLOW(-step1[26] + step1[29]); + step2[27] = WRAPLOW(-step1[27] + step1[28]); + step2[28] = WRAPLOW(step1[27] + step1[28]); + step2[29] = WRAPLOW(step1[26] + step1[29]); + step2[30] = WRAPLOW(step1[25] + step1[30]); + step2[31] = WRAPLOW(step1[24] + step1[31]); // stage 7 - step1[0] = WRAPLOW(step2[0] + step2[15], 8); - step1[1] = WRAPLOW(step2[1] + step2[14], 8); - step1[2] = WRAPLOW(step2[2] + step2[13], 8); - step1[3] = WRAPLOW(step2[3] + step2[12], 8); - step1[4] = WRAPLOW(step2[4] + step2[11], 8); - step1[5] = WRAPLOW(step2[5] + step2[10], 8); - step1[6] = WRAPLOW(step2[6] + step2[9], 8); - step1[7] = WRAPLOW(step2[7] + step2[8], 8); - step1[8] = WRAPLOW(step2[7] - step2[8], 8); - step1[9] = WRAPLOW(step2[6] - step2[9], 8); - step1[10] = WRAPLOW(step2[5] - step2[10], 8); - step1[11] = WRAPLOW(step2[4] - step2[11], 8); - step1[12] = WRAPLOW(step2[3] - step2[12], 8); - step1[13] = WRAPLOW(step2[2] - step2[13], 8); - step1[14] = WRAPLOW(step2[1] - step2[14], 8); - step1[15] = WRAPLOW(step2[0] - step2[15], 8); + step1[0] = WRAPLOW(step2[0] + step2[15]); + step1[1] = WRAPLOW(step2[1] + step2[14]); + step1[2] = WRAPLOW(step2[2] + step2[13]); + step1[3] = WRAPLOW(step2[3] + step2[12]); + step1[4] = WRAPLOW(step2[4] + step2[11]); + step1[5] = WRAPLOW(step2[5] + step2[10]); + step1[6] = WRAPLOW(step2[6] + step2[9]); + step1[7] = WRAPLOW(step2[7] + step2[8]); + step1[8] = WRAPLOW(step2[7] - step2[8]); + step1[9] = WRAPLOW(step2[6] - step2[9]); + step1[10] = WRAPLOW(step2[5] - step2[10]); + step1[11] = WRAPLOW(step2[4] - step2[11]); + step1[12] = WRAPLOW(step2[3] - step2[12]); + step1[13] = WRAPLOW(step2[2] - step2[13]); + step1[14] = WRAPLOW(step2[1] - step2[14]); + step1[15] = WRAPLOW(step2[0] - step2[15]); step1[16] = step2[16]; step1[17] = step2[17]; @@ -1102,58 +1102,58 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { step1[19] = step2[19]; temp1 = (-step2[20] + step2[27]) * cospi_16_64; temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[21] + step2[26]) * cospi_16_64; temp2 = (step2[21] + step2[26]) * cospi_16_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[22] + step2[25]) * cospi_16_64; temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[23] + step2[24]) * cospi_16_64; temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[23] = WRAPLOW(dct_const_round_shift(temp1)); + step1[24] = WRAPLOW(dct_const_round_shift(temp2)); step1[28] = step2[28]; step1[29] = step2[29]; step1[30] = step2[30]; step1[31] = step2[31]; // final stage - output[0] = WRAPLOW(step1[0] + step1[31], 8); - output[1] = WRAPLOW(step1[1] + step1[30], 8); - output[2] = WRAPLOW(step1[2] + step1[29], 8); - output[3] = WRAPLOW(step1[3] + step1[28], 8); - output[4] = WRAPLOW(step1[4] + step1[27], 8); - output[5] = WRAPLOW(step1[5] + step1[26], 8); - output[6] = WRAPLOW(step1[6] + step1[25], 8); - output[7] = WRAPLOW(step1[7] + step1[24], 8); - output[8] = WRAPLOW(step1[8] + step1[23], 8); - output[9] = WRAPLOW(step1[9] + step1[22], 8); - output[10] = WRAPLOW(step1[10] + step1[21], 8); - output[11] = WRAPLOW(step1[11] + step1[20], 8); - output[12] = WRAPLOW(step1[12] + step1[19], 8); - output[13] = WRAPLOW(step1[13] + step1[18], 8); - output[14] = WRAPLOW(step1[14] + step1[17], 8); - output[15] = WRAPLOW(step1[15] + step1[16], 8); - output[16] = WRAPLOW(step1[15] - step1[16], 8); - output[17] = WRAPLOW(step1[14] - step1[17], 8); - output[18] = WRAPLOW(step1[13] - step1[18], 8); - output[19] = WRAPLOW(step1[12] - step1[19], 8); - output[20] = WRAPLOW(step1[11] - step1[20], 8); - output[21] = WRAPLOW(step1[10] - step1[21], 8); - output[22] = WRAPLOW(step1[9] - step1[22], 8); - output[23] = WRAPLOW(step1[8] - step1[23], 8); - output[24] = WRAPLOW(step1[7] - step1[24], 8); - output[25] = WRAPLOW(step1[6] - step1[25], 8); - output[26] = WRAPLOW(step1[5] - step1[26], 8); - output[27] = WRAPLOW(step1[4] - step1[27], 8); - output[28] = WRAPLOW(step1[3] - step1[28], 8); - output[29] = WRAPLOW(step1[2] - step1[29], 8); - output[30] = WRAPLOW(step1[1] - step1[30], 8); - output[31] = WRAPLOW(step1[0] - step1[31], 8); + output[0] = WRAPLOW(step1[0] + step1[31]); + output[1] = WRAPLOW(step1[1] + step1[30]); + output[2] = WRAPLOW(step1[2] + step1[29]); + output[3] = WRAPLOW(step1[3] + step1[28]); + output[4] = WRAPLOW(step1[4] + step1[27]); + output[5] = WRAPLOW(step1[5] + step1[26]); + output[6] = WRAPLOW(step1[6] + step1[25]); + output[7] = WRAPLOW(step1[7] + step1[24]); + output[8] = WRAPLOW(step1[8] + step1[23]); + output[9] = WRAPLOW(step1[9] + step1[22]); + output[10] = WRAPLOW(step1[10] + step1[21]); + output[11] = WRAPLOW(step1[11] + step1[20]); + output[12] = WRAPLOW(step1[12] + step1[19]); + output[13] = WRAPLOW(step1[13] + step1[18]); + output[14] = WRAPLOW(step1[14] + step1[17]); + output[15] = WRAPLOW(step1[15] + step1[16]); + output[16] = WRAPLOW(step1[15] - step1[16]); + output[17] = WRAPLOW(step1[14] - step1[17]); + output[18] = WRAPLOW(step1[13] - step1[18]); + output[19] = WRAPLOW(step1[12] - step1[19]); + output[20] = WRAPLOW(step1[11] - step1[20]); + output[21] = WRAPLOW(step1[10] - step1[21]); + output[22] = WRAPLOW(step1[9] - step1[22]); + output[23] = WRAPLOW(step1[8] - step1[23]); + output[24] = WRAPLOW(step1[7] - step1[24]); + output[25] = WRAPLOW(step1[6] - step1[25]); + output[26] = WRAPLOW(step1[5] - step1[26]); + output[27] = WRAPLOW(step1[4] - step1[27]); + output[28] = WRAPLOW(step1[3] - step1[28]); + output[29] = WRAPLOW(step1[2] - step1[29]); + output[30] = WRAPLOW(step1[1] - step1[30]); + output[31] = WRAPLOW(step1[0] - step1[31]); } void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, @@ -1253,8 +1253,8 @@ void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 32; ++j) { @@ -1288,10 +1288,10 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, c1 = e1 - c1; a1 -= b1; d1 += c1; - op[0] = WRAPLOW(a1, bd); - op[1] = WRAPLOW(b1, bd); - op[2] = WRAPLOW(c1, bd); - op[3] = WRAPLOW(d1, bd); + op[0] = HIGHBD_WRAPLOW(a1, bd); + op[1] = HIGHBD_WRAPLOW(b1, bd); + op[2] = HIGHBD_WRAPLOW(c1, bd); + op[3] = HIGHBD_WRAPLOW(d1, bd); ip += 4; op += 4; } @@ -1332,8 +1332,8 @@ void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, a1 = ip[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; a1 -= e1; - op[0] = WRAPLOW(a1, bd); - op[1] = op[2] = op[3] = WRAPLOW(e1, bd); + op[0] = HIGHBD_WRAPLOW(a1, bd); + op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd); ip = tmp; for (i = 0; i < 4; i++) { @@ -1359,18 +1359,18 @@ void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { // stage 1 temp1 = (input[0] + input[2]) * cospi_16_64; temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 2 - output[0] = WRAPLOW(step[0] + step[3], bd); - output[1] = WRAPLOW(step[1] + step[2], bd); - output[2] = WRAPLOW(step[1] - step[2], bd); - output[3] = WRAPLOW(step[0] - step[3], bd); + output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); + output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd); + output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); + output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd); } void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1404,11 +1404,11 @@ void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, int dest_stride, int bd) { int i; tran_high_t a1; - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 4); for (i = 0; i < 4; i++) { @@ -1430,39 +1430,39 @@ void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { step1[3] = input[6]; temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 2 & stage 3 - even half vpx_highbd_idct4_c(step1, step1, bd); // stage 2 - odd half - step2[4] = WRAPLOW(step1[4] + step1[5], bd); - step2[5] = WRAPLOW(step1[4] - step1[5], bd); - step2[6] = WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = WRAPLOW(step1[6] + step1[7], bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); // stage 3 - odd half step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[7] = step2[7]; // stage 4 - output[0] = WRAPLOW(step1[0] + step1[7], bd); - output[1] = WRAPLOW(step1[1] + step1[6], bd); - output[2] = WRAPLOW(step1[2] + step1[5], bd); - output[3] = WRAPLOW(step1[3] + step1[4], bd); - output[4] = WRAPLOW(step1[3] - step1[4], bd); - output[5] = WRAPLOW(step1[2] - step1[5], bd); - output[6] = WRAPLOW(step1[1] - step1[6], bd); - output[7] = WRAPLOW(step1[0] - step1[7], bd); + output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); } void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1496,10 +1496,10 @@ void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { for (i = 0; i < 8; ++i) @@ -1540,10 +1540,10 @@ void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. - output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); - output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); - output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); - output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); + output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd); + output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd); + output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd); } void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { @@ -1574,14 +1574,14 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { s6 = cospi_26_64 * x6 + cospi_6_64 * x7; s7 = cospi_6_64 * x6 - cospi_26_64 * x7; - x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s4, bd), bd); - x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s5, bd), bd); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s6, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s7, bd), bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s0 - s4, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s1 - s5, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s2 - s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s3 - s7, bd), bd); + x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd); + x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd); // stage 2 s0 = x0; @@ -1593,14 +1593,14 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; s7 = cospi_8_64 * x6 + cospi_24_64 * x7; - x0 = WRAPLOW(s0 + s2, bd); - x1 = WRAPLOW(s1 + s3, bd); - x2 = WRAPLOW(s0 - s2, bd); - x3 = WRAPLOW(s1 - s3, bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); + x0 = HIGHBD_WRAPLOW(s0 + s2, bd); + x1 = HIGHBD_WRAPLOW(s1 + s3, bd); + x2 = HIGHBD_WRAPLOW(s0 - s2, bd); + x3 = HIGHBD_WRAPLOW(s1 - s3, bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); // stage 3 s2 = cospi_16_64 * (x2 + x3); @@ -1608,19 +1608,19 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (x6 - x7); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); - - output[0] = WRAPLOW(x0, bd); - output[1] = WRAPLOW(-x4, bd); - output[2] = WRAPLOW(x6, bd); - output[3] = WRAPLOW(-x2, bd); - output[4] = WRAPLOW(x3, bd); - output[5] = WRAPLOW(-x7, bd); - output[6] = WRAPLOW(x5, bd); - output[7] = WRAPLOW(-x1, bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); + + output[0] = HIGHBD_WRAPLOW(x0, bd); + output[1] = HIGHBD_WRAPLOW(-x4, bd); + output[2] = HIGHBD_WRAPLOW(x6, bd); + output[3] = HIGHBD_WRAPLOW(-x2, bd); + output[4] = HIGHBD_WRAPLOW(x3, bd); + output[5] = HIGHBD_WRAPLOW(-x7, bd); + output[6] = HIGHBD_WRAPLOW(x5, bd); + output[7] = HIGHBD_WRAPLOW(-x1, bd); } void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1685,23 +1685,23 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 3 step1[0] = step2[0]; @@ -1711,109 +1711,109 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - - step1[8] = WRAPLOW(step2[8] + step2[9], bd); - step1[9] = WRAPLOW(step2[8] - step2[9], bd); - step1[10] = WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = WRAPLOW(step2[10] + step2[11], bd); - step1[12] = WRAPLOW(step2[12] + step2[13], bd); - step1[13] = WRAPLOW(step2[12] - step2[13], bd); - step1[14] = WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = WRAPLOW(step2[14] + step2[15], bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); + step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); + step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); + step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); + step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - step2[4] = WRAPLOW(step1[4] + step1[5], bd); - step2[5] = WRAPLOW(step1[4] - step1[5], bd); - step2[6] = WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = WRAPLOW(step1[6] + step1[7], bd); + step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], bd); - step1[1] = WRAPLOW(step2[1] + step2[2], bd); - step1[2] = WRAPLOW(step2[1] - step2[2], bd); - step1[3] = WRAPLOW(step2[0] - step2[3], bd); + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); + step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); + step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], bd); - step1[9] = WRAPLOW(step2[9] + step2[10], bd); - step1[10] = WRAPLOW(step2[9] - step2[10], bd); - step1[11] = WRAPLOW(step2[8] - step2[11], bd); - step1[12] = WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = WRAPLOW(step2[13] + step2[14], bd); - step1[15] = WRAPLOW(step2[12] + step2[15], bd); + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); + step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); + step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); + step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); + step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], bd); - step2[1] = WRAPLOW(step1[1] + step1[6], bd); - step2[2] = WRAPLOW(step1[2] + step1[5], bd); - step2[3] = WRAPLOW(step1[3] + step1[4], bd); - step2[4] = WRAPLOW(step1[3] - step1[4], bd); - step2[5] = WRAPLOW(step1[2] - step1[5], bd); - step2[6] = WRAPLOW(step1[1] - step1[6], bd); - step2[7] = WRAPLOW(step1[0] - step1[7], bd); + step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15], bd); - output[1] = WRAPLOW(step2[1] + step2[14], bd); - output[2] = WRAPLOW(step2[2] + step2[13], bd); - output[3] = WRAPLOW(step2[3] + step2[12], bd); - output[4] = WRAPLOW(step2[4] + step2[11], bd); - output[5] = WRAPLOW(step2[5] + step2[10], bd); - output[6] = WRAPLOW(step2[6] + step2[9], bd); - output[7] = WRAPLOW(step2[7] + step2[8], bd); - output[8] = WRAPLOW(step2[7] - step2[8], bd); - output[9] = WRAPLOW(step2[6] - step2[9], bd); - output[10] = WRAPLOW(step2[5] - step2[10], bd); - output[11] = WRAPLOW(step2[4] - step2[11], bd); - output[12] = WRAPLOW(step2[3] - step2[12], bd); - output[13] = WRAPLOW(step2[2] - step2[13], bd); - output[14] = WRAPLOW(step2[1] - step2[14], bd); - output[15] = WRAPLOW(step2[0] - step2[15], bd); + output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); + output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); + output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); + output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); + output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); + output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); + output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); + output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); + output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); + output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); + output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); + output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); + output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); + output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); + output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); + output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); } void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1889,22 +1889,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s8, bd), bd); - x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s9, bd), bd); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s10, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s11, bd), bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s12, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s13, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s6 + s14, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s7 + s15, bd), bd); - x8 = WRAPLOW(highbd_dct_const_round_shift(s0 - s8, bd), bd); - x9 = WRAPLOW(highbd_dct_const_round_shift(s1 - s9, bd), bd); - x10 = WRAPLOW(highbd_dct_const_round_shift(s2 - s10, bd), bd); - x11 = WRAPLOW(highbd_dct_const_round_shift(s3 - s11, bd), bd); - x12 = WRAPLOW(highbd_dct_const_round_shift(s4 - s12, bd), bd); - x13 = WRAPLOW(highbd_dct_const_round_shift(s5 - s13, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s6 - s14, bd), bd); - x15 = WRAPLOW(highbd_dct_const_round_shift(s7 - s15, bd), bd); + x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd); + x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd); + x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd); + x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd); // stage 2 s0 = x0; @@ -1924,22 +1924,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - x0 = WRAPLOW(s0 + s4, bd); - x1 = WRAPLOW(s1 + s5, bd); - x2 = WRAPLOW(s2 + s6, bd); - x3 = WRAPLOW(s3 + s7, bd); - x4 = WRAPLOW(s0 - s4, bd); - x5 = WRAPLOW(s1 - s5, bd); - x6 = WRAPLOW(s2 - s6, bd); - x7 = WRAPLOW(s3 - s7, bd); - x8 = WRAPLOW(highbd_dct_const_round_shift(s8 + s12, bd), bd); - x9 = WRAPLOW(highbd_dct_const_round_shift(s9 + s13, bd), bd); - x10 = WRAPLOW(highbd_dct_const_round_shift(s10 + s14, bd), bd); - x11 = WRAPLOW(highbd_dct_const_round_shift(s11 + s15, bd), bd); - x12 = WRAPLOW(highbd_dct_const_round_shift(s8 - s12, bd), bd); - x13 = WRAPLOW(highbd_dct_const_round_shift(s9 - s13, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s10 - s14, bd), bd); - x15 = WRAPLOW(highbd_dct_const_round_shift(s11 - s15, bd), bd); + x0 = HIGHBD_WRAPLOW(s0 + s4, bd); + x1 = HIGHBD_WRAPLOW(s1 + s5, bd); + x2 = HIGHBD_WRAPLOW(s2 + s6, bd); + x3 = HIGHBD_WRAPLOW(s3 + s7, bd); + x4 = HIGHBD_WRAPLOW(s0 - s4, bd); + x5 = HIGHBD_WRAPLOW(s1 - s5, bd); + x6 = HIGHBD_WRAPLOW(s2 - s6, bd); + x7 = HIGHBD_WRAPLOW(s3 - s7, bd); + x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd); + x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd); // stage 3 s0 = x0; @@ -1959,22 +1959,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - x0 = WRAPLOW(s0 + s2, bd); - x1 = WRAPLOW(s1 + s3, bd); - x2 = WRAPLOW(s0 - s2, bd); - x3 = WRAPLOW(s1 - s3, bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); - x8 = WRAPLOW(s8 + s10, bd); - x9 = WRAPLOW(s9 + s11, bd); - x10 = WRAPLOW(s8 - s10, bd); - x11 = WRAPLOW(s9 - s11, bd); - x12 = WRAPLOW(highbd_dct_const_round_shift(s12 + s14, bd), bd); - x13 = WRAPLOW(highbd_dct_const_round_shift(s13 + s15, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s12 - s14, bd), bd); - x15 = WRAPLOW(highbd_dct_const_round_shift(s13 - s15, bd), bd); + x0 = HIGHBD_WRAPLOW(s0 + s2, bd); + x1 = HIGHBD_WRAPLOW(s1 + s3, bd); + x2 = HIGHBD_WRAPLOW(s0 - s2, bd); + x3 = HIGHBD_WRAPLOW(s1 - s3, bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); + x8 = HIGHBD_WRAPLOW(s8 + s10, bd); + x9 = HIGHBD_WRAPLOW(s9 + s11, bd); + x10 = HIGHBD_WRAPLOW(s8 - s10, bd); + x11 = HIGHBD_WRAPLOW(s9 - s11, bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd); // stage 4 s2 = (- cospi_16_64) * (x2 + x3); @@ -1986,31 +1986,31 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = (- cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); - x10 = WRAPLOW(highbd_dct_const_round_shift(s10, bd), bd); - x11 = WRAPLOW(highbd_dct_const_round_shift(s11, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s14, bd), bd); - x15 = WRAPLOW(highbd_dct_const_round_shift(s15, bd), bd); - - output[0] = WRAPLOW(x0, bd); - output[1] = WRAPLOW(-x8, bd); - output[2] = WRAPLOW(x12, bd); - output[3] = WRAPLOW(-x4, bd); - output[4] = WRAPLOW(x6, bd); - output[5] = WRAPLOW(x14, bd); - output[6] = WRAPLOW(x10, bd); - output[7] = WRAPLOW(x2, bd); - output[8] = WRAPLOW(x3, bd); - output[9] = WRAPLOW(x11, bd); - output[10] = WRAPLOW(x15, bd); - output[11] = WRAPLOW(x7, bd); - output[12] = WRAPLOW(x5, bd); - output[13] = WRAPLOW(-x13, bd); - output[14] = WRAPLOW(x9, bd); - output[15] = WRAPLOW(-x1, bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd); + + output[0] = HIGHBD_WRAPLOW(x0, bd); + output[1] = HIGHBD_WRAPLOW(-x8, bd); + output[2] = HIGHBD_WRAPLOW(x12, bd); + output[3] = HIGHBD_WRAPLOW(-x4, bd); + output[4] = HIGHBD_WRAPLOW(x6, bd); + output[5] = HIGHBD_WRAPLOW(x14, bd); + output[6] = HIGHBD_WRAPLOW(x10, bd); + output[7] = HIGHBD_WRAPLOW(x2, bd); + output[8] = HIGHBD_WRAPLOW(x3, bd); + output[9] = HIGHBD_WRAPLOW(x11, bd); + output[10] = HIGHBD_WRAPLOW(x15, bd); + output[11] = HIGHBD_WRAPLOW(x7, bd); + output[12] = HIGHBD_WRAPLOW(x5, bd); + output[13] = HIGHBD_WRAPLOW(-x13, bd); + output[14] = HIGHBD_WRAPLOW(x9, bd); + output[15] = HIGHBD_WRAPLOW(-x1, bd); } void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, @@ -2045,11 +2045,11 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) @@ -2084,43 +2084,43 @@ static void highbd_idct32_c(const tran_low_t *input, temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[31] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; - step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 2 step2[0] = step1[0]; @@ -2134,40 +2134,40 @@ static void highbd_idct32_c(const tran_low_t *input, temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - - step2[16] = WRAPLOW(step1[16] + step1[17], bd); - step2[17] = WRAPLOW(step1[16] - step1[17], bd); - step2[18] = WRAPLOW(-step1[18] + step1[19], bd); - step2[19] = WRAPLOW(step1[18] + step1[19], bd); - step2[20] = WRAPLOW(step1[20] + step1[21], bd); - step2[21] = WRAPLOW(step1[20] - step1[21], bd); - step2[22] = WRAPLOW(-step1[22] + step1[23], bd); - step2[23] = WRAPLOW(step1[22] + step1[23], bd); - step2[24] = WRAPLOW(step1[24] + step1[25], bd); - step2[25] = WRAPLOW(step1[24] - step1[25], bd); - step2[26] = WRAPLOW(-step1[26] + step1[27], bd); - step2[27] = WRAPLOW(step1[26] + step1[27], bd); - step2[28] = WRAPLOW(step1[28] + step1[29], bd); - step2[29] = WRAPLOW(step1[28] - step1[29], bd); - step2[30] = WRAPLOW(-step1[30] + step1[31], bd); - step2[31] = WRAPLOW(step1[30] + step1[31], bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd); + step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd); + step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd); + step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd); + step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd); + step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd); + step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd); + step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd); + step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd); + step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd); + step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd); + step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd); + step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd); + step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd); + step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd); + step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd); // stage 3 step1[0] = step2[0]; @@ -2177,42 +2177,42 @@ static void highbd_idct32_c(const tran_low_t *input, temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - - step1[8] = WRAPLOW(step2[8] + step2[9], bd); - step1[9] = WRAPLOW(step2[8] - step2[9], bd); - step1[10] = WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = WRAPLOW(step2[10] + step2[11], bd); - step1[12] = WRAPLOW(step2[12] + step2[13], bd); - step1[13] = WRAPLOW(step2[12] - step2[13], bd); - step1[14] = WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = WRAPLOW(step2[14] + step2[15], bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); + step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); + step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); + step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); + step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); step1[16] = step2[16]; step1[31] = step2[31]; temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[19] = step2[19]; step1[20] = step2[20]; temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; @@ -2221,87 +2221,87 @@ static void highbd_idct32_c(const tran_low_t *input, // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - step2[4] = WRAPLOW(step1[4] + step1[5], bd); - step2[5] = WRAPLOW(step1[4] - step1[5], bd); - step2[6] = WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = WRAPLOW(step1[6] + step1[7], bd); + step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[11] = step1[11]; step2[12] = step1[12]; - step2[16] = WRAPLOW(step1[16] + step1[19], bd); - step2[17] = WRAPLOW(step1[17] + step1[18], bd); - step2[18] = WRAPLOW(step1[17] - step1[18], bd); - step2[19] = WRAPLOW(step1[16] - step1[19], bd); - step2[20] = WRAPLOW(-step1[20] + step1[23], bd); - step2[21] = WRAPLOW(-step1[21] + step1[22], bd); - step2[22] = WRAPLOW(step1[21] + step1[22], bd); - step2[23] = WRAPLOW(step1[20] + step1[23], bd); - - step2[24] = WRAPLOW(step1[24] + step1[27], bd); - step2[25] = WRAPLOW(step1[25] + step1[26], bd); - step2[26] = WRAPLOW(step1[25] - step1[26], bd); - step2[27] = WRAPLOW(step1[24] - step1[27], bd); - step2[28] = WRAPLOW(-step1[28] + step1[31], bd); - step2[29] = WRAPLOW(-step1[29] + step1[30], bd); - step2[30] = WRAPLOW(step1[29] + step1[30], bd); - step2[31] = WRAPLOW(step1[28] + step1[31], bd); + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd); + step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd); + step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd); + step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd); + step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd); + step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd); + step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd); + step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd); + + step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd); + step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd); + step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd); + step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd); + step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd); + step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd); + step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd); + step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd); // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], bd); - step1[1] = WRAPLOW(step2[1] + step2[2], bd); - step1[2] = WRAPLOW(step2[1] - step2[2], bd); - step1[3] = WRAPLOW(step2[0] - step2[3], bd); + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); + step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); + step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], bd); - step1[9] = WRAPLOW(step2[9] + step2[10], bd); - step1[10] = WRAPLOW(step2[9] - step2[10], bd); - step1[11] = WRAPLOW(step2[8] - step2[11], bd); - step1[12] = WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = WRAPLOW(step2[13] + step2[14], bd); - step1[15] = WRAPLOW(step2[12] + step2[15], bd); + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); + step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); + step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); + step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); + step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); step1[16] = step2[16]; step1[17] = step2[17]; temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; @@ -2310,62 +2310,62 @@ static void highbd_idct32_c(const tran_low_t *input, step1[31] = step2[31]; // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], bd); - step2[1] = WRAPLOW(step1[1] + step1[6], bd); - step2[2] = WRAPLOW(step1[2] + step1[5], bd); - step2[3] = WRAPLOW(step1[3] + step1[4], bd); - step2[4] = WRAPLOW(step1[3] - step1[4], bd); - step2[5] = WRAPLOW(step1[2] - step1[5], bd); - step2[6] = WRAPLOW(step1[1] - step1[6], bd); - step2[7] = WRAPLOW(step1[0] - step1[7], bd); + step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[14] = step1[14]; step2[15] = step1[15]; - step2[16] = WRAPLOW(step1[16] + step1[23], bd); - step2[17] = WRAPLOW(step1[17] + step1[22], bd); - step2[18] = WRAPLOW(step1[18] + step1[21], bd); - step2[19] = WRAPLOW(step1[19] + step1[20], bd); - step2[20] = WRAPLOW(step1[19] - step1[20], bd); - step2[21] = WRAPLOW(step1[18] - step1[21], bd); - step2[22] = WRAPLOW(step1[17] - step1[22], bd); - step2[23] = WRAPLOW(step1[16] - step1[23], bd); - - step2[24] = WRAPLOW(-step1[24] + step1[31], bd); - step2[25] = WRAPLOW(-step1[25] + step1[30], bd); - step2[26] = WRAPLOW(-step1[26] + step1[29], bd); - step2[27] = WRAPLOW(-step1[27] + step1[28], bd); - step2[28] = WRAPLOW(step1[27] + step1[28], bd); - step2[29] = WRAPLOW(step1[26] + step1[29], bd); - step2[30] = WRAPLOW(step1[25] + step1[30], bd); - step2[31] = WRAPLOW(step1[24] + step1[31], bd); + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd); + step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd); + step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd); + step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd); + step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd); + step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd); + step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd); + step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd); + + step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd); + step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd); + step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd); + step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd); + step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd); + step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd); + step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd); + step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd); // stage 7 - step1[0] = WRAPLOW(step2[0] + step2[15], bd); - step1[1] = WRAPLOW(step2[1] + step2[14], bd); - step1[2] = WRAPLOW(step2[2] + step2[13], bd); - step1[3] = WRAPLOW(step2[3] + step2[12], bd); - step1[4] = WRAPLOW(step2[4] + step2[11], bd); - step1[5] = WRAPLOW(step2[5] + step2[10], bd); - step1[6] = WRAPLOW(step2[6] + step2[9], bd); - step1[7] = WRAPLOW(step2[7] + step2[8], bd); - step1[8] = WRAPLOW(step2[7] - step2[8], bd); - step1[9] = WRAPLOW(step2[6] - step2[9], bd); - step1[10] = WRAPLOW(step2[5] - step2[10], bd); - step1[11] = WRAPLOW(step2[4] - step2[11], bd); - step1[12] = WRAPLOW(step2[3] - step2[12], bd); - step1[13] = WRAPLOW(step2[2] - step2[13], bd); - step1[14] = WRAPLOW(step2[1] - step2[14], bd); - step1[15] = WRAPLOW(step2[0] - step2[15], bd); + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); + step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); + step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); + step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); + step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); + step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); + step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); + step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); + step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); + step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); step1[16] = step2[16]; step1[17] = step2[17]; @@ -2373,58 +2373,58 @@ static void highbd_idct32_c(const tran_low_t *input, step1[19] = step2[19]; temp1 = (-step2[20] + step2[27]) * cospi_16_64; temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step2[21] + step2[26]) * cospi_16_64; temp2 = (step2[21] + step2[26]) * cospi_16_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step2[22] + step2[25]) * cospi_16_64; temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step2[23] + step2[24]) * cospi_16_64; temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[28] = step2[28]; step1[29] = step2[29]; step1[30] = step2[30]; step1[31] = step2[31]; // final stage - output[0] = WRAPLOW(step1[0] + step1[31], bd); - output[1] = WRAPLOW(step1[1] + step1[30], bd); - output[2] = WRAPLOW(step1[2] + step1[29], bd); - output[3] = WRAPLOW(step1[3] + step1[28], bd); - output[4] = WRAPLOW(step1[4] + step1[27], bd); - output[5] = WRAPLOW(step1[5] + step1[26], bd); - output[6] = WRAPLOW(step1[6] + step1[25], bd); - output[7] = WRAPLOW(step1[7] + step1[24], bd); - output[8] = WRAPLOW(step1[8] + step1[23], bd); - output[9] = WRAPLOW(step1[9] + step1[22], bd); - output[10] = WRAPLOW(step1[10] + step1[21], bd); - output[11] = WRAPLOW(step1[11] + step1[20], bd); - output[12] = WRAPLOW(step1[12] + step1[19], bd); - output[13] = WRAPLOW(step1[13] + step1[18], bd); - output[14] = WRAPLOW(step1[14] + step1[17], bd); - output[15] = WRAPLOW(step1[15] + step1[16], bd); - output[16] = WRAPLOW(step1[15] - step1[16], bd); - output[17] = WRAPLOW(step1[14] - step1[17], bd); - output[18] = WRAPLOW(step1[13] - step1[18], bd); - output[19] = WRAPLOW(step1[12] - step1[19], bd); - output[20] = WRAPLOW(step1[11] - step1[20], bd); - output[21] = WRAPLOW(step1[10] - step1[21], bd); - output[22] = WRAPLOW(step1[9] - step1[22], bd); - output[23] = WRAPLOW(step1[8] - step1[23], bd); - output[24] = WRAPLOW(step1[7] - step1[24], bd); - output[25] = WRAPLOW(step1[6] - step1[25], bd); - output[26] = WRAPLOW(step1[5] - step1[26], bd); - output[27] = WRAPLOW(step1[4] - step1[27], bd); - output[28] = WRAPLOW(step1[3] - step1[28], bd); - output[29] = WRAPLOW(step1[2] - step1[29], bd); - output[30] = WRAPLOW(step1[1] - step1[30], bd); - output[31] = WRAPLOW(step1[0] - step1[31], bd); + output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd); + output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd); + output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd); + output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd); + output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd); + output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd); + output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd); + output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd); + output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd); + output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd); + output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd); + output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd); + output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd); + output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd); + output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd); + output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd); + output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd); + output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd); + output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd); + output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd); + output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd); + output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd); + output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd); + output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd); + output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd); + output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd); + output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd); + output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd); + output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd); + output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd); + output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd); + output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd); } void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, @@ -2500,9 +2500,9 @@ void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, int a1; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 32; ++j) { diff --git a/vpx_dsp/inv_txfm.h b/vpx_dsp/inv_txfm.h index 23588139e..c407dd896 100644 --- a/vpx_dsp/inv_txfm.h +++ b/vpx_dsp/inv_txfm.h @@ -21,7 +21,7 @@ extern "C" { #endif -static INLINE tran_low_t check_range(tran_high_t input) { +static INLINE tran_high_t check_range(tran_high_t input) { #if CONFIG_COEFFICIENT_RANGE_CHECKING // For valid VP9 input streams, intermediate stage coefficients should always // stay within the range of a signed 16 bit integer. Coefficients can go out @@ -32,17 +32,17 @@ static INLINE tran_low_t check_range(tran_high_t input) { assert(INT16_MIN <= input); assert(input <= INT16_MAX); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING - return (tran_low_t)input; + return input; } -static INLINE tran_low_t dct_const_round_shift(tran_high_t input) { +static INLINE tran_high_t dct_const_round_shift(tran_high_t input) { tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return check_range(rv); + return (tran_high_t)rv; } #if CONFIG_VP9_HIGHBITDEPTH -static INLINE tran_low_t highbd_check_range(tran_high_t input, - int bd) { +static INLINE tran_high_t highbd_check_range(tran_high_t input, + int bd) { #if CONFIG_COEFFICIENT_RANGE_CHECKING // For valid highbitdepth VP9 streams, intermediate stage coefficients will // stay within the ranges: @@ -56,13 +56,12 @@ static INLINE tran_low_t highbd_check_range(tran_high_t input, (void) int_min; #endif // CONFIG_COEFFICIENT_RANGE_CHECKING (void) bd; - return (tran_low_t)input; + return input; } -static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input, - int bd) { +static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) { tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return highbd_check_range(rv, bd); + return (tran_high_t)rv; } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -83,9 +82,20 @@ static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input, // bd of 10 uses trans_low with 18bits, need to remove 14bits // bd of 12 uses trans_low with 20bits, need to remove 12bits // bd of x uses trans_low with 8+x bits, need to remove 24-x bits -#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) -#else -#define WRAPLOW(x, bd) ((int32_t)(x)) + +#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) +#if CONFIG_VP9_HIGHBITDEPTH +#define HIGHBD_WRAPLOW(x, bd) \ + ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) +#endif // CONFIG_VP9_HIGHBITDEPTH + +#else // CONFIG_EMULATE_HARDWARE + +#define WRAPLOW(x) ((int32_t)check_range(x)) +#if CONFIG_VP9_HIGHBITDEPTH +#define HIGHBD_WRAPLOW(x, bd) \ + ((int32_t)highbd_check_range((x), bd)) +#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_EMULATE_HARDWARE void idct4_c(const tran_low_t *input, tran_low_t *output); @@ -107,14 +117,14 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, int bd) { - trans = WRAPLOW(trans, bd); - return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); + trans = HIGHBD_WRAPLOW(trans, bd); + return clip_pixel_highbd(dest + trans, bd); } #endif static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { - trans = WRAPLOW(trans, 8); - return clip_pixel(WRAPLOW(dest + trans, 8)); + trans = WRAPLOW(trans); + return clip_pixel(dest + trans); } #ifdef __cplusplus } // extern "C" -- cgit v1.2.3