diff options
Diffstat (limited to 'vp8/common/idctllm.c')
-rw-r--r-- | vp8/common/idctllm.c | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index dbf0fda82..e549fe098 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -647,3 +647,275 @@ void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) { op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1; } + +#if CONFIG_TX16X16 +#if 0 +// Keep a really bad float version as reference for now. +void vp8_short_idct16x16_c(short *input, short *output, int pitch) { + double x; + const int short_pitch = pitch >> 1; + int i, j, k, l; + for (l = 0; l < 16; ++l) { + for (k = 0; k < 16; ++k) { + double s = 0; + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) { + x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/32; + if (i != 0) + x *= sqrt(2.0); + if (j != 0) + x *= sqrt(2.0); + s += x; + } + } + output[k*short_pitch+l] = (short)round(s); + } + } +} +#endif + +static void butterfly_16x16_idct_1d(double input[16], double output[16]) { + double step[16]; + double intermediate[16]; + double temp1, temp2; + + const double PI = M_PI; + const double C1 = cos(1*PI/(double)32); + const double C2 = cos(2*PI/(double)32); + const double C3 = cos(3*PI/(double)32); + const double C4 = cos(4*PI/(double)32); + const double C5 = cos(5*PI/(double)32); + const double C6 = cos(6*PI/(double)32); + const double C7 = cos(7*PI/(double)32); + const double C8 = cos(8*PI/(double)32); + const double C9 = cos(9*PI/(double)32); + const double C10 = cos(10*PI/(double)32); + const double C11 = cos(11*PI/(double)32); + const double C12 = cos(12*PI/(double)32); + const double C13 = cos(13*PI/(double)32); + const double C14 = cos(14*PI/(double)32); + const double C15 = cos(15*PI/(double)32); + + // step 1 and 2 + step[ 0] = input[0] + input[8]; + step[ 1] = input[0] - input[8]; + + temp1 = input[4]*C12; + temp2 = input[12]*C4; + + temp1 -= temp2; + temp1 *= C8; + + step[ 2] = 2*(temp1); + + temp1 = input[4]*C4; + temp2 = input[12]*C12; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + step[ 3] = 2*(temp1); + + temp1 = input[2]*C8; + temp1 = 2*(temp1); + temp2 = input[6] + input[10]; + + step[ 4] = temp1 + temp2; + step[ 5] = temp1 - temp2; + + temp1 = input[14]*C8; + temp1 = 2*(temp1); + temp2 = input[6] - input[10]; + + step[ 6] = temp2 - temp1; + step[ 7] = temp2 + temp1; + + // for odd input + temp1 = input[3]*C12; + temp2 = input[13]*C4; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + intermediate[ 8] = 2*(temp1); + + temp1 = input[3]*C4; + temp2 = input[13]*C12; + temp2 -= temp1; + temp2 = (temp2); + temp2 *= C8; + intermediate[ 9] = 2*(temp2); + + intermediate[10] = 2*(input[9]*C8); + intermediate[11] = input[15] - input[1]; + intermediate[12] = input[15] + input[1]; + intermediate[13] = 2*((input[7]*C8)); + + temp1 = input[11]*C12; + temp2 = input[5]*C4; + temp2 -= temp1; + temp2 = (temp2); + temp2 *= C8; + intermediate[14] = 2*(temp2); + + temp1 = input[11]*C4; + temp2 = input[5]*C12; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + intermediate[15] = 2*(temp1); + + step[ 8] = intermediate[ 8] + intermediate[14]; + step[ 9] = intermediate[ 9] + intermediate[15]; + step[10] = intermediate[10] + intermediate[11]; + step[11] = intermediate[10] - intermediate[11]; + step[12] = intermediate[12] + intermediate[13]; + step[13] = intermediate[12] - intermediate[13]; + step[14] = intermediate[ 8] - intermediate[14]; + step[15] = intermediate[ 9] - intermediate[15]; + + // step 3 + output[0] = step[ 0] + step[ 3]; + output[1] = step[ 1] + step[ 2]; + output[2] = step[ 1] - step[ 2]; + output[3] = step[ 0] - step[ 3]; + + temp1 = step[ 4]*C14; + temp2 = step[ 7]*C2; + temp1 -= temp2; + output[4] = (temp1); + + temp1 = step[ 4]*C2; + temp2 = step[ 7]*C14; + temp1 += temp2; + output[7] = (temp1); + + temp1 = step[ 5]*C10; + temp2 = step[ 6]*C6; + temp1 -= temp2; + output[5] = (temp1); + + temp1 = step[ 5]*C6; + temp2 = step[ 6]*C10; + temp1 += temp2; + output[6] = (temp1); + + output[8] = step[ 8] + step[11]; + output[9] = step[ 9] + step[10]; + output[10] = step[ 9] - step[10]; + output[11] = step[ 8] - step[11]; + output[12] = step[12] + step[15]; + output[13] = step[13] + step[14]; + output[14] = step[13] - step[14]; + output[15] = step[12] - step[15]; + + // output 4 + step[ 0] = output[0] + output[7]; + step[ 1] = output[1] + output[6]; + step[ 2] = output[2] + output[5]; + step[ 3] = output[3] + output[4]; + step[ 4] = output[3] - output[4]; + step[ 5] = output[2] - output[5]; + step[ 6] = output[1] - output[6]; + step[ 7] = output[0] - output[7]; + + temp1 = output[8]*C7; + temp2 = output[15]*C9; + temp1 -= temp2; + step[ 8] = (temp1); + + temp1 = output[9]*C11; + temp2 = output[14]*C5; + temp1 += temp2; + step[ 9] = (temp1); + + temp1 = output[10]*C3; + temp2 = output[13]*C13; + temp1 -= temp2; + step[10] = (temp1); + + temp1 = output[11]*C15; + temp2 = output[12]*C1; + temp1 += temp2; + step[11] = (temp1); + + temp1 = output[11]*C1; + temp2 = output[12]*C15; + temp2 -= temp1; + step[12] = (temp2); + + temp1 = output[10]*C13; + temp2 = output[13]*C3; + temp1 += temp2; + step[13] = (temp1); + + temp1 = output[9]*C5; + temp2 = output[14]*C11; + temp2 -= temp1; + step[14] = (temp2); + + temp1 = output[8]*C9; + temp2 = output[15]*C7; + temp1 += temp2; + step[15] = (temp1); + + // step 5 + output[0] = (step[0] + step[15]); + output[1] = (step[1] + step[14]); + output[2] = (step[2] + step[13]); + output[3] = (step[3] + step[12]); + output[4] = (step[4] + step[11]); + output[5] = (step[5] + step[10]); + output[6] = (step[6] + step[ 9]); + output[7] = (step[7] + step[ 8]); + + output[15] = (step[0] - step[15]); + output[14] = (step[1] - step[14]); + output[13] = (step[2] - step[13]); + output[12] = (step[3] - step[12]); + output[11] = (step[4] - step[11]); + output[10] = (step[5] - step[10]); + output[9] = (step[6] - step[ 9]); + output[8] = (step[7] - step[ 8]); +} + +// Remove once an int version of iDCT is written +#if 0 +void reference_16x16_idct_1d(double input[16], double output[16]) { + const double kPi = 3.141592653589793238462643383279502884; + const double kSqrt2 = 1.414213562373095048801688724209698; + for (int k = 0; k < 16; k++) { + output[k] = 0.0; + for (int n = 0; n < 16; n++) { + output[k] += input[n]*cos(kPi*(2*k+1)*n/32.0); + if (n == 0) + output[k] = output[k]/kSqrt2; + } + } +} +#endif + +void vp8_short_idct16x16_c(short *input, short *output, int pitch) { + double out[16*16], out2[16*16]; + const int short_pitch = pitch >> 1; + int i, j; + // First transform rows + for (i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; ++j) + temp_in[j] = input[j + i*short_pitch]; + butterfly_16x16_idct_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out[j + i*16] = temp_out[j]; + } + // Then transform columns + for (i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; ++j) + temp_in[j] = out[j*16 + i]; + butterfly_16x16_idct_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out2[j*16 + i] = temp_out[j]; + } + for (i = 0; i < 16*16; ++i) + output[i] = round(out2[i]/128); +} +#endif |