summary | refs | log | tree | commit | diff
path: root/vp8/common/idctllm.c
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common/idctllm.c')
-rw-r--r-- vp8/common/idctllm.c 272
1 files changed, 272 insertions, 0 deletions
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index dbf0fda82..e549fe098 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -647,3 +647,275 @@ void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) {
op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1;
}
+
+#if CONFIG_TX16X16
#if 0
// Disabled reference implementation, kept only for comparison: a direct
// (non-separable) floating-point 16x16 inverse DCT that evaluates the full
// double cosine sum for every output sample. Slow by design.
//
//   input  - 256 coefficients, read contiguously as a 16x16 block.
//   output - one rounded sample per (row, col), rows spaced pitch >> 1 shorts
//            apart.
//   pitch  - output row pitch; halved before indexing shorts.
//
// NOTE(review): relies on a `PI` symbol that is not defined inside this block.
void vp8_short_idct16x16_c(short *input, short *output, int pitch) {
  const int short_pitch = pitch >> 1;
  int col, row, u, v;

  for (col = 0; col < 16; ++col) {
    for (row = 0; row < 16; ++row) {
      double sum = 0;

      for (u = 0; u < 16; ++u) {
        for (v = 0; v < 16; ++v) {
          double term = cos(PI*v*(col+0.5)/16.0) *
                        cos(PI*u*(row+0.5)/16.0) * input[u*16+v] / 32;

          // Every non-DC frequency index contributes a sqrt(2) weight.
          if (u != 0) {
            term *= sqrt(2.0);
          }
          if (v != 0) {
            term *= sqrt(2.0);
          }
          sum += term;
        }
      }
      output[row*short_pitch + col] = (short)round(sum);
    }
  }
}
#endif
+
// 1-D 16-point inverse DCT in double precision, evaluated as a butterfly
// network rather than as a direct 16x16 cosine sum.
//
//   input  - 16 transform coefficients. Only read; every read happens before
//            the first store to `output`.
//   output - receives the 16 reconstructed samples. It is also used as
//            scratch storage between the stages below.
//
// Scaling: a lone DC coefficient (input[0] = v, everything else zero) yields
// v in all 16 outputs.
// NOTE(review): the other basis functions appear to be
// sqrt(2) * cos((2k+1) * n * pi / 32); hand-checked for n = 1, 4 and 8 only.
static void butterfly_16x16_idct_1d(const double input[16],
                                    double output[16]) {
  // Ck = cos(k * pi / 32). Written as literals so the table costs nothing at
  // run time and does not depend on M_PI, which is a POSIX/XSI extension and
  // is not declared by <math.h> in strict ISO C mode.
  static const double C1  = 0.99518472667219688624;
  static const double C2  = 0.98078528040323044913;
  static const double C3  = 0.95694033573220886494;
  static const double C4  = 0.92387953251128675613;
  static const double C5  = 0.88192126434835502971;
  static const double C6  = 0.83146961230254523708;
  static const double C7  = 0.77301045336273696081;
  static const double C8  = 0.70710678118654752440;
  static const double C9  = 0.63439328416364549822;
  static const double C10 = 0.55557023301960222474;
  static const double C11 = 0.47139673682599764856;
  static const double C12 = 0.38268343236508977173;
  static const double C13 = 0.29028467725446236764;
  static const double C14 = 0.19509032201612826785;
  static const double C15 = 0.09801714032956060199;

  double step[16];
  double intermediate[16];
  double temp1, temp2;
  int i;

  // step 1 and 2: even-indexed coefficients
  step[0] = input[0] + input[8];
  step[1] = input[0] - input[8];

  step[2] = 2 * ((input[4] * C12 - input[12] * C4) * C8);
  step[3] = 2 * ((input[4] * C4 + input[12] * C12) * C8);

  temp1 = 2 * (input[2] * C8);
  temp2 = input[6] + input[10];
  step[4] = temp1 + temp2;
  step[5] = temp1 - temp2;

  temp1 = 2 * (input[14] * C8);
  temp2 = input[6] - input[10];
  step[6] = temp2 - temp1;
  step[7] = temp2 + temp1;

  // step 1 and 2: odd-indexed coefficients
  intermediate[8]  = 2 * ((input[3] * C12 + input[13] * C4) * C8);
  intermediate[9]  = 2 * ((input[13] * C12 - input[3] * C4) * C8);
  intermediate[10] = 2 * (input[9] * C8);
  intermediate[11] = input[15] - input[1];
  intermediate[12] = input[15] + input[1];
  intermediate[13] = 2 * (input[7] * C8);
  intermediate[14] = 2 * ((input[5] * C4 - input[11] * C12) * C8);
  intermediate[15] = 2 * ((input[11] * C4 + input[5] * C12) * C8);

  step[8]  = intermediate[8]  + intermediate[14];
  step[9]  = intermediate[9]  + intermediate[15];
  step[10] = intermediate[10] + intermediate[11];
  step[11] = intermediate[10] - intermediate[11];
  step[12] = intermediate[12] + intermediate[13];
  step[13] = intermediate[12] - intermediate[13];
  step[14] = intermediate[8]  - intermediate[14];
  step[15] = intermediate[9]  - intermediate[15];

  // step 3 (from here on `output` doubles as scratch)
  output[0] = step[0] + step[3];
  output[1] = step[1] + step[2];
  output[2] = step[1] - step[2];
  output[3] = step[0] - step[3];

  output[4] = step[4] * C14 - step[7] * C2;
  output[7] = step[4] * C2 + step[7] * C14;
  output[5] = step[5] * C10 - step[6] * C6;
  output[6] = step[5] * C6 + step[6] * C10;

  output[8]  = step[8]  + step[11];
  output[9]  = step[9]  + step[10];
  output[10] = step[9]  - step[10];
  output[11] = step[8]  - step[11];
  output[12] = step[12] + step[15];
  output[13] = step[13] + step[14];
  output[14] = step[13] - step[14];
  output[15] = step[12] - step[15];

  // step 4
  step[0] = output[0] + output[7];
  step[1] = output[1] + output[6];
  step[2] = output[2] + output[5];
  step[3] = output[3] + output[4];
  step[4] = output[3] - output[4];
  step[5] = output[2] - output[5];
  step[6] = output[1] - output[6];
  step[7] = output[0] - output[7];

  step[8]  = output[8]  * C7  - output[15] * C9;
  step[9]  = output[9]  * C11 + output[14] * C5;
  step[10] = output[10] * C3  - output[13] * C13;
  step[11] = output[11] * C15 + output[12] * C1;
  step[12] = output[12] * C15 - output[11] * C1;
  step[13] = output[10] * C13 + output[13] * C3;
  step[14] = output[14] * C11 - output[9]  * C5;
  step[15] = output[8]  * C9  + output[15] * C7;

  // step 5: final butterflies pair step[i] with step[15 - i]
  for (i = 0; i < 8; ++i) {
    output[i]      = step[i] + step[15 - i];
    output[15 - i] = step[i] - step[15 - i];
  }
}
+
// Disabled straightforward reference for the 1-D 16-point inverse DCT.
// Remove once an int version of iDCT is written.
#if 0
void reference_16x16_idct_1d(double input[16], double output[16]) {
  const double kPi = 3.141592653589793238462643383279502884;
  const double kSqrt2 = 1.414213562373095048801688724209698;

  for (int sample = 0; sample < 16; sample++) {
    output[sample] = 0.0;
    for (int freq = 0; freq < 16; freq++) {
      const double basis = cos(kPi*(2*sample+1)*freq/32.0);

      output[sample] += input[freq]*basis;
      // Only the DC term (freq == 0) has been accumulated at this point, so
      // this scales that term alone by 1/sqrt(2).
      if (freq == 0) {
        output[sample] /= kSqrt2;
      }
    }
  }
}
#endif
+
// 2-D 16x16 inverse DCT built from two passes of the 1-D butterfly
// transform: first over each input row, then over each column of the
// intermediate result.
//
//   input  - coefficient block; row r is read from input[r * (pitch >> 1)]
//            onward, 16 shorts per row.
//   output - receives 256 shorts stored contiguously as a 16x16 block
//            (index r * 16 + c). `pitch` is NOT applied to the output.
//   pitch  - input row pitch; halved before indexing shorts (presumably a
//            byte pitch -- confirm against callers).
//
// Each result is divided by 128 and rounded with round() before being
// converted to short.
void vp8_short_idct16x16_c(short *input, short *output, int pitch) {
  const int in_stride = pitch >> 1;
  double row_pass[16 * 16];
  double vec_in[16], vec_out[16];
  int r, c;

  // Pass 1: transform every input row into the double-precision scratch block.
  for (r = 0; r < 16; ++r) {
    const short *src = input + r * in_stride;

    for (c = 0; c < 16; ++c)
      vec_in[c] = src[c];
    butterfly_16x16_idct_1d(vec_in, row_pass + r * 16);
  }

  // Pass 2: transform every column, then scale, round and store it.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      vec_in[r] = row_pass[r * 16 + c];
    butterfly_16x16_idct_1d(vec_in, vec_out);
    for (r = 0; r < 16; ++r)
      output[r * 16 + c] = round(vec_out[r] / 128);
  }
}
+#endif