diff options
Diffstat (limited to 'vp8/encoder/dct.c')
-rw-r--r-- | vp8/encoder/dct.c | 302 |
1 files changed, 265 insertions, 37 deletions
/*
 * Integer 8x8 forward DCT and 4x4 Walsh-Hadamard transform.
 *
 * In the originating patch the integer DCT below is selected with
 * `#if CONFIG_INT_8X8FDCT`; the alternative implementation that lives under
 * the matching `#else` is not reproduced here.
 */

/*
 * Fixed-point transform coefficients: xC<k>S<8-k> is cos(k*pi/16)
 * (equivalently sin((8-k)*pi/16)) scaled by 2^SHIFT_BITS and rounded.
 */
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
static const int xC3S5 = 13623;
static const int xC4S4 = 11585;
static const int xC5S3 = 9102;
static const int xC6S2 = 6270;
static const int xC7S1 = 3196;

/* Number of fractional bits carried by the coefficients above. */
#define SHIFT_BITS 14
/* Adds the round-to-nearest bias for a following `>> SHIFT_BITS`.
 * Expression macro: the caller supplies the terminating semicolon. */
#define DOROUND(X) ((X) += (1 << (SHIFT_BITS - 1)))
/* Down-scaling applied to every coefficient after the column pass. */
#define FINAL_SHIFT 3
#define FINAL_ROUNDING (1 << (FINAL_SHIFT - 1))
/* Up-scaling applied to the input samples before the row pass. */
#define IN_SHIFT (FINAL_SHIFT + 1)

/*
 * Multiplies x by a coefficient with SHIFT_BITS fractional bits and rounds:
 * (coef * x + 2^(SHIFT_BITS-1)) >> SHIFT_BITS.
 *
 * The right shift is applied to a possibly negative int, which is
 * implementation-defined in C; the expected results assume an arithmetic
 * (sign-propagating) shift.
 */
static int fdct8_mul_round(int coef, int x)
{
    return (coef * x + (1 << (SHIFT_BITS - 1))) >> SHIFT_BITS;
}

/*
 * One 8-point forward DCT butterfly.
 *
 * in  - eight input samples
 * out - eight transformed values, out[k] being the k-th frequency term
 *
 * Both the row pass and the column pass of vp8_short_fdct8x8_c use exactly
 * this arithmetic; they differ only in how samples are loaded and stored.
 */
static void fdct8_1d(const int in[8], int out[8])
{
    /* Sums and differences of the sample pairs used by the butterfly. */
    const int s07 = in[0] + in[7];
    const int s12 = in[1] + in[2];
    const int s34 = in[3] + in[4];
    const int s56 = in[5] + in[6];
    const int d07 = in[0] - in[7];
    const int d12 = in[1] - in[2];
    const int d34 = in[3] - in[4];
    const int d56 = in[5] - in[6];

    const int s0734 = s07 + s34;
    const int s1256 = s12 + s56;

    /* Products shared by the odd outputs: c4s4*(s12 - s56), c4s4*(d12 + d56). */
    const int common1 = fdct8_mul_round(xC4S4, s12 - s56);
    const int common2 = fdct8_mul_round(xC4S4, d12 + d56);

    int rot_x, rot_y;

    out[0] = fdct8_mul_round(xC4S4, s0734 + s1256);
    out[4] = fdct8_mul_round(xC4S4, s0734 - s1256);

    /* Rotation producing outputs 2 and 6. */
    rot_x = d12 - d56;
    rot_y = s07 - s34;
    out[2] = fdct8_mul_round(xC6S2, rot_x) + fdct8_mul_round(xC2S6, rot_y);
    out[6] = fdct8_mul_round(xC6S2, rot_y) - fdct8_mul_round(xC2S6, rot_x);

    /* Rotation producing outputs 1 and 7. */
    rot_x = common1 + d07;
    rot_y = -(d34 + common2);
    out[1] = fdct8_mul_round(xC1S7, rot_x) - fdct8_mul_round(xC7S1, rot_y);
    out[7] = fdct8_mul_round(xC7S1, rot_x) + fdct8_mul_round(xC1S7, rot_y);

    /* Rotation producing outputs 3 and 5. */
    rot_x = d07 - common1;
    rot_y = d34 - common2;
    out[3] = fdct8_mul_round(xC3S5, rot_x) - fdct8_mul_round(xC5S3, rot_y);
    out[5] = fdct8_mul_round(xC5S3, rot_x) + fdct8_mul_round(xC3S5, rot_y);
}

/*
 * Integer 8x8 forward DCT.
 *
 * InputData  - top-left sample of an 8x8 block of shorts
 * OutputData - receives 64 coefficients, stored contiguously with 8 per row
 * pitch      - distance between consecutive input rows; it is halved to
 *              obtain the row step in shorts
 *
 * The rows are transformed first into a 32-bit intermediate buffer, then the
 * columns. Inputs are pre-scaled by 2^IN_SHIFT and the final coefficients are
 * rounded and scaled down by 2^FINAL_SHIFT. A block whose samples all equal v
 * therefore yields a single non-zero coefficient, OutputData[0] == 64 * v.
 */
void vp8_short_fdct8x8_c(short *InputData, short *OutputData, int pitch)
{
    int row, col, k;
    int short_pitch = pitch >> 1;
    int InterData[64];
    int samples[8];
    int coeffs[8];

    /* Row pass: InputData rows -> InterData rows. */
    for (row = 0; row < 8; row++)
    {
        for (k = 0; k < 8; k++)
        {
            /* Scale by multiplication, not `<<`: left-shifting a negative
             * value is undefined behaviour, multiplying is not. */
            samples[k] = InputData[k] * (1 << IN_SHIFT);
        }

        fdct8_1d(samples, &InterData[row * 8]);

        InputData += short_pitch;
    }

    /* Column pass: InterData columns -> OutputData columns. */
    for (col = 0; col < 8; col++)
    {
        for (k = 0; k < 8; k++)
        {
            samples[k] = InterData[k * 8 + col];
        }

        fdct8_1d(samples, coeffs);

        for (k = 0; k < 8; k++)
        {
            OutputData[k * 8 + col] =
                (short)((coeffs[k] + FINAL_ROUNDING) >> FINAL_SHIFT);
        }
    }
}

/*
 * 4x4 Walsh-Hadamard style transform.
 *
 * input  - top-left sample of a 4x4 block of shorts
 * output - receives 16 values, stored contiguously with 4 per row
 * pitch  - distance between consecutive input rows; it is halved to obtain
 *          the row step in shorts
 *
 * The first pass reads each input column and writes the matching column of
 * `output`; the second pass transforms each row of `output` in place. Every
 * result is halved with `>> 1` in both passes.
 *
 * NOTE(review): only the first term of each pass adds the +1 rounding bias
 * before the shift; the other three are shifted without it. Confirm this
 * matches the inverse transform it is paired with.
 */
void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
{
    int i;
    int a1, b1, c1, d1;
    short *ip = input;
    short *op = output;
    int pitch_short = pitch >> 1;

    /* Vertical pass: one input column per iteration. */
    for (i = 0; i < 4; i++)
    {
        a1 = ip[0 * pitch_short] + ip[3 * pitch_short];
        b1 = ip[1 * pitch_short] + ip[2 * pitch_short];
        c1 = ip[1 * pitch_short] - ip[2 * pitch_short];
        d1 = ip[0 * pitch_short] - ip[3 * pitch_short];

        op[0]  = (a1 + b1 + 1) >> 1;
        op[4]  = (c1 + d1) >> 1;
        op[8]  = (a1 - b1) >> 1;
        op[12] = (d1 - c1) >> 1;

        ip++;
        op++;
    }

    /* Horizontal pass, in place on `output`: all four inputs of a row are
     * read into a1..d1 before any element of that row is overwritten. */
    ip = output;
    op = output;

    for (i = 0; i < 4; i++)
    {
        a1 = ip[0] + ip[3];
        b1 = ip[1] + ip[2];
        c1 = ip[1] - ip[2];
        d1 = ip[0] - ip[3];

        op[0] = (a1 + b1 + 1) >> 1;
        op[1] = (c1 + d1) >> 1;
        op[2] = (a1 - b1) >> 1;
        op[3] = (d1 - c1) >> 1;

        ip += 4;
        op += 4;
    }
}
\ No newline at end of file |