summary | refs | log | tree | commit | diff
path: root/vp9/encoder/vp9_dct.c
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder/vp9_dct.c')
-rw-r--r-- vp9/encoder/vp9_dct.c 83
1 files changed, 74 insertions, 9 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index e14421d2d..0de6393a0 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -1332,8 +1332,9 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) {
#undef ROUNDING
#endif
+#if CONFIG_TX32X32 || CONFIG_TX64X64
+#if !CONFIG_DWTDCTHYBRID
#if CONFIG_TX32X32
-#if !CONFIG_DWT32X32HYBRID
static void dct32_1d(double *input, double *output, int stride) {
static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
static const double C2 = 0.995184726672; // cos(pi * 2 / 64)
@@ -1684,8 +1685,9 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
+#endif // CONFIG_TX32X32
-#else // CONFIG_DWT32X32HYBRID
+#else // CONFIG_DWTDCTHYBRID
#define DWT_MAX_LENGTH 64
#define DWT_TYPE 26 // 26/53/97
@@ -2108,7 +2110,8 @@ static void dct16x16_1d_f(double input[16], double output[16]) {
vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
-void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch) {
+static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch,
+ int scale) {
vp9_clear_system_state(); // Make it simd safe : __asm emms;
{
int shortpitch = pitch >> 1;
@@ -2134,11 +2137,12 @@ void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch) {
}
// Scale by some magic number
for (i = 0; i < 256; i++)
- out[i] = (short)round(output[i] / (4 << DWT_PRECISION_BITS));
+ out[i] = (short)round(output[i] / (2 << scale));
}
vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
+#if CONFIG_TX32X32
void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
// assume out is a 32x32 buffer
short buffer[16 * 16];
@@ -2153,21 +2157,82 @@ void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
#endif
// TODO(debargha): Implement more efficiently by adding output pitch
// argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 64);
+ vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
for (i = 0; i < 16; ++i)
vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
- vp9_short_fdct16x16_c_f(out + 16, buffer, 64);
+ vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS);
for (i = 0; i < 16; ++i)
vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16);
- vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64);
+ vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
for (i = 0; i < 16; ++i)
vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16);
- vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64);
+ vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
for (i = 0; i < 16; ++i)
vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16);
}
-#endif // CONFIG_DWT32X32HYBRID
#endif // CONFIG_TX32X32
+
+#if CONFIG_TX64X64
+// Forward hybrid DWT/DCT transform of a 64x64 block.
+//   input: source samples; pitch is halved to get its row stride in shorts.
+//   out:   64x64 coefficient buffer (row stride 64).
+// After the dyadic wavelet analysis, each 16x16 subband in the top-left
+// 32x32 quadrant is passed through the 16x16 DCT via a temporary buffer.
+// The remaining three 32x32 quadrants get no DCT and are only rescaled.
+void vp9_short_fdct64x64_c(short *input, short *out, int pitch) {
+  short buffer[16 * 16];
+  int i, j;
+  const int short_pitch = pitch >> 1;
+  // Wavelet analysis from input into out (filter selected by DWT_TYPE).
+#if DWT_TYPE == 26
+  dyadic_analyze_26(2, 64, 64, input, short_pitch, out, 64);
+#elif DWT_TYPE == 97
+  dyadic_analyze_97(2, 64, 64, input, short_pitch, out, 64);
+#elif DWT_TYPE == 53
+  dyadic_analyze_53(2, 64, 64, input, short_pitch, out, 64);
+#endif
+  // TODO(debargha): Implement more efficiently by adding output pitch
+  // argument to the dct16x16 function
+  vp9_short_fdct16x16_c_f(out, buffer, 128, 2 + DWT_PRECISION_BITS);
+  for (i = 0; i < 16; ++i)
+    vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16);
+
+  vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS);
+  for (i = 0; i < 16; ++i)
+    vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16);
+
+  vp9_short_fdct16x16_c_f(out + 64 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
+  for (i = 0; i < 16; ++i)
+    vpx_memcpy(out + i * 64 + 64 * 16, buffer + i * 16, sizeof(short) * 16);
+
+  vp9_short_fdct16x16_c_f(out + 65 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
+  for (i = 0; i < 16; ++i)
+    vpx_memcpy(out + i * 64 + 65 * 16, buffer + i * 16, sizeof(short) * 16);
+
+  // There is no dct used on the highest bands for now.
+  // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS
+  // TODO(debargha): experiment with turning these coeffs to 0
+  // Rows 0-31 carry high-band data only in columns 32-63 (columns 0-31 were
+  // scaled inside the DCT calls above); rows 32-63 are high band throughout.
+#if DWT_PRECISION_BITS < 1
+  // Multiply instead of <<=: left-shifting a negative value is undefined.
+  for (i = 0; i < 64; ++i) {
+    for (j = (i < 32) ? 32 : 0; j < 64; ++j) {
+      out[i * 64 + j] *= 1 << (1 - DWT_PRECISION_BITS);
+    }
+  }
+#else
+  for (i = 0; i < 64; ++i) {
+    for (j = (i < 32) ? 32 : 0; j < 64; ++j) {
+      out[i * 64 + j] >>= (DWT_PRECISION_BITS - 1);
+    }
+  }
+#endif
+}
+#endif // CONFIG_TX64X64
+#endif // CONFIG_DWTDCTHYBRID
+#endif // CONFIG_TX32X32 || CONFIG_TX64X64