summaryrefslogtreecommitdiff
path: root/vp9/encoder/vp9_dct.c
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder/vp9_dct.c')
-rw-r--r--vp9/encoder/vp9_dct.c82
1 files changed, 60 insertions, 22 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 6365ed9a2..ecd3e2dd3 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -37,30 +37,68 @@ static void fdct4_1d(int16_t *input, int16_t *output) {
}
void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
- int16_t out[4 * 4];
- int16_t *outptr = &out[0];
- const int short_pitch = pitch >> 1;
- int i, j;
- int16_t temp_in[4], temp_out[4];
-
- // Columns
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * short_pitch + i] << 4;
- if (i == 0 && temp_in[0])
- temp_in[0] += 1;
- fdct4_1d(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- outptr[j * 4 + i] = temp_out[j];
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we tranpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ const int stride = pitch >> 1;
+ int pass;
+ // We need an intermediate buffer between passes.
+ int16_t intermediate[4 * 4];
+ int16_t *in = input;
+ int16_t *out = intermediate;
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ /*canbe16*/ int input[4];
+ /*canbe16*/ int step[4];
+ /*needs32*/ int temp1, temp2;
+ int i;
+ for (i = 0; i < 4; ++i) {
+ // Load inputs.
+ if (0 == pass) {
+ input[0] = in[0 * stride] << 4;
+ input[1] = in[1 * stride] << 4;
+ input[2] = in[2 * stride] << 4;
+ input[3] = in[3 * stride] << 4;
+ if (i == 0 && input[0]) {
+ input[0] += 1;
+ }
+ } else {
+ input[0] = in[0 * 4];
+ input[1] = in[1 * 4];
+ input[2] = in[2 * 4];
+ input[3] = in[3 * 4];
+ }
+ // Transform.
+ step[0] = input[0] + input[3];
+ step[1] = input[1] + input[2];
+ step[2] = input[1] - input[2];
+ step[3] = input[0] - input[3];
+ temp1 = (step[0] + step[1]) * cospi_16_64;
+ temp2 = (step[0] - step[1]) * cospi_16_64;
+ out[0] = dct_const_round_shift(temp1);
+ out[2] = dct_const_round_shift(temp2);
+ temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+ out[1] = dct_const_round_shift(temp1);
+ out[3] = dct_const_round_shift(temp2);
+ // Do next column (which is a transposed row in second/horizontal pass)
+ in++;
+ out += 4;
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
}
- // Rows
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j + i * 4];
- fdct4_1d(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ {
+ int i, j;
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
+ }
}
}