diff options
author | Yunqing Wang <yunqingwang@google.com> | 2013-03-15 11:33:10 -0700 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2013-03-18 15:34:14 -0700 |
commit | 6344c84c82f6a4f82a6a4f9f33a6d1ec85691930 (patch) | |
tree | da2f70ee88c0080ef3c0755db0bee6c6fa8bd7c1 /vp9/decoder/vp9_dequantize.c | |
parent | 449f136886e96fcf448bf9b68952977da703c614 (diff) | |
download | libvpx-6344c84c82f6a4f82a6a4f9f33a6d1ec85691930.tar libvpx-6344c84c82f6a4f82a6a4f9f33a6d1ec85691930.tar.gz libvpx-6344c84c82f6a4f82a6a4f9f33a6d1ec85691930.tar.bz2 libvpx-6344c84c82f6a4f82a6a4f9f33a6d1ec85691930.zip |
Optimize 8x8 idct function
Wrote SSE2 versions of vp9_short_idct8x8 and vp9_short_idct10_8x8.
Compared to the C version, the SSE2 version is 2x faster. The decoder
test didn't show a noticeable gain, since the 8x8 idct accounts for
little of the decoding time (less than 1% in my test).
Change-Id: I56313e18cd481700b3b52c4eda5ca204ca6365f3
Diffstat (limited to 'vp9/decoder/vp9_dequantize.c')
-rw-r--r-- | vp9/decoder/vp9_dequantize.c | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index cb4601a15..2b8c61f1e 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -229,7 +229,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
     input[17] *= dq[1];
     input[24] *= dq[1];
 
-    vp9_short_idct10_8x8_c(input, output, 16);
+    vp9_short_idct10_8x8(input, output, 16);
 
     input[0] = input[1] = input[2] = input[3] = 0;
     input[8] = input[9] = input[10] = 0;
@@ -245,7 +245,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
       input[i] *= dq[1];
 
     // the idct halves ( >> 1) the pitch
-    vp9_short_idct8x8_c(input, output, 8 << 1);
+    vp9_short_idct8x8(input, output, 8 << 1);
     vpx_memset(input, 0, 128);
     vp9_add_residual_8x8(output, pred, pitch, dest, stride);
   }