summary | refs | log | tree | commit | diff
path: root/vp9/decoder/vp9_idct_blk.c
diff options
context:
space:
mode:
author: Yunqing Wang <yunqingwang@google.com> 2013-02-26 16:27:41 -0800
committer: Yunqing Wang <yunqingwang@google.com> 2013-02-26 17:16:13 -0800
commit: 35bc02c6eb22602997d9c8aebeb46ef588266cc4 (patch)
tree: 54ab06054f3f5bcef8f214b00fe448c4da19af46 /vp9/decoder/vp9_idct_blk.c
parent: 9770d564f4984e6a0d3cfdfb7e5b8bc83f52dccf (diff)
download: libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.tar
libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.tar.gz
libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.tar.bz2
libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.zip
Optimize vp9_dc_only_idct_add_c function
Wrote an SSE2 version of the vp9_dc_only_idct_add_c function. To improve performance, the absolute diff values are clipped to [0, 255], which allows the additions/subtractions to be kept in 8 bits. Tests showed a decoder performance increase of over 2%.

Change-Id: Ie1a236d23d207e4ffcd1fc9f3d77462a9c7fe09d
Diffstat (limited to 'vp9/decoder/vp9_idct_blk.c')
-rw-r--r-- vp9/decoder/vp9_idct_blk.c 6
1 file changed, 3 insertions, 3 deletions
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index 6e55e45ae..50d874470 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -47,7 +47,7 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
if (xd->block[i * 4 + j].eob > 1)
vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride);
else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dst, 16, stride);
+ vp9_dc_only_idct_add(q[0]*dq[0], pre, dst, 16, stride);
((int *)q)[0] = 0;
}
@@ -72,7 +72,7 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
if (xd->block[16 + i * 2 + j].eob > 1)
vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride);
else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstu, 8, stride);
+ vp9_dc_only_idct_add(q[0]*dq[0], pre, dstu, 8, stride);
((int *)q)[0] = 0;
}
@@ -90,7 +90,7 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
if (xd->block[20 + i * 2 + j].eob > 1)
vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride);
else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstv, 8, stride);
+ vp9_dc_only_idct_add(q[0]*dq[0], pre, dstv, 8, stride);
((int *)q)[0] = 0;
}