diff options
author | Yunqing Wang <yunqingwang@google.com> | 2013-02-26 16:27:41 -0800 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2013-02-26 17:16:13 -0800 |
commit | 35bc02c6eb22602997d9c8aebeb46ef588266cc4 (patch) | |
tree | 54ab06054f3f5bcef8f214b00fe448c4da19af46 /vp9/decoder | |
parent | 9770d564f4984e6a0d3cfdfb7e5b8bc83f52dccf (diff) | |
download | libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.tar libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.tar.gz libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.tar.bz2 libvpx-35bc02c6eb22602997d9c8aebeb46ef588266cc4.zip |
Optimize vp9_dc_only_idct_add_c function
Wrote an SSE2 version of the vp9_dc_only_idct_add_c function. To
improve performance, the absolute diff values are clipped to [0, 255],
which allows the additions/subtractions to be kept in 8 bits.
Tests showed a decoder performance increase of over 2%.
Change-Id: Ie1a236d23d207e4ffcd1fc9f3d77462a9c7fe09d
Diffstat (limited to 'vp9/decoder')
-rw-r--r-- | vp9/decoder/vp9_decodframe.c | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_idct_blk.c | 6 |
2 files changed, 4 insertions, 4 deletions
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index d42bccd88..05a1bf9e0 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -126,7 +126,7 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1; xd->inv_txm4x4 = vp9_short_idct4x4llm; xd->itxm_add = vp9_dequant_idct_add; - xd->dc_only_itxm_add = vp9_dc_only_idct_add_c; + xd->dc_only_itxm_add = vp9_dc_only_idct_add; xd->itxm_add_y_block = vp9_dequant_idct_add_y_block; xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block; if (xd->lossless) { diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 6e55e45ae..50d874470 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -47,7 +47,7 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, if (xd->block[i * 4 + j].eob > 1) vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride); else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dst, 16, stride); + vp9_dc_only_idct_add(q[0]*dq[0], pre, dst, 16, stride); ((int *)q)[0] = 0; } @@ -72,7 +72,7 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, if (xd->block[16 + i * 2 + j].eob > 1) vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride); else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstu, 8, stride); + vp9_dc_only_idct_add(q[0]*dq[0], pre, dstu, 8, stride); ((int *)q)[0] = 0; } @@ -90,7 +90,7 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, if (xd->block[20 + i * 2 + j].eob > 1) vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride); else { - vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstv, 8, stride); + vp9_dc_only_idct_add(q[0]*dq[0], pre, dstv, 8, stride); ((int *)q)[0] = 0; } |