diff options
author | Yunqing Wang <yunqingwang@google.com> | 2010-10-27 08:45:24 -0400 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2010-10-27 13:36:31 -0400 |
commit | 71ecb5d7d905d1f1771b6c5e130e873dcf458b73 (patch) | |
tree | 26ef42506e0eaaf03022f4f7fbe15d617beae883 /vp8/encoder/onyx_if.c | |
parent | a0ae3682aa67f882006c604196f7ee83eff88d84 (diff) | |
download | libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.tar libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.tar.gz libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.tar.bz2 libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.zip |
Full search SAD function optimization in SSE4.1
Use mpsadbw, and calculate 8 sad at once. Function list:
vp8_sad16x16x8_sse4
vp8_sad16x8x8_sse4
vp8_sad8x16x8_sse4
vp8_sad8x8x8_sse4
vp8_sad4x4x8_sse4
(test clip: tulip)
For best quality mode, this gave encoder a 5% performance boost.
For good quality mode with speed=1, this gave encoder a 3%
performance boost.
Change-Id: I083b5a39d39144f88dcbccbef95da6498e490134
Diffstat (limited to 'vp8/encoder/onyx_if.c')
-rw-r--r-- | vp8/encoder/onyx_if.c | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 7a78b2901..5f02a5a02 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -2341,6 +2341,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v); cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv); cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3); + cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8); cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d); cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8); @@ -2350,6 +2351,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3); + cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8); cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d); cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16); @@ -2359,6 +2361,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3); + cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8); cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d); cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8); @@ -2368,6 +2371,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3); + cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8); cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d); cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4); @@ -2377,6 +2381,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3); + cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8); cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d); #if !(CONFIG_REALTIME_ONLY) |