summary | refs | log | tree | commit | diff
path: root/vp8/encoder/onyx_if.c
diff options
context:
space:
mode:
author Yunqing Wang <yunqingwang@google.com> 2010-10-27 08:45:24 -0400
committer Yunqing Wang <yunqingwang@google.com> 2010-10-27 13:36:31 -0400
commit 71ecb5d7d905d1f1771b6c5e130e873dcf458b73 (patch)
tree 26ef42506e0eaaf03022f4f7fbe15d617beae883 /vp8/encoder/onyx_if.c
parent a0ae3682aa67f882006c604196f7ee83eff88d84 (diff)
download libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.tar
libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.tar.gz
libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.tar.bz2
libvpx-71ecb5d7d905d1f1771b6c5e130e873dcf458b73.zip
Full search SAD function optimization in SSE4.1
Use mpsadbw to calculate 8 SADs at once. Function list: vp8_sad16x16x8_sse4, vp8_sad16x8x8_sse4, vp8_sad8x16x8_sse4, vp8_sad8x8x8_sse4, vp8_sad4x4x8_sse4. (Test clip: tulip.) For best quality mode, this gave the encoder a 5% performance boost. For good quality mode with speed=1, this gave the encoder a 3% performance boost. Change-Id: I083b5a39d39144f88dcbccbef95da6498e490134
Diffstat (limited to 'vp8/encoder/onyx_if.c')
-rw-r--r-- vp8/encoder/onyx_if.c 5
1 files changed, 5 insertions, 0 deletions
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 7a78b2901..5f02a5a02 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2341,6 +2341,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
+ cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
@@ -2350,6 +2351,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
+ cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
@@ -2359,6 +2361,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
+ cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
@@ -2368,6 +2371,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
+ cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
@@ -2377,6 +2381,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
+ cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
#if !(CONFIG_REALTIME_ONLY)