From 904b957ae965bd3d67f15a75cd9db7954f810d33 Mon Sep 17 00:00:00 2001 From: Johann Date: Thu, 16 Feb 2017 17:57:44 -0800 Subject: consolidate block_error functions vp9_highbd_block_error_8bit_c was a very simple wrapper around vp9_block_error_c. The SSE2 implemention was practically identical to the non-HBD one. It was missing some minor improvements which only went into the original version. In quick speed tests, the AVX implementation showed minimal improvement over SSE2 when it does not detect overflow. However, when overflow is detected the function is run a second time. The OperationCheck test seems to trigger this case and reverses any speed benefits by running ~60% slower. AVX2 on the other hand is always 30-40% faster. Change-Id: I9fcb9afbcb560f234c7ae1b13ddb69eca3988ba1 --- vp9/common/vp9_rtcd_defs.pl | 3 --- 1 file changed, 3 deletions(-) (limited to 'vp9/common/vp9_rtcd_defs.pl') diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 57af79d5b..77bebc7b9 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -130,9 +130,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; specialize qw/vp9_highbd_block_error sse2/; - add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/vp9_highbd_block_error_8bit sse2 avx/; - add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size"; specialize qw/vp9_block_error_fp sse2/; -- cgit v1.2.3