From da3f08fac3f35a4d0a6f2d170ba5a27e9719eb73 Mon Sep 17 00:00:00 2001
From: Jian Zhou
Date: Wed, 25 Nov 2015 12:28:39 -0800
Subject: Speed up h_predictor_8x8

Relocate the function from SSSE3 to SSE2, Unroll loop from 4 to 2, and
reduce mem access to left. Speed up by >20% in ./test_intra_pred_speed.

Change-Id: Ib9f1846819783b6e05e2a310c930eb844b2b4d2e
---
 vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'vpx_dsp/vpx_dsp_rtcd_defs.pl')

diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index a05cf8fa7..989171b88 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -130,7 +130,7 @@ add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co
 specialize qw/vpx_d63e_predictor_8x8/;
 
 add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
+specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
 
 add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d117_predictor_8x8/;
-- 
cgit v1.2.3