From 7045aec00a94bd49ed979b8dbd73bb81d58670dc Mon Sep 17 00:00:00 2001 From: levytamar82 Date: Wed, 1 Oct 2014 23:47:31 -0700 Subject: SAD32xh and SAD64xh for AVX2 All SAD functions that process 32 or more consecutive elements are optimized for AVX2: vp9_sad64x64 vp9_sad64x32 vp9_sad32x64 vp9_sad32x32 vp9_sad32x16 vp9_sad64x64_avg vp9_sad64x32_avg vp9_sad32x64_avg vp9_sad32x32_avg vp9_sad32x16_avg. The functions that appeared as hotspots are vp9_sad32x32 and vp9_sad64x64. vp9_sad32x32 was optimized by 68% and vp9_sad64x64 was optimized by 90%; both of them gave an overall ~2.3% user-level gain. Change-Id: Iccf86b375a2b54c5fbbe685902ead0c9a561b9fd --- vp9/common/vp9_rtcd_defs.pl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'vp9/common/vp9_rtcd_defs.pl') diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 838ec44d6..a5d3ef278 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -873,22 +873,22 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_pt specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad64x64 neon/, "$sse2_x86inc"; +specialize qw/vp9_sad64x64 neon avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad32x64/, "$sse2_x86inc"; +specialize qw/vp9_sad32x64 avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad64x32/, "$sse2_x86inc"; +specialize qw/vp9_sad64x32 avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad32x16/, 
"$sse2_x86inc"; +specialize qw/vp9_sad32x16 avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x32/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad32x32 neon/, "$sse2_x86inc"; +specialize qw/vp9_sad32x32 neon avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x16 neon/, "$sse2_x86inc"; @@ -912,22 +912,22 @@ add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_strid specialize qw/vp9_sad4x4/, "$sse_x86inc"; add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad64x64_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad32x64_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad64x32_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad32x16_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred"; specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad32x32_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; -- cgit v1.2.3