diff options
author | levytamar82 <tamar.levy@intel.com> | 2014-10-01 23:47:31 -0700 |
---|---|---|
committer | levytamar82 <tamar.levy@intel.com> | 2014-10-19 13:59:10 -0700 |
commit | 7045aec00a94bd49ed979b8dbd73bb81d58670dc (patch) | |
tree | 9db7c0d26d8eaa4d9e462961545535fd219d384e /vp9/common | |
parent | feee7d97b797dff46e9eaef0871098dee463d508 (diff) | |
download | libvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.tar libvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.tar.gz libvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.tar.bz2 libvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.zip |
SAD32xh and SAD64xh for AVX2
All SAD functions that process 32 or more consecutive elements are optimized
for AVX2:
vp9_sad64x64
vp9_sad64x32
vp9_sad32x64
vp9_sad32x32
vp9_sad32x16
vp9_sad64x64_avg
vp9_sad64x32_avg
vp9_sad32x64_avg
vp9_sad32x32_avg
vp9_sad32x16_avg
The functions that appeared as hotspots are vp9_sad32x32 and vp9_sad64x64.
vp9_sad32x32 was optimized by 68% and vp9_sad64x64 was optimized by 90%;
both of them gave an overall ~2.3% user-level gain.
Change-Id: Iccf86b375a2b54c5fbbe685902ead0c9a561b9fd
Diffstat (limited to 'vp9/common')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 838ec44d6..a5d3ef278 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -873,22 +873,22 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_pt specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad64x64 neon/, "$sse2_x86inc"; +specialize qw/vp9_sad64x64 neon avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad32x64/, "$sse2_x86inc"; +specialize qw/vp9_sad32x64 avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad64x32/, "$sse2_x86inc"; +specialize qw/vp9_sad64x32 avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad32x16/, "$sse2_x86inc"; +specialize qw/vp9_sad32x16 avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x32/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad32x32 neon/, "$sse2_x86inc"; +specialize qw/vp9_sad32x32 neon avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad16x16 neon/, "$sse2_x86inc"; @@ -912,22 +912,22 @@ add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_strid specialize qw/vp9_sad4x4/, "$sse_x86inc"; add_proto 
qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad64x64_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad32x64_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad64x32_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad32x16_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; +specialize qw/vp9_sad32x32_avg avx2/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; |