summaryrefslogtreecommitdiff
path: root/vp9/common
diff options
context:
space:
mode:
authorlevytamar82 <tamar.levy@intel.com>2014-10-01 23:47:31 -0700
committerlevytamar82 <tamar.levy@intel.com>2014-10-19 13:59:10 -0700
commit7045aec00a94bd49ed979b8dbd73bb81d58670dc (patch)
tree9db7c0d26d8eaa4d9e462961545535fd219d384e /vp9/common
parentfeee7d97b797dff46e9eaef0871098dee463d508 (diff)
downloadlibvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.tar
libvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.tar.gz
libvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.tar.bz2
libvpx-7045aec00a94bd49ed979b8dbd73bb81d58670dc.zip
SAD32xh and SAD64xh for AVX2
All sad function that process above 32 consecutive elements are optimized for AVX2: vp9_sad64x64 vp9_sad64x32 vp9_sad32x64 vp9_sad32x32 vp9_sad32x16 vp9_sad64x64_avg vp9_sad64x32_avg vp9_sad32x64_avg vp9_sad32x32_avg vp9_sad32x16_avg The functions that appeared as a hotspot is vp9_sad32x32 and vp9_sad64x64 vp9_sad32x32 was optimized by 68% and vp9_sad64x64 was optimized by 90% both of them gave and overall ~2.3% user level gain Change-Id: Iccf86b375a2b54c5fbbe685902ead0c9a561b9fd
Diffstat (limited to 'vp9/common')
-rw-r--r--vp9/common/vp9_rtcd_defs.pl20
1 files changed, 10 insertions, 10 deletions
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 838ec44d6..a5d3ef278 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -873,22 +873,22 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_pt
specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad64x64 neon/, "$sse2_x86inc";
+specialize qw/vp9_sad64x64 neon avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x64/, "$sse2_x86inc";
+specialize qw/vp9_sad32x64 avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad64x32/, "$sse2_x86inc";
+specialize qw/vp9_sad64x32 avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x16/, "$sse2_x86inc";
+specialize qw/vp9_sad32x16 avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x32 neon/, "$sse2_x86inc";
+specialize qw/vp9_sad32x32 neon avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x16 neon/, "$sse2_x86inc";
@@ -912,22 +912,22 @@ add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_strid
specialize qw/vp9_sad4x4/, "$sse_x86inc";
add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad64x64_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad32x64_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad64x32_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad32x16_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad32x32_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";