summaryrefslogtreecommitdiff
path: root/vp9/common/vp9_rtcd_defs.sh
diff options
context:
space:
mode:
authorlevytamar82 <levytamar82@gmail.com>2014-02-03 18:40:33 -0700
committerlevytamar82 <levytamar82@gmail.com>2014-02-14 16:59:11 -0700
commit52dac5d1cb08a563551e312929910be736f708c6 (patch)
treebf2407997bc82ab659b084290e95e73a07595858 /vp9/common/vp9_rtcd_defs.sh
parent0876302e4e2b1d83bacdf0395c50ee118cfafef3 (diff)
downloadlibvpx-52dac5d1cb08a563551e312929910be736f708c6.tar
libvpx-52dac5d1cb08a563551e312929910be736f708c6.tar.gz
libvpx-52dac5d1cb08a563551e312929910be736f708c6.tar.bz2
libvpx-52dac5d1cb08a563551e312929910be736f708c6.zip
AVX2 SubPixel Variance Optimization
Optimize two functions to process 32 elements in parallel instead of 16: 1. vp9_sub_pixel_variance64x64 2. vp9_sub_pixel_variance32x32. Both of these functions were calling vp9_sub_pixel_variance16xh_ssse3; instead of calling that function, they now call vp9_sub_pixel_variance32xh_avx2, which is written in AVX2 and processes 32 elements in parallel. This optimization gave a 70% function-level gain and a 2% user-level gain. Change-Id: I4f5cb386b346ff6c878a094e1c3b37e418e50bde
Diffstat (limited to 'vp9/common/vp9_rtcd_defs.sh')
-rw-r--r--vp9/common/vp9_rtcd_defs.sh4
1 files changed, 2 insertions, 2 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 63171033c..a18ae9bab 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -386,7 +386,7 @@ prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_strid
specialize vp9_variance4x4 mmx $sse2_x86inc
prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc
+specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc
@@ -416,7 +416,7 @@ prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr,
specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc
prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc
+specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc