diff options
author | levytamar82 <levytamar82@gmail.com> | 2014-02-03 18:40:33 -0700 |
---|---|---|
committer | levytamar82 <levytamar82@gmail.com> | 2014-02-14 16:59:11 -0700 |
commit | 52dac5d1cb08a563551e312929910be736f708c6 (patch) | |
tree | bf2407997bc82ab659b084290e95e73a07595858 /vp9/common/vp9_rtcd_defs.sh | |
parent | 0876302e4e2b1d83bacdf0395c50ee118cfafef3 (diff) | |
download | libvpx-52dac5d1cb08a563551e312929910be736f708c6.tar libvpx-52dac5d1cb08a563551e312929910be736f708c6.tar.gz libvpx-52dac5d1cb08a563551e312929910be736f708c6.tar.bz2 libvpx-52dac5d1cb08a563551e312929910be736f708c6.zip |
AVX2 SubPixel Variance Optimization
Optimizing 2 functions to process 32 elements in parallel instead of 16:
1. vp9_sub_pixel_variance64x64
2. vp9_sub_pixel_variance32x32
Both of those functions were calling vp9_sub_pixel_variance16xh_ssse3.
Instead of calling that function, they now call vp9_sub_pixel_variance32xh_avx2,
which is written in AVX2 and processes 32 elements in parallel.
This optimization gave a 70% function-level gain and a 2% user-level gain.
Change-Id: I4f5cb386b346ff6c878a094e1c3b37e418e50bde
Diffstat (limited to 'vp9/common/vp9_rtcd_defs.sh')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 63171033c..a18ae9bab 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -386,7 +386,7 @@ prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_strid specialize vp9_variance4x4 mmx $sse2_x86inc prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc +specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc @@ -416,7 +416,7 @@ prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc +specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc |