diff options
author | levytamar82 <levytamar82@gmail.com> | 2014-02-23 23:58:56 -0700 |
---|---|---|
committer | levytamar82 <levytamar82@gmail.com> | 2014-02-28 22:51:04 -0700 |
commit | ea1490968762b96f41548d792e84c7a195bb881f (patch) | |
tree | 25beaf337a864f27fc8169fe37889964d9eaffac /vp9/common | |
parent | 5a7ac03b9ea8e75f84982e5bc6d3145c8a43d41c (diff) | |
download | libvpx-ea1490968762b96f41548d792e84c7a195bb881f.tar libvpx-ea1490968762b96f41548d792e84c7a195bb881f.tar.gz libvpx-ea1490968762b96f41548d792e84c7a195bb881f.tar.bz2 libvpx-ea1490968762b96f41548d792e84c7a195bb881f.zip |
AVX2 SubPixel AVG Variance Optimization
Optimizing 2 functions to process 32 elements in parallel instead of 16:
1. vp9_sub_pixel_avg_variance64x64
2. vp9_sub_pixel_avg_variance32x32
both of those function were calling vp9_sub_pixel_avg_variance16xh_ssse3
instead of calling that function, it calls vp9_sub_pixel_avg_variance32xh_avx2
that is written in avx2 and process 32 elements in parallel.
This Optimization gave 80% function level gain and 2% user level gain
Change-Id: Iea694654e1b7612dc6ed11e2626208c2179502c8
Diffstat (limited to 'vp9/common')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.sh | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index a18ae9bab..bd9571e0a 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -389,7 +389,7 @@ prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc +specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc @@ -419,7 +419,7 @@ prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc +specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc |