summaryrefslogtreecommitdiff
path: root/vp9/common
diff options
context:
space:
mode:
author: levytamar82 <levytamar82@gmail.com> 2014-02-23 23:58:56 -0700
committer: levytamar82 <levytamar82@gmail.com> 2014-02-28 22:51:04 -0700
commit: ea1490968762b96f41548d792e84c7a195bb881f (patch)
tree: 25beaf337a864f27fc8169fe37889964d9eaffac /vp9/common
parent: 5a7ac03b9ea8e75f84982e5bc6d3145c8a43d41c (diff)
downloadlibvpx-ea1490968762b96f41548d792e84c7a195bb881f.tar
libvpx-ea1490968762b96f41548d792e84c7a195bb881f.tar.gz
libvpx-ea1490968762b96f41548d792e84c7a195bb881f.tar.bz2
libvpx-ea1490968762b96f41548d792e84c7a195bb881f.zip
AVX2 SubPixel AVG Variance Optimization
Optimize two functions to process 32 elements in parallel instead of 16: (1) vp9_sub_pixel_avg_variance64x64 and (2) vp9_sub_pixel_avg_variance32x32. Both of those functions were calling vp9_sub_pixel_avg_variance16xh_ssse3; instead of calling that function, they now call vp9_sub_pixel_avg_variance32xh_avx2, which is written in AVX2 and processes 32 elements in parallel. This optimization gave an 80% function-level gain and a 2% user-level gain. Change-Id: Iea694654e1b7612dc6ed11e2626208c2179502c8
Diffstat (limited to 'vp9/common')
-rw-r--r--vp9/common/vp9_rtcd_defs.sh4
1 files changed, 2 insertions, 2 deletions
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index a18ae9bab..bd9571e0a 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -389,7 +389,7 @@ prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int
specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc
+specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc
@@ -419,7 +419,7 @@ prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int
specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc
+specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc