diff options
-rw-r--r-- | test/variance_test.cc | 23 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 8 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c | 2 |
3 files changed, 8 insertions, 25 deletions
diff --git a/test/variance_test.cc b/test/variance_test.cc index 40b7df630..7d8118235 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P( #endif #if HAVE_AVX2 -// TODO(jzern): these prototypes can be removed after the avx2 versions are -// reenabled in vp9_rtcd_defs.pl. -extern "C" { -unsigned int vp9_sub_pixel_variance32x32_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_sub_pixel_variance64x64_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_sub_pixel_avg_variance32x32_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, - const uint8_t *second_pred); -unsigned int vp9_sub_pixel_avg_variance64x64_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, - const uint8_t *second_pred); -} + const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2; const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2; const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2; @@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 = const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 = vp9_sub_pixel_variance64x64_avx2; INSTANTIATE_TEST_CASE_P( - DISABLED_AVX2, VP9SubpelVarianceTest, + AVX2, VP9SubpelVarianceTest, ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2), make_tuple(6, 6, subpel_variance64x64_avx2))); @@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 = const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 = vp9_sub_pixel_avg_variance64x64_avx2; INSTANTIATE_TEST_CASE_P( - DISABLED_AVX2, VP9SubpelAvgVarianceTest, + AVX2, VP9SubpelAvgVarianceTest, ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2), make_tuple(6, 6, subpel_avg_variance64x64_avx2))); #endif // HAVE_AVX2 diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 36bdc8e14..708f41b87 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_ specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc"; diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c index 34ed1867f..9aa4da962 100644 --- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c +++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c @@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { #define LOAD_SRC_DST \ /* load source and destination */ \ src_reg = _mm256_loadu_si256((__m256i const *) (src)); \ - dst_reg = _mm256_load_si256((__m256i const *) (dst)); + dst_reg = _mm256_loadu_si256((__m256i const *) (dst)); #define AVG_NEXT_SRC(src_reg, size_stride) \ src_next_reg = _mm256_loadu_si256((__m256i const *) \ |