summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: James Zern <jzern@google.com> 2014-06-10 16:14:44 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2014-06-10 16:14:45 -0700
commit: cbce09ce623ed828ecc5113dabcf60da391461ad (patch)
tree: 6b456331e83ab1e10440e033b0bb7f8991f58d6f
parent: 3659fbd38c881a7bc24eadfddfc7e1fabe1fa517 (diff)
parent: 520cb3f39f82520dad6f8b7254ec512b3fdbc16f (diff)
download: libvpx-cbce09ce623ed828ecc5113dabcf60da391461ad.tar
libvpx-cbce09ce623ed828ecc5113dabcf60da391461ad.tar.gz
libvpx-cbce09ce623ed828ecc5113dabcf60da391461ad.tar.bz2
libvpx-cbce09ce623ed828ecc5113dabcf60da391461ad.zip
Merge changes I6abc0657,I8224fba2,I04f64a45,I5d49d119,I76b4d171,I88c11ac3

* changes:
  vp9_sub_pixel_*variance*: disable avx2 variants
  vp9_sad*x4d: disable avx2 variants
  vp9_f(dct|ht): disable avx2 variants
  convolve: disable avx2 variants
  fdct8x8_test: add missing avx2 functions
  dct4x4_test: add missing avx2 functions
-rw-r--r-- test/convolve_test.cc 24
-rw-r--r-- test/dct16x16_test.cc 25
-rw-r--r-- test/fdct4x4_test.cc 15
-rw-r--r-- test/fdct8x8_test.cc 14
-rw-r--r-- test/sad_test.cc 20
-rw-r--r-- test/variance_test.cc 51
-rw-r--r-- vp9/common/vp9_rtcd_defs.pl 22
7 files changed, 159 insertions, 12 deletions
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 3412ddd23..6af2abb79 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -645,6 +645,26 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
#endif
#if HAVE_AVX2
+// TODO(jzern): these prototypes can be removed after the avx2 versions are
+// reenabled in vp9_rtcd_defs.pl.
+extern "C" {
+void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+}
+
const ConvolveFunctions convolve8_avx2(
vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
@@ -655,8 +675,10 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
make_tuple(8, 4, &convolve8_avx2),
make_tuple(4, 8, &convolve8_avx2),
make_tuple(8, 8, &convolve8_avx2),
+ make_tuple(8, 16, &convolve8_avx2)));
+
+INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values(
make_tuple(16, 8, &convolve8_avx2),
- make_tuple(8, 16, &convolve8_avx2),
make_tuple(16, 16, &convolve8_avx2),
make_tuple(32, 16, &convolve8_avx2),
make_tuple(16, 32, &convolve8_avx2),
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 99c8d0c7c..e6a20fb41 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -606,4 +606,29 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(
make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0)));
#endif
+
+#if HAVE_AVX2
+// TODO(jzern): these prototypes can be removed after the avx2 versions are
+// reenabled in vp9_rtcd_defs.pl.
+extern "C" {
+void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride);
+void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, int stride,
+ int tx_type);
+}
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_AVX2, Trans16x16DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct16x16_avx2,
+ &vp9_idct16x16_256_add_c, 0)));
+INSTANTIATE_TEST_CASE_P(
+ AVX2, Trans16x16HT,
+ ::testing::Values(
+ make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 3)));
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_AVX2, Trans16x16HT,
+ ::testing::Values(
+ make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 0),
+ make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 1),
+ make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 2)));
+#endif
} // namespace
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 961eb4d6c..ec233d3f3 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -376,4 +376,19 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+ AVX2, Trans4x4DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct4x4_avx2,
+ &vp9_idct4x4_16_add_c, 0)));
+INSTANTIATE_TEST_CASE_P(
+ AVX2, Trans4x4HT,
+ ::testing::Values(
+ make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 0),
+ make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 1),
+ make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 2),
+ make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 3)));
+#endif
+
} // namespace
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 72a5fad35..146aa31c6 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -367,4 +367,18 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(
make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0)));
#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+ AVX2, FwdTrans8x8DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct8x8_avx2, &vp9_idct8x8_64_add_c, 0)));
+INSTANTIATE_TEST_CASE_P(
+ AVX2, FwdTrans8x8HT,
+ ::testing::Values(
+ make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 0),
+ make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 1),
+ make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 2),
+ make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 3)));
+#endif
} // namespace
diff --git a/test/sad_test.cc b/test/sad_test.cc
index f9ffa92de..89d8c4152 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -627,4 +627,24 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSSE3
+#if HAVE_AVX2
+#if CONFIG_VP9_ENCODER
+// TODO(jzern): these prototypes can be removed after the avx2 versions are
+// reenabled in vp9_rtcd_defs.pl.
+extern "C" {
+void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_ptr[], int ref_stride,
+ unsigned int *sad_array);
+void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_ptr[], int ref_stride,
+ unsigned int *sad_array);
+}
+const sad_n_by_n_by_4_fn_t sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
+const sad_n_by_n_by_4_fn_t sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
+INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
+ make_tuple(32, 32, sad_32x32x4d_avx2),
+ make_tuple(64, 64, sad_64x64x4d_avx2)));
+#endif // CONFIG_VP9_ENCODER
+#endif // HAVE_AVX2
+
} // namespace
diff --git a/test/variance_test.cc b/test/variance_test.cc
index c9bf13a6b..998569516 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -702,6 +702,57 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
#endif
#endif
+
+#if HAVE_AVX2
+// TODO(jzern): these prototypes can be removed after the avx2 versions are
+// reenabled in vp9_rtcd_defs.pl.
+extern "C" {
+unsigned int vp9_sub_pixel_variance32x32_avx2(
+ const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance64x64_avx2(
+ const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
+ const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
+ const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
+ const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
+ const uint8_t *second_pred);
+}
+const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
+const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
+const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
+const vp9_variance_fn_t variance64x32_avx2 = vp9_variance64x32_avx2;
+const vp9_variance_fn_t variance64x64_avx2 = vp9_variance64x64_avx2;
+INSTANTIATE_TEST_CASE_P(
+ AVX2, VP9VarianceTest,
+ ::testing::Values(make_tuple(4, 4, variance16x16_avx2),
+ make_tuple(5, 4, variance32x16_avx2),
+ make_tuple(5, 5, variance32x32_avx2),
+ make_tuple(6, 5, variance64x32_avx2),
+ make_tuple(6, 6, variance64x64_avx2)));
+
+const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
+ vp9_sub_pixel_variance32x32_avx2;
+const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
+ vp9_sub_pixel_variance64x64_avx2;
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_AVX2, VP9SubpelVarianceTest,
+ ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
+ make_tuple(6, 6, subpel_variance64x64_avx2)));
+
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
+ vp9_sub_pixel_avg_variance32x32_avx2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
+ vp9_sub_pixel_avg_variance64x64_avx2;
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_AVX2, VP9SubpelAvgVarianceTest,
+ ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
+ make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
+#endif // HAVE_AVX2
#endif // CONFIG_VP9_ENCODER
} // namespace vp9
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 09ce72ef2..06ed47079 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
+specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_neon_asm=vp9_convolve8_neon;
add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vp9_sad4x4x8 sse4/;
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2 avx2/;
+specialize qw/vp9_sad64x64x4d sse2/;
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;
@@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
specialize qw/vp9_sad16x32x4d sse2/;
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2 avx2/;
+specialize qw/vp9_sad32x32x4d sse2/;
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;
@@ -739,7 +739,7 @@ add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int strid
specialize qw/vp9_fht8x8 sse2 avx2/;
add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
-specialize qw/vp9_fht16x16 sse2 avx2/;
+specialize qw/vp9_fht16x16 sse2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
@@ -751,7 +751,7 @@ add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stri
specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct16x16 sse2 avx2/;
+specialize qw/vp9_fdct16x16 sse2/;
add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32 sse2 avx2/;