summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/sad_test.cc12
-rwxr-xr-xtest/tools_common.sh59
-rw-r--r--test/variance_test.cc23
-rwxr-xr-xtest/vp9_spatial_svc_encoder.sh36
-rwxr-xr-xtest/vpxenc.sh65
-rw-r--r--vp9/common/vp9_rtcd_defs.pl12
-rw-r--r--vp9/encoder/x86/vp9_sad4d_intrin_avx2.c6
-rw-r--r--vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c2
8 files changed, 77 insertions, 138 deletions
diff --git a/test/sad_test.cc b/test/sad_test.cc
index f07a98921..e63770bd4 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -640,19 +640,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
#if HAVE_AVX2
#if CONFIG_VP9_ENCODER
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *const ref_ptr[], int ref_stride,
- unsigned int *sad_array);
-void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *const ref_ptr[], int ref_stride,
- unsigned int *sad_array);
-}
const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
+INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
make_tuple(32, 32, sad_32x32x4d_avx2),
make_tuple(64, 64, sad_64x64x4d_avx2)));
#endif // CONFIG_VP9_ENCODER
diff --git a/test/tools_common.sh b/test/tools_common.sh
index e98beadf8..0bfefba46 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -182,65 +182,6 @@ webm_io_available() {
[ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes
}
-# Echoes yes to stdout when vpxenc exists according to vpx_tool_available().
-vpxenc_available() {
- [ -n $(vpx_tool_available vpxenc) ] && echo yes
-}
-
-# Wrapper function for running vpxenc. Positional parameters are interpreted as
-# follows:
-# 1 - codec name
-# 2 - input width
-# 3 - input height
-# 4 - number of frames to encode
-# 5 - path to input file
-# 6 - path to output file
-# Note: The output file path must end in .ivf to output an IVF file.
-# 7 - extra flags
-# Note: Extra flags currently supports a special case: when set to "-"
-# input is piped to vpxenc via cat.
-vpxenc() {
- local encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}"
- local codec="${1}"
- local width=${2}
- local height=${3}
- local frames=${4}
- local input=${5}
- local output="${VPX_TEST_OUTPUT_DIR}/${6}"
- local extra_flags=${7}
-
- # Because --ivf must be within the command line to get IVF from vpxenc.
- if echo "${output}" | egrep -q 'ivf$'; then
- use_ivf=--ivf
- else
- unset use_ivf
- fi
-
- if [ "${extra_flags}" = "-" ]; then
- pipe_input=yes
- extra_flags=${8}
- else
- unset pipe_input
- fi
-
- if [ -z "${pipe_input}" ]; then
- eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} --width=${width} \
- --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \
- --output="${output}" "${input}" ${devnull}
- else
- cat "${input}" \
- | eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} \
- --width=${width} --height=${height} --limit=${frames} ${use_ivf} \
- ${extra_flags} --output="${output}" - ${devnull}
- fi
-
- if [ ! -e "${output}" ]; then
- # Return non-zero exit status: output file doesn't exist, so something
- # definitely went wrong.
- return 1
- fi
-}
-
# Filters strings from positional parameter one using the filter specified by
# positional parameter two. Filter behavior depends on the presence of a third
# positional parameter. When parameter three is present, strings that match the
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 40b7df630..7d8118235 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P(
#endif
#if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-unsigned int vp9_sub_pixel_variance32x32_avx2(
- const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
- const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_variance64x64_avx2(
- const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
- const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
- const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
- const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
- const uint8_t *second_pred);
-unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
- const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
- const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
- const uint8_t *second_pred);
-}
+
const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
@@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
- DISABLED_AVX2, VP9SubpelVarianceTest,
+ AVX2, VP9SubpelVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
make_tuple(6, 6, subpel_variance64x64_avx2)));
@@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
vp9_sub_pixel_avg_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
- DISABLED_AVX2, VP9SubpelAvgVarianceTest,
+ AVX2, VP9SubpelAvgVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
#endif // HAVE_AVX2
diff --git a/test/vp9_spatial_svc_encoder.sh b/test/vp9_spatial_svc_encoder.sh
index 7a964a920..6dd5f171b 100755
--- a/test/vp9_spatial_svc_encoder.sh
+++ b/test/vp9_spatial_svc_encoder.sh
@@ -47,43 +47,9 @@ vp9_spatial_svc_encoder() {
[ -e "${output_file}" ] || return 1
}
-# Each mode is run with layer count 1-$vp9_ssvc_test_layers.
+# Each test is run with layer count 1-$vp9_ssvc_test_layers.
vp9_ssvc_test_layers=5
-DISABLED_vp9_spatial_svc_mode_i() {
- if [ "$(vp9_encode_available)" = "yes" ]; then
- local readonly test_name="DISABLED_vp9_spatial_svc_mode_i"
- for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
- vp9_spatial_svc_encoder "${test_name}" -m i -l ${layers}
- done
- fi
-}
-
-DISABLED_vp9_spatial_svc_mode_altip() {
- if [ "$(vp9_encode_available)" = "yes" ]; then
- local readonly test_name="DISABLED_vp9_spatial_svc_mode_altip"
- for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
- vp9_spatial_svc_encoder "${test_name}" -m "alt-ip" -l ${layers}
- done
- fi
-}
-
-DISABLED_vp9_spatial_svc_mode_ip() {
- if [ "$(vp9_encode_available)" = "yes" ]; then
- local readonly test_name="DISABLED_vp9_spatial_svc_mode_ip"
- vp9_spatial_svc_encoder "${test_name}" -m ip -l 1
- fi
-}
-
-DISABLED_vp9_spatial_svc_mode_gf() {
- if [ "$(vp9_encode_available)" = "yes" ]; then
- local readonly test_name="DISABLED_vp9_spatial_svc_mode_gf"
- for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
- vp9_spatial_svc_encoder "${test_name}" -m gf -l ${layers}
- done
- fi
-}
-
vp9_spatial_svc() {
if [ "$(vp9_encode_available)" = "yes" ]; then
local readonly test_name="vp9_spatial_svc"
diff --git a/test/vpxenc.sh b/test/vpxenc.sh
index f08c04878..dcfa7f7aa 100755
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -39,6 +39,65 @@ vpxenc_can_encode_vp9() {
fi
}
+# Echoes yes to stdout when vpxenc exists according to vpx_tool_available().
+vpxenc_available() {
+ [ -n $(vpx_tool_available vpxenc) ] && echo yes
+}
+
+# Wrapper function for running vpxenc. Positional parameters are interpreted as
+# follows:
+# 1 - codec name
+# 2 - input width
+# 3 - input height
+# 4 - number of frames to encode
+# 5 - path to input file
+# 6 - path to output file
+# Note: The output file path must end in .ivf to output an IVF file.
+# 7 - extra flags
+# Note: Extra flags currently supports a special case: when set to "-"
+# input is piped to vpxenc via cat.
+vpxenc() {
+ local encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}"
+ local codec="${1}"
+ local width=${2}
+ local height=${3}
+ local frames=${4}
+ local input=${5}
+ local output="${VPX_TEST_OUTPUT_DIR}/${6}"
+ local extra_flags=${7}
+
+ # Because --ivf must be within the command line to get IVF from vpxenc.
+ if echo "${output}" | egrep -q 'ivf$'; then
+ use_ivf=--ivf
+ else
+ unset use_ivf
+ fi
+
+ if [ "${extra_flags}" = "-" ]; then
+ pipe_input=yes
+ extra_flags=${8}
+ else
+ unset pipe_input
+ fi
+
+ if [ -z "${pipe_input}" ]; then
+ eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} --width=${width} \
+ --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \
+ --output="${output}" "${input}" ${devnull}
+ else
+ cat "${input}" \
+ | eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} \
+ --width=${width} --height=${height} --limit=${frames} ${use_ivf} \
+ ${extra_flags} --output="${output}" - ${devnull}
+ fi
+
+ if [ ! -e "${output}" ]; then
+ # Return non-zero exit status: output file doesn't exist, so something
+ # definitely went wrong.
+ return 1
+ fi
+}
+
vpxenc_vp8_ivf() {
if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then
vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
@@ -76,10 +135,10 @@ vpxenc_vp9_webm() {
fi
}
-DISABLED_vpxenc_vp9_ivf_lossless() {
+vpxenc_vp9_ivf_lossless() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \
- "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless
+ "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless=1
fi
}
@@ -88,6 +147,6 @@ vpxenc_tests="vpxenc_vp8_ivf
vpxenc_vp8_ivf_pipe_input
vpxenc_vp9_ivf
vpxenc_vp9_webm
- DISABLED_vpxenc_vp9_ivf_lossless"
+ vpxenc_vp9_ivf_lossless"
run_tests vpxenc_verify_environment "${vpxenc_tests}"
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 8d917919b..708f41b87 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vp9_sad4x4x8 sse4/;
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2/;
+specialize qw/vp9_sad64x64x4d sse2 avx2/;
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;
@@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
specialize qw/vp9_sad16x32x4d sse2/;
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2/;
+specialize qw/vp9_sad32x32x4d sse2 avx2/;
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
index f31b176e5..1feed6256 100644
--- a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
@@ -31,7 +31,7 @@ void vp9_sad32x32x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 32 ; i++) {
// load src and all refs
- src_reg = _mm256_load_si256((__m256i *)(src));
+ src_reg = _mm256_loadu_si256((__m256i *)(src));
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
@@ -103,8 +103,8 @@ void vp9_sad64x64x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 64 ; i++) {
// load 64 bytes from src and all refs
- src_reg = _mm256_load_si256((__m256i *)(src));
- srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
+ src_reg = _mm256_loadu_si256((__m256i *)(src));
+ srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32));
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
index 34ed1867f..9aa4da962 100644
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
#define LOAD_SRC_DST \
/* load source and destination */ \
src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
- dst_reg = _mm256_load_si256((__m256i const *) (dst));
+ dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
#define AVG_NEXT_SRC(src_reg, size_stride) \
src_next_reg = _mm256_loadu_si256((__m256i const *) \