diff options
-rw-r--r-- | test/sad_test.cc | 12 | ||||
-rwxr-xr-x | test/tools_common.sh | 59 | ||||
-rw-r--r-- | test/variance_test.cc | 23 | ||||
-rwxr-xr-x | test/vp9_spatial_svc_encoder.sh | 36 | ||||
-rwxr-xr-x | test/vpxenc.sh | 65 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 12 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_sad4d_intrin_avx2.c | 6 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c | 2 |
8 files changed, 77 insertions, 138 deletions
diff --git a/test/sad_test.cc b/test/sad_test.cc index f07a98921..e63770bd4 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -640,19 +640,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values( #if HAVE_AVX2 #if CONFIG_VP9_ENCODER -// TODO(jzern): these prototypes can be removed after the avx2 versions are -// reenabled in vp9_rtcd_defs.pl. -extern "C" { -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, - const uint8_t *const ref_ptr[], int ref_stride, - unsigned int *sad_array); -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, - const uint8_t *const ref_ptr[], int ref_stride, - unsigned int *sad_array); -} const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2; const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2; -INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values( +INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values( make_tuple(32, 32, sad_32x32x4d_avx2), make_tuple(64, 64, sad_64x64x4d_avx2))); #endif // CONFIG_VP9_ENCODER diff --git a/test/tools_common.sh b/test/tools_common.sh index e98beadf8..0bfefba46 100755 --- a/test/tools_common.sh +++ b/test/tools_common.sh @@ -182,65 +182,6 @@ webm_io_available() { [ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes } -# Echoes yes to stdout when vpxenc exists according to vpx_tool_available(). -vpxenc_available() { - [ -n $(vpx_tool_available vpxenc) ] && echo yes -} - -# Wrapper function for running vpxenc. Positional parameters are interpreted as -# follows: -# 1 - codec name -# 2 - input width -# 3 - input height -# 4 - number of frames to encode -# 5 - path to input file -# 6 - path to output file -# Note: The output file path must end in .ivf to output an IVF file. -# 7 - extra flags -# Note: Extra flags currently supports a special case: when set to "-" -# input is piped to vpxenc via cat. -vpxenc() { - local encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}" - local codec="${1}" - local width=${2} - local height=${3} - local frames=${4} - local input=${5} - local output="${VPX_TEST_OUTPUT_DIR}/${6}" - local extra_flags=${7} - - # Because --ivf must be within the command line to get IVF from vpxenc. - if echo "${output}" | egrep -q 'ivf$'; then - use_ivf=--ivf - else - unset use_ivf - fi - - if [ "${extra_flags}" = "-" ]; then - pipe_input=yes - extra_flags=${8} - else - unset pipe_input - fi - - if [ -z "${pipe_input}" ]; then - eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} --width=${width} \ - --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \ - --output="${output}" "${input}" ${devnull} - else - cat "${input}" \ - | eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} \ - --width=${width} --height=${height} --limit=${frames} ${use_ivf} \ - ${extra_flags} --output="${output}" - ${devnull} - fi - - if [ ! -e "${output}" ]; then - # Return non-zero exit status: output file doesn't exist, so something - # definitely went wrong. - return 1 - fi -} - # Filters strings from positional parameter one using the filter specified by # positional parameter two. Filter behavior depends on the presence of a third # positional parameter. When parameter three is present, strings that match the diff --git a/test/variance_test.cc b/test/variance_test.cc index 40b7df630..7d8118235 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -707,24 +707,7 @@ INSTANTIATE_TEST_CASE_P( #endif #if HAVE_AVX2 -// TODO(jzern): these prototypes can be removed after the avx2 versions are -// reenabled in vp9_rtcd_defs.pl. -extern "C" { -unsigned int vp9_sub_pixel_variance32x32_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_sub_pixel_variance64x64_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_sub_pixel_avg_variance32x32_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, - const uint8_t *second_pred); -unsigned int vp9_sub_pixel_avg_variance64x64_avx2( - const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, - const uint8_t *second_pred); -} + const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2; const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2; const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2; @@ -743,7 +726,7 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 = const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 = vp9_sub_pixel_variance64x64_avx2; INSTANTIATE_TEST_CASE_P( - DISABLED_AVX2, VP9SubpelVarianceTest, + AVX2, VP9SubpelVarianceTest, ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2), make_tuple(6, 6, subpel_variance64x64_avx2))); @@ -752,7 +735,7 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 = const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 = vp9_sub_pixel_avg_variance64x64_avx2; INSTANTIATE_TEST_CASE_P( - DISABLED_AVX2, VP9SubpelAvgVarianceTest, + AVX2, VP9SubpelAvgVarianceTest, ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2), make_tuple(6, 6, subpel_avg_variance64x64_avx2))); #endif // HAVE_AVX2 diff --git a/test/vp9_spatial_svc_encoder.sh b/test/vp9_spatial_svc_encoder.sh index 7a964a920..6dd5f171b 100755 --- a/test/vp9_spatial_svc_encoder.sh +++ b/test/vp9_spatial_svc_encoder.sh @@ -47,43 +47,9 @@ vp9_spatial_svc_encoder() { [ -e "${output_file}" ] || return 1 } -# Each mode is run with layer count 1-$vp9_ssvc_test_layers. +# Each test is run with layer count 1-$vp9_ssvc_test_layers. vp9_ssvc_test_layers=5 -DISABLED_vp9_spatial_svc_mode_i() { - if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly test_name="DISABLED_vp9_spatial_svc_mode_i" - for layers in $(seq 1 ${vp9_ssvc_test_layers}); do - vp9_spatial_svc_encoder "${test_name}" -m i -l ${layers} - done - fi -} - -DISABLED_vp9_spatial_svc_mode_altip() { - if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly test_name="DISABLED_vp9_spatial_svc_mode_altip" - for layers in $(seq 1 ${vp9_ssvc_test_layers}); do - vp9_spatial_svc_encoder "${test_name}" -m "alt-ip" -l ${layers} - done - fi -} - -DISABLED_vp9_spatial_svc_mode_ip() { - if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly test_name="DISABLED_vp9_spatial_svc_mode_ip" - vp9_spatial_svc_encoder "${test_name}" -m ip -l 1 - fi -} - -DISABLED_vp9_spatial_svc_mode_gf() { - if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly test_name="DISABLED_vp9_spatial_svc_mode_gf" - for layers in $(seq 1 ${vp9_ssvc_test_layers}); do - vp9_spatial_svc_encoder "${test_name}" -m gf -l ${layers} - done - fi -} - vp9_spatial_svc() { if [ "$(vp9_encode_available)" = "yes" ]; then local readonly test_name="vp9_spatial_svc" diff --git a/test/vpxenc.sh b/test/vpxenc.sh index f08c04878..dcfa7f7aa 100755 --- a/test/vpxenc.sh +++ b/test/vpxenc.sh @@ -39,6 +39,65 @@ vpxenc_can_encode_vp9() { fi } +# Echoes yes to stdout when vpxenc exists according to vpx_tool_available(). +vpxenc_available() { + [ -n $(vpx_tool_available vpxenc) ] && echo yes +} + +# Wrapper function for running vpxenc. Positional parameters are interpreted as +# follows: +# 1 - codec name +# 2 - input width +# 3 - input height +# 4 - number of frames to encode +# 5 - path to input file +# 6 - path to output file +# Note: The output file path must end in .ivf to output an IVF file. +# 7 - extra flags +# Note: Extra flags currently supports a special case: when set to "-" +# input is piped to vpxenc via cat. +vpxenc() { + local encoder="${LIBVPX_BIN_PATH}/vpxenc${VPX_TEST_EXE_SUFFIX}" + local codec="${1}" + local width=${2} + local height=${3} + local frames=${4} + local input=${5} + local output="${VPX_TEST_OUTPUT_DIR}/${6}" + local extra_flags=${7} + + # Because --ivf must be within the command line to get IVF from vpxenc. + if echo "${output}" | egrep -q 'ivf$'; then + use_ivf=--ivf + else + unset use_ivf + fi + + if [ "${extra_flags}" = "-" ]; then + pipe_input=yes + extra_flags=${8} + else + unset pipe_input + fi + + if [ -z "${pipe_input}" ]; then + eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} --width=${width} \ + --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \ + --output="${output}" "${input}" ${devnull} + else + cat "${input}" \ + | eval "${VPX_TEST_PREFIX}" "${encoder}" --codec=${codec} \ + --width=${width} --height=${height} --limit=${frames} ${use_ivf} \ + ${extra_flags} --output="${output}" - ${devnull} + fi + + if [ ! -e "${output}" ]; then + # Return non-zero exit status: output file doesn't exist, so something + # definitely went wrong. + return 1 + fi +} + vpxenc_vp8_ivf() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ @@ -76,10 +135,10 @@ vpxenc_vp9_webm() { fi } -DISABLED_vpxenc_vp9_ivf_lossless() { +vpxenc_vp9_ivf_lossless() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ - "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless + "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless=1 fi } @@ -88,6 +147,6 @@ vpxenc_tests="vpxenc_vp8_ivf vpxenc_vp8_ivf_pipe_input vpxenc_vp9_ivf vpxenc_vp9_webm - DISABLED_vpxenc_vp9_ivf_lossless" + vpxenc_vp9_ivf_lossless" run_tests vpxenc_verify_environment "${vpxenc_tests}" diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 8d917919b..708f41b87 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_ specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const specialize qw/vp9_sad4x4x8 sse4/; add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad64x64x4d sse2/; +specialize qw/vp9_sad64x64x4d sse2 avx2/; add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad32x64x4d sse2/; @@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co specialize qw/vp9_sad16x32x4d sse2/; add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad32x32x4d sse2/; +specialize qw/vp9_sad32x32x4d sse2 avx2/; add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad16x16x4d sse2/; diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c index f31b176e5..1feed6256 100644 --- a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c +++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c @@ -31,7 +31,7 @@ void vp9_sad32x32x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 32 ; i++) { // load src and all refs - src_reg = _mm256_load_si256((__m256i *)(src)); + src_reg = _mm256_loadu_si256((__m256i *)(src)); ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); @@ -103,8 +103,8 @@ void vp9_sad64x64x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 64 ; i++) { // load 64 bytes from src and all refs - src_reg = _mm256_load_si256((__m256i *)(src)); - srcnext_reg = _mm256_load_si256((__m256i *)(src + 32)); + src_reg = _mm256_loadu_si256((__m256i *)(src)); + srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32)); ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c index 34ed1867f..9aa4da962 100644 --- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c +++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c @@ -67,7 +67,7 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { #define LOAD_SRC_DST \ /* load source and destination */ \ src_reg = _mm256_loadu_si256((__m256i const *) (src)); \ - dst_reg = _mm256_load_si256((__m256i const *) (dst)); + dst_reg = _mm256_loadu_si256((__m256i const *) (dst)); #define AVG_NEXT_SRC(src_reg, size_stride) \ src_next_reg = _mm256_loadu_si256((__m256i const *) \ |