summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorYunqing Wang <yunqingwang@google.com>2023-05-05 15:33:21 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2023-05-05 15:33:21 +0000
commit17f1a23f5510111ebc4d0ea99aa302fc392fc810 (patch)
tree7a8f69e95699b314e526f7425c8f4939ec797e6d /test
parent3580bc559a223818a29c263e8b59d72ce56dddd5 (diff)
parent255ee1888589aa15ae909b992fe123c0358b1730 (diff)
downloadlibvpx-17f1a23f5510111ebc4d0ea99aa302fc392fc810.tar
libvpx-17f1a23f5510111ebc4d0ea99aa302fc392fc810.tar.gz
libvpx-17f1a23f5510111ebc4d0ea99aa302fc392fc810.tar.bz2
libvpx-17f1a23f5510111ebc4d0ea99aa302fc392fc810.zip
Merge "Add AVX2 intrinsic for idct16x16 and idct32x32 functions" into main
Diffstat (limited to 'test')
-rw-r--r--test/dct16x16_test.cc98
-rw-r--r--test/dct32x32_test.cc197
2 files changed, 292 insertions, 3 deletions
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 3c104f3a4..4ad2263cf 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -592,7 +592,7 @@ class Trans16x16TestBase {
const int count_test_block = 10000;
const int eob = 10;
const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
- DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+ DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
@@ -643,6 +643,80 @@ class Trans16x16TestBase {
}
}
+ void RunInvTrans16x16SpeedTest(IdctFunc ref_txfm, int thresh) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 10000;
+ const int eob = 10;
+ const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
+ int64_t c_sum_time = 0;
+ int64_t simd_sum_time = 0;
+ DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ if (j < eob) {
+ // Random values less than the threshold, either positive or negative
+ coeff[scan[j]] = rnd(thresh);
+ } else {
+ coeff[scan[j]] = 0;
+ }
+ if (bit_depth_ == VPX_BITS_8) {
+ dst[j] = 0;
+ ref[j] = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ dst16[j] = 0;
+ ref16[j] = 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+ if (bit_depth_ == VPX_BITS_8) {
+ vpx_usec_timer timer_c;
+ vpx_usec_timer_start(&timer_c);
+ for (int i = 0; i < count_test_block; ++i) {
+ ref_txfm(coeff, ref, pitch_);
+ }
+ vpx_usec_timer_mark(&timer_c);
+ c_sum_time += vpx_usec_timer_elapsed(&timer_c);
+
+ vpx_usec_timer timer_mod;
+ vpx_usec_timer_start(&timer_mod);
+ for (int i = 0; i < count_test_block; ++i) {
+ RunInvTxfm(coeff, dst, pitch_);
+ }
+ vpx_usec_timer_mark(&timer_mod);
+ simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
+ } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_usec_timer timer_c;
+ vpx_usec_timer_start(&timer_c);
+ for (int i = 0; i < count_test_block; ++i) {
+ ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
+ }
+ vpx_usec_timer_mark(&timer_c);
+ c_sum_time += vpx_usec_timer_elapsed(&timer_c);
+
+ vpx_usec_timer timer_mod;
+ vpx_usec_timer_start(&timer_mod);
+ for (int i = 0; i < count_test_block; ++i) {
+ RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_);
+ }
+ vpx_usec_timer_mark(&timer_mod);
+ simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ printf(
+ "c_time = %" PRId64 " \t simd_time = %" PRId64 " \t Gain = %4.2f \n",
+ c_sum_time, simd_sum_time,
+ (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
+ }
+
int pitch_;
int tx_type_;
vpx_bit_depth_t bit_depth_;
@@ -755,7 +829,6 @@ TEST_P(Trans16x16HT, QuantCheck) {
RunQuantCheck(429, 729);
}
-#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
class InvTrans16x16DCT : public Trans16x16TestBase,
public ::testing::TestWithParam<Idct16x16Param> {
public:
@@ -786,7 +859,10 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans16x16DCT);
TEST_P(InvTrans16x16DCT, CompareReference) {
CompareInvReference(ref_txfm_, thresh_);
}
-#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+TEST_P(InvTrans16x16DCT, DISABLED_Speed) {
+ RunInvTrans16x16SpeedTest(ref_txfm_, thresh_);
+}
using std::make_tuple;
@@ -828,6 +904,12 @@ INSTANTIATE_TEST_SUITE_P(
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+
+INSTANTIATE_TEST_SUITE_P(C, InvTrans16x16DCT,
+ ::testing::Values(make_tuple(&vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_256_add_c,
+ 6225, VPX_BITS_8)));
+
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -862,6 +944,11 @@ INSTANTIATE_TEST_SUITE_P(
2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
3, VPX_BITS_8)));
+
+INSTANTIATE_TEST_SUITE_P(SSE2, InvTrans16x16DCT,
+ ::testing::Values(make_tuple(
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_256_add_sse2, 6225, VPX_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -869,6 +956,11 @@ INSTANTIATE_TEST_SUITE_P(
AVX2, Trans16x16DCT,
::testing::Values(make_tuple(&vpx_fdct16x16_avx2,
&vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
+
+INSTANTIATE_TEST_SUITE_P(AVX2, InvTrans16x16DCT,
+ ::testing::Values(make_tuple(
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_256_add_avx2, 6225, VPX_BITS_8)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index 91bb8e01e..1167038b5 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -24,10 +24,12 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h" // for round()
+#include "vpx_ports/vpx_timer.h"
using libvpx_test::ACMRandom;
@@ -71,6 +73,9 @@ typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
Trans32x32Param;
+typedef std::tuple<InvTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t, int, int>
+ InvTrans32x32Param;
+
#if CONFIG_VP9_HIGHBITDEPTH
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
@@ -314,6 +319,174 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
}
}
+class InvTrans32x32Test : public ::testing::TestWithParam<InvTrans32x32Param> {
+ public:
+ virtual ~InvTrans32x32Test() {}
+ virtual void SetUp() {
+ ref_txfm_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ version_ = GET_PARAM(2); // 0: high precision forward transform
+ // 1: low precision version for rd loop
+ bit_depth_ = GET_PARAM(3);
+ eob_ = GET_PARAM(4);
+ thresh_ = GET_PARAM(4);
+ mask_ = (1 << bit_depth_) - 1;
+ pitch_ = 32;
+ }
+
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ void RunRefTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+ ref_txfm_(out, dst, stride);
+ }
+ void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+ inv_txfm_(out, dst, stride);
+ }
+ int version_;
+ vpx_bit_depth_t bit_depth_;
+ int mask_;
+ int eob_;
+ int thresh_;
+
+ InvTxfmFunc ref_txfm_;
+ InvTxfmFunc inv_txfm_;
+ int pitch_;
+
+ void RunInvTrans32x32SpeedTest() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 10000;
+ int64_t c_sum_time = 0;
+ int64_t simd_sum_time = 0;
+ const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
+ DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ if (j < eob_) {
+ // Random values less than the threshold, either positive or negative
+ coeff[scan[j]] = rnd(thresh_);
+ } else {
+ coeff[scan[j]] = 0;
+ }
+ if (bit_depth_ == VPX_BITS_8) {
+ dst[j] = 0;
+ ref[j] = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ dst16[j] = 0;
+ ref16[j] = 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+ if (bit_depth_ == VPX_BITS_8) {
+ vpx_usec_timer timer_c;
+ vpx_usec_timer_start(&timer_c);
+ for (int i = 0; i < count_test_block; ++i) {
+ RunRefTxfm(coeff, ref, pitch_);
+ }
+ vpx_usec_timer_mark(&timer_c);
+ c_sum_time += vpx_usec_timer_elapsed(&timer_c);
+
+ vpx_usec_timer timer_mod;
+ vpx_usec_timer_start(&timer_mod);
+ for (int i = 0; i < count_test_block; ++i) {
+ RunInvTxfm(coeff, dst, pitch_);
+ }
+ vpx_usec_timer_mark(&timer_mod);
+ simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
+ } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_usec_timer timer_c;
+ vpx_usec_timer_start(&timer_c);
+ for (int i = 0; i < count_test_block; ++i) {
+ RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
+ }
+ vpx_usec_timer_mark(&timer_c);
+ c_sum_time += vpx_usec_timer_elapsed(&timer_c);
+
+ vpx_usec_timer timer_mod;
+ vpx_usec_timer_start(&timer_mod);
+ for (int i = 0; i < count_test_block; ++i) {
+ RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_);
+ }
+ vpx_usec_timer_mark(&timer_mod);
+ simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ printf(
+ "c_time = %" PRId64 " \t simd_time = %" PRId64 " \t Gain = %4.2f \n",
+ c_sum_time, simd_sum_time,
+ (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
+ }
+
+ void CompareInvReference32x32() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 10000;
+ const int eob = 31;
+ const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
+ DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+ DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ for (int i = 0; i < count_test_block; ++i) {
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ if (j < eob) {
+ coeff[scan[j]] = rnd.Rand8Extremes();
+ } else {
+ coeff[scan[j]] = 0;
+ }
+ if (bit_depth_ == VPX_BITS_8) {
+ dst[j] = 0;
+ ref[j] = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ dst16[j] = 0;
+ ref16[j] = 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+ if (bit_depth_ == VPX_BITS_8) {
+ RunRefTxfm(coeff, ref, pitch_);
+ RunInvTxfm(coeff, dst, pitch_);
+ } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
+ ASM_REGISTER_STATE_CHECK(
+ RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+
+ for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const uint32_t diff =
+ bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
+#else
+ const uint32_t diff = dst[j] - ref[j];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ const uint32_t error = diff * diff;
+ EXPECT_EQ(0u, error) << "Error: 32x32 IDCT Comparison has error "
+ << error << " at index " << j;
+ }
+ }
+ }
+};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans32x32Test);
+
+TEST_P(InvTrans32x32Test, DISABLED_Speed) { RunInvTrans32x32SpeedTest(); }
+TEST_P(InvTrans32x32Test, CompareReference) { CompareInvReference32x32(); }
+
using std::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -334,6 +507,14 @@ INSTANTIATE_TEST_SUITE_P(
VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
1, VPX_BITS_8)));
+
+INSTANTIATE_TEST_SUITE_P(
+ C, InvTrans32x32Test,
+ ::testing::Values(
+ (make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_c, 0,
+ VPX_BITS_8, 32, 6225)),
+ make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_c, 0,
+ VPX_BITS_8, 16, 6255)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
@@ -352,6 +533,14 @@ INSTANTIATE_TEST_SUITE_P(
&vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_sse2,
&vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+
+INSTANTIATE_TEST_SUITE_P(
+ SSE2, InvTrans32x32Test,
+ ::testing::Values(
+ (make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_sse2, 0,
+ VPX_BITS_8, 32, 6225)),
+ make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_sse2, 0,
+ VPX_BITS_8, 16, 6225)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -377,6 +566,14 @@ INSTANTIATE_TEST_SUITE_P(
&vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_avx2,
&vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+
+INSTANTIATE_TEST_SUITE_P(
+ AVX2, InvTrans32x32Test,
+ ::testing::Values(
+ (make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_avx2, 0,
+ VPX_BITS_8, 32, 6225)),
+ make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_avx2, 0,
+ VPX_BITS_8, 16, 6225)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE