summary | refs | log | tree | commit | diff
path: root/test/vp9_quantize_test.cc
diff options
context:
space:
mode:
author: Scott LaVarnway <slavarnway@google.com> 2022-07-07 06:34:11 -0700
committer: Scott LaVarnway <slavarnway@google.com> 2022-07-07 13:09:43 -0700
commit: ba56eafb5742e5c28b7a99b5442698e9a3a61683 (patch)
tree: df43ab6eca8c01af86ee528d07c07bb3c73b9173 /test/vp9_quantize_test.cc
parent: dbac8e01e05ad3d1b47887b1ac864339115aa721 (diff)
download: libvpx-ba56eafb5742e5c28b7a99b5442698e9a3a61683.tar
libvpx-ba56eafb5742e5c28b7a99b5442698e9a3a61683.tar.gz
libvpx-ba56eafb5742e5c28b7a99b5442698e9a3a61683.tar.bz2
libvpx-ba56eafb5742e5c28b7a99b5442698e9a3a61683.zip
VPX: Add quantize speed test for ref vs opt.

Bug: b/237714063
Change-Id: I4304ba8d976fed3613e28442983b04a9cfc15b79
Diffstat (limited to 'test/vp9_quantize_test.cc')
-rw-r--r-- test/vp9_quantize_test.cc 226
1 files changed, 134 insertions, 92 deletions
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index ca1062a76..b14a20cfc 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -67,6 +67,45 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count,
fn(coeff, count, round, quant, qcoeff, dqcoeff, dequant, eob, scan, iscan);
}
+void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
+ int16_t *quant, int16_t *quant_shift,
+ int16_t *dequant, int16_t *round_fp,
+ int16_t *quant_fp) {
+ // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
+ constexpr int kMaxQRoundingFactorFp = 64;
+
+ for (int j = 0; j < 2; j++) {
+ // The range is 4 to 1828 in the VP9 tables.
+ const int qlookup = rnd->RandRange(1825) + 4;
+ round_fp[j] = (kMaxQRoundingFactorFp * qlookup) >> 7;
+ quant_fp[j] = (1 << 16) / qlookup;
+
+ // Values determined by deconstructing vp9_init_quantizer().
+ // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
+ // values or U/V values of any bit depth. This is because y_delta is not
+ // factored into the vp9_ac_quant() call.
+ zbin[j] = rnd->RandRange(1200);
+
+ // round may be up to 685 for Y values or 914 for U/V.
+ round[j] = rnd->RandRange(914);
+ // quant ranges from 1 to -32703
+ quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
+ // quant_shift goes up to 1 << 16.
+ quant_shift[j] = rnd->RandRange(16384);
+ // dequant maxes out at 1828 for all cases.
+ dequant[j] = rnd->RandRange(1828);
+ }
+ for (int j = 2; j < 8; j++) {
+ zbin[j] = zbin[1];
+ round_fp[j] = round_fp[1];
+ quant_fp[j] = quant_fp[1];
+ round[j] = round[1];
+ quant[j] = quant[1];
+ quant_shift[j] = quant_shift[1];
+ dequant[j] = dequant[1];
+ }
+}
+
class VP9QuantizeBase : public AbstractBench {
public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
@@ -148,6 +187,7 @@ class VP9QuantizeTest : public VP9QuantizeBase,
protected:
virtual void Run();
+ void Speed(bool is_median);
const QuantizeFunc quantize_op_;
const QuantizeFunc ref_quantize_op_;
};
@@ -159,6 +199,98 @@ void VP9QuantizeTest::Run() {
scan_->iscan);
}
+void VP9QuantizeTest::Speed(bool is_median) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ ASSERT_TRUE(coeff_.Init());
+ ASSERT_TRUE(qcoeff_.Init());
+ ASSERT_TRUE(dqcoeff_.Init());
+ TX_SIZE starting_sz, ending_sz;
+
+ if (max_size_ == 16) {
+ starting_sz = TX_4X4;
+ ending_sz = TX_16X16;
+ } else {
+ starting_sz = TX_32X32;
+ ending_sz = TX_32X32;
+ }
+
+ for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
+ // zbin > coeff, zbin < coeff.
+ for (int i = 0; i < 2; ++i) {
+ // TX_TYPE defines the scan order. That is not relevant to the speed test.
+ // Pick the first one.
+ const TX_TYPE tx_type = DCT_DCT;
+ count_ = (4 << sz) * (4 << sz);
+ scan_ = &vp9_scan_orders[sz][tx_type];
+
+ GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+ quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+ quant_fp_ptr_);
+
+ if (i == 0) {
+ // When |coeff values| are less than zbin the results are 0.
+ int threshold = 100;
+ if (max_size_ == 32) {
+ // For 32x32, the threshold is halved. Double it to keep the values
+ // from clearing it.
+ threshold = 200;
+ }
+ for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
+ coeff_.Set(&rnd, -99, 99);
+ } else if (i == 1) {
+ for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
+ coeff_.Set(&rnd, -500, 500);
+ }
+ if (is_median) {
+ RunNTimes(10000000 / count_);
+ const char *type =
+ (i == 0) ? "Bypass calculations " : "Full calculations ";
+ char block_size[16];
+ snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
+ char title[100];
+ snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
+ PrintMedian(title);
+ } else {
+ Buffer<tran_low_t> ref_qcoeff =
+ Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+ ASSERT_TRUE(ref_qcoeff.Init());
+ Buffer<tran_low_t> ref_dqcoeff =
+ Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+ ASSERT_TRUE(ref_dqcoeff.Init());
+ uint16_t ref_eob = 0;
+
+ const int kNumTests = 5000000;
+ vpx_usec_timer timer, simd_timer;
+
+ vpx_usec_timer_start(&timer);
+ for (int n = 0; n < kNumTests; ++n) {
+ ref_quantize_op_(coeff_.TopLeftPixel(), count_, zbin_ptr_, r_ptr_,
+ q_ptr_, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
+ ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
+ scan_->scan, scan_->iscan);
+ }
+ vpx_usec_timer_mark(&timer);
+
+ vpx_usec_timer_start(&simd_timer);
+ for (int n = 0; n < kNumTests; ++n) {
+ quantize_op_(coeff_.TopLeftPixel(), count_, zbin_ptr_, r_ptr_, q_ptr_,
+ quant_shift_ptr_, qcoeff_.TopLeftPixel(),
+ dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_,
+ scan_->scan, scan_->iscan);
+ }
+ vpx_usec_timer_mark(&simd_timer);
+
+ const int elapsed_time =
+ static_cast<int>(vpx_usec_timer_elapsed(&timer));
+ const int simd_elapsed_time =
+ static_cast<int>(vpx_usec_timer_elapsed(&simd_timer));
+ printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time,
+ simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time));
+ }
+ }
+ }
+}
+
// This quantizer compares the AC coefficients to the quantization step size to
// determine if further multiplication operations are needed.
// Based on vp9_quantize_fp_sse2().
@@ -254,45 +386,6 @@ void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1);
}
-void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
- int16_t *quant, int16_t *quant_shift,
- int16_t *dequant, int16_t *round_fp,
- int16_t *quant_fp) {
- // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
- const int max_qrounding_factor_fp = 64;
-
- for (int j = 0; j < 2; j++) {
- // The range is 4 to 1828 in the VP9 tables.
- const int qlookup = rnd->RandRange(1825) + 4;
- round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
- quant_fp[j] = (1 << 16) / qlookup;
-
- // Values determined by deconstructing vp9_init_quantizer().
- // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
- // values or U/V values of any bit depth. This is because y_delta is not
- // factored into the vp9_ac_quant() call.
- zbin[j] = rnd->RandRange(1200);
-
- // round may be up to 685 for Y values or 914 for U/V.
- round[j] = rnd->RandRange(914);
- // quant ranges from 1 to -32703
- quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
- // quant_shift goes up to 1 << 16.
- quant_shift[j] = rnd->RandRange(16384);
- // dequant maxes out at 1828 for all cases.
- dequant[j] = rnd->RandRange(1828);
- }
- for (int j = 2; j < 8; j++) {
- zbin[j] = zbin[1];
- round_fp[j] = round_fp[1];
- quant_fp[j] = quant_fp[1];
- round[j] = round[1];
- quant[j] = quant[1];
- quant_shift[j] = quant_shift[1];
- dequant[j] = dequant[1];
- }
-}
-
TEST_P(VP9QuantizeTest, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
ASSERT_TRUE(coeff_.Init());
@@ -403,60 +496,9 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
}
}
-TEST_P(VP9QuantizeTest, DISABLED_Speed) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- ASSERT_TRUE(coeff_.Init());
- ASSERT_TRUE(qcoeff_.Init());
- ASSERT_TRUE(dqcoeff_.Init());
- TX_SIZE starting_sz, ending_sz;
-
- if (max_size_ == 16) {
- starting_sz = TX_4X4;
- ending_sz = TX_16X16;
- } else {
- starting_sz = TX_32X32;
- ending_sz = TX_32X32;
- }
+TEST_P(VP9QuantizeTest, DISABLED_Speed) { Speed(false); }
- for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
- // zbin > coeff, zbin < coeff.
- for (int i = 0; i < 2; ++i) {
- // TX_TYPE defines the scan order. That is not relevant to the speed test.
- // Pick the first one.
- const TX_TYPE tx_type = DCT_DCT;
- count_ = (4 << sz) * (4 << sz);
- scan_ = &vp9_scan_orders[sz][tx_type];
-
- GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
- quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
- quant_fp_ptr_);
-
- if (i == 0) {
- // When |coeff values| are less than zbin the results are 0.
- int threshold = 100;
- if (max_size_ == 32) {
- // For 32x32, the threshold is halved. Double it to keep the values
- // from clearing it.
- threshold = 200;
- }
- for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
- coeff_.Set(&rnd, -99, 99);
- } else if (i == 1) {
- for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
- coeff_.Set(&rnd, -500, 500);
- }
-
- RunNTimes(10000000 / count_);
- const char *type =
- (i == 0) ? "Bypass calculations " : "Full calculations ";
- char block_size[16];
- snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
- char title[100];
- snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
- PrintMedian(title);
- }
- }
-}
+TEST_P(VP9QuantizeTest, DISABLED_SpeedMedian) { Speed(true); }
using std::make_tuple;