diff options
-rw-r--r-- | test/vp9_quantize_test.cc | 226 |
1 files changed, 134 insertions, 92 deletions
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc index ca1062a76..b14a20cfc 100644 --- a/test/vp9_quantize_test.cc +++ b/test/vp9_quantize_test.cc @@ -67,6 +67,45 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, fn(coeff, count, round, quant, qcoeff, dqcoeff, dequant, eob, scan, iscan); } +void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round, + int16_t *quant, int16_t *quant_shift, + int16_t *dequant, int16_t *round_fp, + int16_t *quant_fp) { + // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V. + constexpr int kMaxQRoundingFactorFp = 64; + + for (int j = 0; j < 2; j++) { + // The range is 4 to 1828 in the VP9 tables. + const int qlookup = rnd->RandRange(1825) + 4; + round_fp[j] = (kMaxQRoundingFactorFp * qlookup) >> 7; + quant_fp[j] = (1 << 16) / qlookup; + + // Values determined by deconstructing vp9_init_quantizer(). + // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y + // values or U/V values of any bit depth. This is because y_delta is not + // factored into the vp9_ac_quant() call. + zbin[j] = rnd->RandRange(1200); + + // round may be up to 685 for Y values or 914 for U/V. + round[j] = rnd->RandRange(914); + // quant ranges from 1 to -32703 + quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703; + // quant_shift goes up to 1 << 16. + quant_shift[j] = rnd->RandRange(16384); + // dequant maxes out at 1828 for all cases. + dequant[j] = rnd->RandRange(1828); + } + for (int j = 2; j < 8; j++) { + zbin[j] = zbin[1]; + round_fp[j] = round_fp[1]; + quant_fp[j] = quant_fp[1]; + round[j] = round[1]; + quant[j] = quant[1]; + quant_shift[j] = quant_shift[1]; + dequant[j] = dequant[1]; + } +} + class VP9QuantizeBase : public AbstractBench { public: VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp) @@ -148,6 +187,7 @@ class VP9QuantizeTest : public VP9QuantizeBase, protected: virtual void Run(); + void Speed(bool is_median); const QuantizeFunc quantize_op_; const QuantizeFunc ref_quantize_op_; }; @@ -159,6 +199,98 @@ void VP9QuantizeTest::Run() { scan_->iscan); } +void VP9QuantizeTest::Speed(bool is_median) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + ASSERT_TRUE(coeff_.Init()); + ASSERT_TRUE(qcoeff_.Init()); + ASSERT_TRUE(dqcoeff_.Init()); + TX_SIZE starting_sz, ending_sz; + + if (max_size_ == 16) { + starting_sz = TX_4X4; + ending_sz = TX_16X16; + } else { + starting_sz = TX_32X32; + ending_sz = TX_32X32; + } + + for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) { + // zbin > coeff, zbin < coeff. + for (int i = 0; i < 2; ++i) { + // TX_TYPE defines the scan order. That is not relevant to the speed test. + // Pick the first one. + const TX_TYPE tx_type = DCT_DCT; + count_ = (4 << sz) * (4 << sz); + scan_ = &vp9_scan_orders[sz][tx_type]; + + GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, + quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, + quant_fp_ptr_); + + if (i == 0) { + // When |coeff values| are less than zbin the results are 0. + int threshold = 100; + if (max_size_ == 32) { + // For 32x32, the threshold is halved. Double it to keep the values + // from clearing it. + threshold = 200; + } + for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold; + coeff_.Set(&rnd, -99, 99); + } else if (i == 1) { + for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50; + coeff_.Set(&rnd, -500, 500); + } + if (is_median) { + RunNTimes(10000000 / count_); + const char *type = + (i == 0) ? "Bypass calculations " : "Full calculations "; + char block_size[16]; + snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz); + char title[100]; + snprintf(title, sizeof(title), "%25s %8s ", type, block_size); + PrintMedian(title); + } else { + Buffer<tran_low_t> ref_qcoeff = + Buffer<tran_low_t>(max_size_, max_size_, 0, 32); + ASSERT_TRUE(ref_qcoeff.Init()); + Buffer<tran_low_t> ref_dqcoeff = + Buffer<tran_low_t>(max_size_, max_size_, 0, 32); + ASSERT_TRUE(ref_dqcoeff.Init()); + uint16_t ref_eob = 0; + + const int kNumTests = 5000000; + vpx_usec_timer timer, simd_timer; + + vpx_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + ref_quantize_op_(coeff_.TopLeftPixel(), count_, zbin_ptr_, r_ptr_, + q_ptr_, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(), + ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob, + scan_->scan, scan_->iscan); + } + vpx_usec_timer_mark(&timer); + + vpx_usec_timer_start(&simd_timer); + for (int n = 0; n < kNumTests; ++n) { + quantize_op_(coeff_.TopLeftPixel(), count_, zbin_ptr_, r_ptr_, q_ptr_, + quant_shift_ptr_, qcoeff_.TopLeftPixel(), + dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, + scan_->scan, scan_->iscan); + } + vpx_usec_timer_mark(&simd_timer); + + const int elapsed_time = + static_cast<int>(vpx_usec_timer_elapsed(&timer)); + const int simd_elapsed_time = + static_cast<int>(vpx_usec_timer_elapsed(&simd_timer)); + printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time, + simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time)); + } + } + } +} + // This quantizer compares the AC coefficients to the quantization step size to // determine if further multiplication operations are needed. // Based on vp9_quantize_fp_sse2(). @@ -254,45 +386,6 @@ void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1); } -void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round, - int16_t *quant, int16_t *quant_shift, - int16_t *dequant, int16_t *round_fp, - int16_t *quant_fp) { - // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V. - const int max_qrounding_factor_fp = 64; - - for (int j = 0; j < 2; j++) { - // The range is 4 to 1828 in the VP9 tables. - const int qlookup = rnd->RandRange(1825) + 4; - round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7; - quant_fp[j] = (1 << 16) / qlookup; - - // Values determined by deconstructing vp9_init_quantizer(). - // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y - // values or U/V values of any bit depth. This is because y_delta is not - // factored into the vp9_ac_quant() call. - zbin[j] = rnd->RandRange(1200); - - // round may be up to 685 for Y values or 914 for U/V. - round[j] = rnd->RandRange(914); - // quant ranges from 1 to -32703 - quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703; - // quant_shift goes up to 1 << 16. - quant_shift[j] = rnd->RandRange(16384); - // dequant maxes out at 1828 for all cases. - dequant[j] = rnd->RandRange(1828); - } - for (int j = 2; j < 8; j++) { - zbin[j] = zbin[1]; - round_fp[j] = round_fp[1]; - quant_fp[j] = quant_fp[1]; - round[j] = round[1]; - quant[j] = quant[1]; - quant_shift[j] = quant_shift[1]; - dequant[j] = dequant[1]; - } -} - TEST_P(VP9QuantizeTest, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); ASSERT_TRUE(coeff_.Init()); @@ -403,60 +496,9 @@ TEST_P(VP9QuantizeTest, EOBCheck) { } } -TEST_P(VP9QuantizeTest, DISABLED_Speed) { - ACMRandom rnd(ACMRandom::DeterministicSeed()); - ASSERT_TRUE(coeff_.Init()); - ASSERT_TRUE(qcoeff_.Init()); - ASSERT_TRUE(dqcoeff_.Init()); - TX_SIZE starting_sz, ending_sz; - - if (max_size_ == 16) { - starting_sz = TX_4X4; - ending_sz = TX_16X16; - } else { - starting_sz = TX_32X32; - ending_sz = TX_32X32; - } +TEST_P(VP9QuantizeTest, DISABLED_Speed) { Speed(false); } - for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) { - // zbin > coeff, zbin < coeff. - for (int i = 0; i < 2; ++i) { - // TX_TYPE defines the scan order. That is not relevant to the speed test. - // Pick the first one. - const TX_TYPE tx_type = DCT_DCT; - count_ = (4 << sz) * (4 << sz); - scan_ = &vp9_scan_orders[sz][tx_type]; - - GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, - quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, - quant_fp_ptr_); - - if (i == 0) { - // When |coeff values| are less than zbin the results are 0. - int threshold = 100; - if (max_size_ == 32) { - // For 32x32, the threshold is halved. Double it to keep the values - // from clearing it. - threshold = 200; - } - for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold; - coeff_.Set(&rnd, -99, 99); - } else if (i == 1) { - for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50; - coeff_.Set(&rnd, -500, 500); - } - - RunNTimes(10000000 / count_); - const char *type = - (i == 0) ? "Bypass calculations " : "Full calculations "; - char block_size[16]; - snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz); - char title[100]; - snprintf(title, sizeof(title), "%25s %8s ", type, block_size); - PrintMedian(title); - } - } -} +TEST_P(VP9QuantizeTest, DISABLED_SpeedMedian) { Speed(true); } using std::make_tuple; |