Bench Class For More Robust Speed Tests

To make speed testing more robust, the AbstractBench runs the desired code multiple times and report the median run time with mean absolute deviation around the median. To use the AbstractBench, simply add it as a parent to your test class, and implement the run() method (with the code you want to benchmark). Sample output for VP9QuantizeTest [ BENCH ] Bypass calculations 4x4 165.8 ms ( ±1.0 ms ) [ BENCH ] Full calculations 4x4 165.8 ms ( ±0.9 ms ) [ BENCH ] Bypass calculations 8x8 129.7 ms ( ±0.9 ms ) [ BENCH ] Full calculations 8x8 130.3 ms ( ±1.4 ms ) [ BENCH ] Bypass calculations 16x16 110.3 ms ( ±1.4 ms ) [ BENCH ] Full calculations 16x16 110.1 ms ( ±0.9 ms ) Change-Id: I1dd649754cb8c4c621eee2728198ea6a555f38b3
author: Luc Trudeau <luc@trud.ca> 2018-05-22 14:16:15 -0400
committer: Luc Trudeau <luc@trud.ca> 2018-05-29 13:04:47 +0000
commit: b2898a9ade42f623cdc6ab526de657fd078660d9 (patch)
tree: e12d7bd0456d1e70535093d8c486ba6e64d9683c
parent: 2b08f89076d1e93339fbbcc10e3298a0eec66bd6 (diff)
download: libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.tar
libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.tar.gz
libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.tar.bz2
libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.zip
4 files changed, 122 insertions, 49 deletions
diff --git a/test/bench.cc b/test/bench.cc
new file mode 100644
index 000000000..281b7411d
--- /dev/null
+++ b/test/bench.cc
@@ -0,0 +1,38 @@
+/*
+ *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <algorithm>
+
+#include "test/bench.h"
+#include "vpx_ports/vpx_timer.h"
+
+void AbstractBench::runNTimes(int n) {
+  for (int r = 0; r < VPX_BENCH_ROBUST_ITER; r++) {
+    vpx_usec_timer timer;
+    vpx_usec_timer_start(&timer);
+    for (int j = 0; j < n; ++j) {
+      run();
+    }
+    vpx_usec_timer_mark(&timer);
+    times[r] = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+  }
+}
+
+void AbstractBench::printMedian(const char *title) {
+  std::sort(times, times + VPX_BENCH_ROBUST_ITER);
+  const int med = times[VPX_BENCH_ROBUST_ITER >> 1];
+  int sad = 0;
+  for (int t = 0; t < VPX_BENCH_ROBUST_ITER; t++) {
+    sad += abs(times[t] - med);
+  }
+  printf("[%10s] %s %.1f ms ( ±%.1f ms )\n", "BENCH ", title, med / 1000.0,
+         sad / (VPX_BENCH_ROBUST_ITER * 1000.0));
+}
diff --git a/test/bench.h b/test/bench.h
new file mode 100644
index 000000000..0b0cf10a4
--- /dev/null
+++ b/test/bench.h
@@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_BENCH_H_
+#define TEST_BENCH_H_
+
+// Number of iterations used to compute median run time.
+#define VPX_BENCH_ROBUST_ITER 15
+
+class AbstractBench {
+ public:
+  void runNTimes(int n);
+  void printMedian(const char *title);
+
+ protected:
+  // Implement this method and put the code to benchmark in it.
+  virtual void run() = 0;
+
+ private:
+  int times[VPX_BENCH_ROBUST_ITER];
+};
+
+#endif  // TEST_BENCH_H_
diff --git a/test/test.mk b/test/test.mk
index 3e5739e21..224ac4e8f 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -1,4 +1,6 @@
 LIBVPX_TEST_SRCS-yes += acm_random.h
+LIBVPX_TEST_SRCS-yes += bench.h
+LIBVPX_TEST_SRCS-yes += bench.cc
 LIBVPX_TEST_SRCS-yes += buffer.h
 LIBVPX_TEST_SRCS-yes += clear_system_state.h
 LIBVPX_TEST_SRCS-yes += codec_factory.h
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index 33932ff95..c39267faa 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -18,6 +18,7 @@
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
+#include "test/bench.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -67,10 +68,13 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
      scan, iscan);
 }
 
-class VP9QuantizeBase {
+class VP9QuantizeBase : public AbstractBench {
  public:
   VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
-      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
+      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
+        coeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
+        qcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
+        dqcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
     max_value_ = (1 << bit_depth_) - 1;
     zbin_ptr_ =
         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
@@ -86,6 +90,9 @@ class VP9QuantizeBase {
         vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
     dequant_ptr_ = reinterpret_cast<int16_t *>(
         vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
+
+    r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+    q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
   }
 
   ~VP9QuantizeBase() {
@@ -118,6 +125,15 @@ class VP9QuantizeBase {
   int max_value_;
   const int max_size_;
   const bool is_fp_;
+  Buffer<tran_low_t> coeff;
+  Buffer<tran_low_t> qcoeff;
+  Buffer<tran_low_t> dqcoeff;
+  int16_t *r_ptr;
+  int16_t *q_ptr;
+  int count;
+  int skip_block;
+  const scan_order *scan;
+  uint16_t eob;
 };
 
 class VP9QuantizeTest : public VP9QuantizeBase,
@@ -128,10 +144,17 @@ class VP9QuantizeTest : public VP9QuantizeBase,
         quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
 
  protected:
+  void run();
   const QuantizeFunc quantize_op_;
   const QuantizeFunc ref_quantize_op_;
 };
 
+void VP9QuantizeTest::run() {
+  quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
+               quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
+               dequant_ptr_, &eob, scan->scan, scan->iscan);
+}
+
 // This quantizer compares the AC coefficients to the quantization step size to
 // determine if further multiplication operations are needed.
 // Based on vp9_quantize_fp_sse2().
@@ -269,11 +292,8 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
 
 TEST_P(VP9QuantizeTest, OperationCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
   ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(dqcoeff.Init());
   Buffer<tran_low_t> ref_qcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
@@ -281,7 +301,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
   Buffer<tran_low_t> ref_dqcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
+  uint16_t ref_eob = 0;
+  eob = 0;
 
   for (int i = 0; i < number_of_iterations; ++i) {
     // Test skip block for the first three iterations to catch all the different
@@ -294,23 +315,21 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
       sz = TX_32X32;
     }
     const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    const int count = (4 << sz) * (4 << sz);
+    scan = &vp9_scan_orders[sz][tx_type];
+    count = (4 << sz) * (4 << sz);
     coeff.Set(&rnd, -max_value_, max_value_);
     GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                          quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                          quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
     ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                      q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
                      ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
+                     scan->scan, scan->iscan);
 
     ASM_REGISTER_STATE_CHECK(quantize_op_(
         coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
         quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        dequant_ptr_, &eob, scan->scan, scan->iscan));
 
     EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
     EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -328,11 +347,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
 
 TEST_P(VP9QuantizeTest, EOBCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
   ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(dqcoeff.Init());
   Buffer<tran_low_t> ref_qcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
@@ -340,10 +356,12 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
   Buffer<tran_low_t> ref_dqcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
+  uint16_t ref_eob = 0;
+  eob = 0;
+  const uint32_t max_index = max_size_ * max_size_ - 1;
 
   for (int i = 0; i < number_of_iterations; ++i) {
-    const int skip_block = 0;
+    skip_block = 0;
     TX_SIZE sz;
     if (max_size_ == 16) {
       sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
@@ -351,28 +369,26 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
       sz = TX_32X32;
     }
     const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);
+    scan = &vp9_scan_orders[sz][tx_type];
+    count = (4 << sz) * (4 << sz);
     // Two random entries
     coeff.Set(0);
-    coeff.TopLeftPixel()[rnd(count)] =
+    coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
         static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
-    coeff.TopLeftPixel()[rnd(count)] =
+    coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
         static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
     GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                          quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                          quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
     ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                      q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
                      ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
+                     scan->scan, scan->iscan);
 
     ASM_REGISTER_STATE_CHECK(quantize_op_(
         coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
         quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        dequant_ptr_, &eob, scan->scan, scan->iscan));
 
     EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
     EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -390,13 +406,9 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
 
 TEST_P(VP9QuantizeTest, DISABLED_Speed) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
   ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(dqcoeff.Init());
-  uint16_t eob;
   TX_SIZE starting_sz, ending_sz;
 
   if (max_size_ == 16) {
@@ -410,18 +422,16 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
   for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
     // zbin > coeff, zbin < coeff.
     for (int i = 0; i < 2; ++i) {
-      const int skip_block = 0;
+      skip_block = 0;
       // TX_TYPE defines the scan order. That is not relevant to the speed test.
       // Pick the first one.
       const TX_TYPE tx_type = DCT_DCT;
-      const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-      const int count = (4 << sz) * (4 << sz);
+      count = (4 << sz) * (4 << sz);
+      scan = &vp9_scan_orders[sz][tx_type];
 
       GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                            quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                            quant_fp_ptr_);
-      int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-      int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
 
       if (i == 0) {
         // When |coeff values| are less than zbin the results are 0.
@@ -438,22 +448,15 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
         coeff.Set(&rnd, -500, 500);
       }
 
-      vpx_usec_timer timer;
-      vpx_usec_timer_start(&timer);
-      for (int j = 0; j < 100000000 / count; ++j) {
-        quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
-                     dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
-                     scan_order->scan, scan_order->iscan);
-      }
-      vpx_usec_timer_mark(&timer);
-      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-      if (i == 0) printf("Bypass calculations.\n");
-      if (i == 1) printf("Full calculations.\n");
-      printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
-             elapsed_time / 1000);
+      runNTimes(10000000 / count);
+      const char *type =
+          (i == 0) ? "Bypass calculations " : "Full calculations ";
+      char block_size[16];
+      snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
+      char title[100];
+      snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
+      printMedian(title);
     }
-    printf("\n");
   }
 }
author	Luc Trudeau <luc@trud.ca>	2018-05-22 14:16:15 -0400
committer	Luc Trudeau <luc@trud.ca>	2018-05-29 13:04:47 +0000
commit	b2898a9ade42f623cdc6ab526de657fd078660d9 (patch)
tree	e12d7bd0456d1e70535093d8c486ba6e64d9683c
parent	2b08f89076d1e93339fbbcc10e3298a0eec66bd6 (diff)
download	libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.tar libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.tar.gz libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.tar.bz2 libvpx-b2898a9ade42f623cdc6ab526de657fd078660d9.zip