aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--benchtests/README6
-rw-r--r--benchtests/bench-skeleton.c57
3 files changed, 51 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 2d81375287..80bb1f847e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-06-20 Wilco Dijkstra <wdijkstr@arm.com>
+
+ * benchtests/README: Describe workload feature.
+ * benchtests/bench-skeleton.c (main): Add support for
+ benchmarking traces from workloads.
+
2017-06-20 Zack Weinberg <zackw@panix.com>
* string/string.h (__mempcpy_inline): Delete.
diff --git a/benchtests/README b/benchtests/README
index 2c5f381135..b015acfd53 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -102,6 +102,12 @@ the same file by using the `name' directive that looks something like this:
See the pow-inputs file for an example of what such a partitioned input file
would look like.
+It is also possible to measure throughput of a (partial) trace extracted from
+a real workload. The whole trace is then iterated over multiple times rather
+than repeating every input multiple times. Use the `workload-' name prefix:
+
+ ##name: workload-<name>
+
Benchmark Sets:
==============
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 09eb78df1b..3c6dad7055 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -68,34 +68,50 @@ main (int argc, char **argv)
clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
runtime.tv_sec += DURATION;
+ bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
double d_total_i = 0;
timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
int64_t c = 0;
+ uint64_t cur;
while (1)
{
- for (i = 0; i < NUM_SAMPLES (v); i++)
+ if (is_bench)
{
- uint64_t cur;
+ /* Benchmark a real trace of calls - all samples are iterated
+ over once before repeating. This models actual use more
+ accurately than repeating the same sample many times. */
TIMING_NOW (start);
for (k = 0; k < iters; k++)
- BENCH_FUNC (v, i);
+ for (i = 0; i < NUM_SAMPLES (v); i++)
+ BENCH_FUNC (v, i);
TIMING_NOW (end);
-
TIMING_DIFF (cur, start, end);
+ TIMING_ACCUM (total, cur);
+ d_total_i += iters * NUM_SAMPLES (v);
+ }
+ else
+ for (i = 0; i < NUM_SAMPLES (v); i++)
+ {
+ TIMING_NOW (start);
+ for (k = 0; k < iters; k++)
+ BENCH_FUNC (v, i);
+ TIMING_NOW (end);
- if (cur > max)
- max = cur;
+ TIMING_DIFF (cur, start, end);
- if (cur < min)
- min = cur;
+ if (cur > max)
+ max = cur;
- TIMING_ACCUM (total, cur);
- /* Accumulate timings for the value. In the end we will divide
- by the total iterations. */
- RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+ if (cur < min)
+ min = cur;
- d_total_i += iters;
- }
+ TIMING_ACCUM (total, cur);
+ /* Accumulate timings for the value. In the end we will divide
+ by the total iterations. */
+ RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+
+ d_total_i += iters;
+ }
c++;
struct timespec curtime;
@@ -117,11 +133,16 @@ main (int argc, char **argv)
json_attr_double (&json_ctx, "duration", d_total_s);
json_attr_double (&json_ctx, "iterations", d_total_i);
- json_attr_double (&json_ctx, "max", max / d_iters);
- json_attr_double (&json_ctx, "min", min / d_iters);
- json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+ if (is_bench)
+ json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+ else
+ {
+ json_attr_double (&json_ctx, "max", max / d_iters);
+ json_attr_double (&json_ctx, "min", min / d_iters);
+ json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+ }
- if (detailed)
+ if (detailed && !is_bench)
{
json_array_begin (&json_ctx, "timings");