diff options
-rw-r--r--  ChangeLog                    |  6
-rw-r--r--  benchtests/README            |  6
-rw-r--r--  benchtests/bench-skeleton.c  | 57
3 files changed, 51 insertions, 18 deletions
@@ -1,3 +1,9 @@ +2017-06-20 Wilco Dijkstra <wdijkstr@arm.com> + + * benchtests/README: Describe workload feature. + * benchtests/bench-skeleton.c (main): Add support for + benchmarking traces from workloads. + 2017-06-20 Zack Weinberg <zackw@panix.com> * string/string.h (__mempcpy_inline): Delete. diff --git a/benchtests/README b/benchtests/README index 2c5f381135..b015acfd53 100644 --- a/benchtests/README +++ b/benchtests/README @@ -102,6 +102,12 @@ the same file by using the `name' directive that looks something like this: See the pow-inputs file for an example of what such a partitioned input file would look like. +It is also possible to measure throughput of a (partial) trace extracted from +a real workload. In this case the whole trace is iterated over multiple times +rather than repeating every input multiple times. This can be done via: + + ##name: workload-<name> + Benchmark Sets: ============== diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c index 09eb78df1b..3c6dad7055 100644 --- a/benchtests/bench-skeleton.c +++ b/benchtests/bench-skeleton.c @@ -68,34 +68,50 @@ main (int argc, char **argv) clock_gettime (CLOCK_MONOTONIC_RAW, &runtime); runtime.tv_sec += DURATION; + bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0; double d_total_i = 0; timing_t total = 0, max = 0, min = 0x7fffffffffffffff; int64_t c = 0; + uint64_t cur; while (1) { - for (i = 0; i < NUM_SAMPLES (v); i++) + if (is_bench) { - uint64_t cur; + /* Benchmark a real trace of calls - all samples are iterated + over once before repeating. This models actual use more + accurately than repeating the same sample many times. 
*/ TIMING_NOW (start); for (k = 0; k < iters; k++) - BENCH_FUNC (v, i); + for (i = 0; i < NUM_SAMPLES (v); i++) + BENCH_FUNC (v, i); TIMING_NOW (end); - TIMING_DIFF (cur, start, end); + TIMING_ACCUM (total, cur); + d_total_i += iters * NUM_SAMPLES (v); + } + else + for (i = 0; i < NUM_SAMPLES (v); i++) + { + TIMING_NOW (start); + for (k = 0; k < iters; k++) + BENCH_FUNC (v, i); + TIMING_NOW (end); - if (cur > max) - max = cur; + TIMING_DIFF (cur, start, end); - if (cur < min) - min = cur; + if (cur > max) + max = cur; - TIMING_ACCUM (total, cur); - /* Accumulate timings for the value. In the end we will divide - by the total iterations. */ - RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters); + if (cur < min) + min = cur; - d_total_i += iters; - } + TIMING_ACCUM (total, cur); + /* Accumulate timings for the value. In the end we will divide + by the total iterations. */ + RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters); + + d_total_i += iters; + } c++; struct timespec curtime; @@ -117,11 +133,16 @@ main (int argc, char **argv) json_attr_double (&json_ctx, "duration", d_total_s); json_attr_double (&json_ctx, "iterations", d_total_i); - json_attr_double (&json_ctx, "max", max / d_iters); - json_attr_double (&json_ctx, "min", min / d_iters); - json_attr_double (&json_ctx, "mean", d_total_s / d_total_i); + if (is_bench) + json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i); + else + { + json_attr_double (&json_ctx, "max", max / d_iters); + json_attr_double (&json_ctx, "min", min / d_iters); + json_attr_double (&json_ctx, "mean", d_total_s / d_total_i); + } - if (detailed) + if (detailed && !is_bench) { json_array_begin (&json_ctx, "timings"); |