diff options
author | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-03-15 12:30:03 +0530 |
---|---|---|
committer | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-03-15 12:30:03 +0530 |
commit | 8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70 (patch) | |
tree | 7eb91b35e7d04f1c4889563b3c922e512cfe2045 | |
parent | d22ca8cdfb98001d03772ef264b244930d439b3f (diff) | |
download | glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar.gz glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar.bz2 glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.zip |
Framework for performance benchmarking of functions
See benchtests/Makefile for instructions on how to use it.
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | Makefile.in | 5 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | Rules | 27 | ||||
-rw-r--r-- | benchtests/Makefile | 59 | ||||
-rw-r--r-- | benchtests/bench-skeleton.c | 75 | ||||
-rw-r--r-- | benchtests/exp-inputs | 1 | ||||
-rw-r--r-- | benchtests/pow-inputs | 1 | ||||
-rwxr-xr-x | scripts/bench.pl | 93 |
9 files changed, 278 insertions, 2 deletions
@@ -1,4 +1,21 @@ 2013-03-15 Siddhesh Poyarekar <siddhesh@redhat.com> + Richard Henderson <rth@redhat.com> + Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> + + * Makefile.in (bench): New target. + * NEWS: Mention the benchmark framework. + * Rules (bench): Likewise. + (binaries-bench): Generate binaries for functions to + benchmark. + * benchtests/Makefile: New makefile for benchmark tests. + * benchtests/bench-skeleton.c: New skeleton file for benchmark + programs. + * benchtests/exp-inputs: New input file for EXP function. + * benchtests/pow-inputs: New input file for POW function. + * scripts/bench.pl: New script to generate source files for + benchmark programs. + +2013-03-15 Siddhesh Poyarekar <siddhesh@redhat.com> * sysdeps/ieee754/dbl-64/mpa-arch.h: New file. * sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T to store diff --git a/Makefile.in b/Makefile.in index d73a78f8e9..df75b8f0c4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -3,7 +3,7 @@ srcdir = @srcdir@ # Uncomment the line below if you want to do parallel build. # PARALLELMFLAGS = -j 4 -.PHONY: all install +.PHONY: all install bench all .DEFAULT: $(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@ @@ -11,3 +11,6 @@ all .DEFAULT: install: LANGUAGE=C LC_ALL=C; export LANGUAGE LC_ALL; \ $(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@ + +bench: + $(MAKE) -C $(srcdir)/benchtests $(PARALLELMFLAGS) objdir=`pwd` $@ @@ -23,6 +23,8 @@ Version 2.18 * Support for priority inherited mutexes in pthread condition variables on non-x86 architectures. +* Added a benchmark framework to track performance of functions in glibc. + Version 2.17 @@ -83,7 +83,7 @@ common-generated += dummy.o dummy.c # This makes all the auxiliary and test programs. 
-.PHONY: others tests +.PHONY: others tests bench ifeq ($(multi-arch),no) tests := $(filter-out $(tests-ifunc), $(tests)) xtests := $(filter-out $(xtests-ifunc), $(xtests)) @@ -188,6 +188,31 @@ $(objpfx)%.out: /dev/null $(objpfx)% # Make it 2nd arg for canned sequence. $(make-test-out) > $@ endif # tests + +# Build and run benchmark programs. +binaries-bench := $(addprefix $(objpfx)bench-,$(bench)) + +run-bench = $(test-wrapper-env) \ + GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \ + $($*-ENV) $(run-via-rtld-prefix) $${run} + +bench: $(binaries-bench) + if [ -f $(objpfx)bench.out ]; then \ + mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \ + fi + for run in $^; do \ + eval $(run-bench) >> $(objpfx)bench.out; \ + done + +$(binaries-bench): %: %.o \ + $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ + $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) + $(+link) + +$(objpfx)bench-%.c: %-inputs bench-skeleton.c + $(..)scripts/bench.pl $(patsubst %-inputs,%,$<) \ + $($*-ITER) $($*-ARGLIST) $($*-RET) > $@ + .PHONY: distclean realclean subdir_distclean subdir_realclean \ subdir_clean subdir_mostlyclean subdir_testclean diff --git a/benchtests/Makefile b/benchtests/Makefile new file mode 100644 index 0000000000..74938b925c --- /dev/null +++ b/benchtests/Makefile @@ -0,0 +1,59 @@ +# Copyright (C) 2013 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + + +# Makefile for benchmark tests. The only useful target here is `bench`. + +# Adding a new function `foo`: +# --------------------------- + +# - Append the function name to the bench variable + +# - Define foo-ITER with the number of iterations you want to run. Keep it +# high enough that the overhead of clock_gettime is only a small fraction of +# the total run time of the test. + +# - Define foo-ARGLIST as a colon separated list of types of the input +# arguments. Use `void` if function does not take any inputs. Put in quotes +# if the input argument is a pointer, e.g.: + +# malloc-ARGLIST: "void *" + +# - Define foo-RET as the type the function returns. Skip if the function +# returns void. One could even skip foo-ARGLIST if the function does not +# take any inputs AND the function returns void. + + +# - Make a file called `foo-inputs` with one input value per line, an input +# being a comma separated list of arguments to be passed into the function. +# See pow-inputs for an example. + +subdir := benchtests +bench := exp pow + +exp-ITER = 100000 +exp-ARGLIST = double +exp-RET = double +LDFLAGS-bench-exp = -lm + +pow-ITER = 100000 +pow-ARGLIST = double:double +pow-RET = double +LDFLAGS-bench-pow = -lm + +include ../Makeconfig +include ../Rules diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c new file mode 100644 index 0000000000..13f986d817 --- /dev/null +++ b/benchtests/bench-skeleton.c @@ -0,0 +1,75 @@ +/* Skeleton for benchmark programs. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include <stdio.h> +#include <time.h> +#include <inttypes.h> + +int +main (int argc, char **argv) +{ + unsigned long i, j, k; + uint64_t total = 0, max = 0, min = 0x7fffffffffffffff; + struct timespec start, end; + + memset (&start, 0, sizeof (start)); + memset (&end, 0, sizeof (end)); + + clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start); + + /* Measure 1000 times the resolution of the clock. So for a 1ns resolution + clock, we measure 1000 iterations of the function call at a time. + Measurements close to the minimum clock resolution won't make much sense, + but it's better than having nothing at all. 
*/ + unsigned long iters = 1000 * start.tv_nsec; + unsigned long total_iters = ITER / iters; + + for (i = 0; i < NUM_SAMPLES; i++) + { + for (j = 0; j < total_iters; j ++) + { + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start); + for (k = 0; k < iters; k++) + BENCH_FUNC(i); + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end); + + uint64_t cur = (end.tv_nsec - start.tv_nsec + + ((end.tv_sec - start.tv_sec) + * (uint64_t) 1000000000)); + + if (cur > max) + max = cur; + + if (cur < min) + min = cur; + + total += cur; + } + } + + double d_total_s = total * 1e-9; + double d_iters = iters; + double d_total_i = (double)ITER * NUM_SAMPLES; + printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n", + d_total_i, d_total_s, max / d_iters, min / d_iters, + d_total_i / d_total_s); + + return 0; +} diff --git a/benchtests/exp-inputs b/benchtests/exp-inputs new file mode 100644 index 0000000000..a2086baa86 --- /dev/null +++ b/benchtests/exp-inputs @@ -0,0 +1 @@ +708.00096423260981737257679924368858 diff --git a/benchtests/pow-inputs b/benchtests/pow-inputs new file mode 100644 index 0000000000..dbb1270b75 --- /dev/null +++ b/benchtests/pow-inputs @@ -0,0 +1 @@ +1.0000000000000020, 1.5 diff --git a/scripts/bench.pl b/scripts/bench.pl new file mode 100755 index 0000000000..bb7f64897e --- /dev/null +++ b/scripts/bench.pl @@ -0,0 +1,93 @@ +#! /usr/bin/perl -w +# Copyright (C) 2013 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + + +use strict; +use warnings; +# Generate a benchmark source file for a given input. + +if (@ARGV < 2) { + die "Usage: bench.pl <function> <iterations> [parameter types] [return type]" +} + +my $arg; +my $func = $ARGV[0]; +my $iters = $ARGV[1]; +my @args; +my $ret = "void"; +my $getret = ""; +my $retval = ""; + +if (@ARGV >= 3) { + @args = split(':', $ARGV[2]); +} + +if (@ARGV == 4) { + $ret = $ARGV[3]; +} + +my $decl = "extern $ret $func ("; + +if (@args == 0 || $args[0] eq "void") { + print "$decl void);\n"; + print "#define CALL_BENCH_FUNC(j) $func();\n"; + print "#define NUM_SAMPLES (1)\n"; +} +else { + my $num = 0; + my $bench_func = "#define CALL_BENCH_FUNC(j) $func ("; + my $struct = "struct args {"; + + foreach $arg (@args) { + if ($num > 0) { + $bench_func = "$bench_func,"; + $decl = "$decl,"; + } + + $struct = "$struct $arg arg$num;"; + $bench_func = "$bench_func in[j].arg$num"; + $decl = "$decl $arg"; + $num = $num + 1; + } + + print "$decl);\n"; + print "$bench_func);\n"; + print "$struct } in[] = {"; + + open INPUTS, "<$func-inputs" or die $!; + + while (<INPUTS>) { + chomp; + print "{$_},\n"; + } + print "};\n"; + print "#define NUM_SAMPLES (sizeof (in) / sizeof (struct args))\n" +} + +# In some cases not storing a return value seems to result in the function call +# being optimized out. +if ($ret ne "void") { + print "static volatile $ret ret = 0.0;\n"; + $getret = "ret = "; +} + +print "#define BENCH_FUNC(j) ({$getret CALL_BENCH_FUNC (j);})\n"; + +print "#define ITER $iters\n"; +print "#define FUNCNAME \"$func\"\n"; +print "#include \"bench-skeleton.c\"\n"; |