diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | benchtests/Makefile | 6 | ||||
-rw-r--r-- | benchtests/README | 9 | ||||
-rw-r--r-- | sysdeps/x86/hp-timing.h | 14 |
4 files changed, 35 insertions, 1 deletion
@@ -1,3 +1,10 @@ +2018-10-24 H.J. Lu <hongjiu.lu@intel.com> + + * benchtests/Makefile (CPPFLAGS-nonlib): Add -DUSE_RDTSCP if + USE_RDTSCP is defined. + * sysdeps/x86/hp-timing.h (HP_TIMING_NOW): Use RDTSCP if + USE_RDTSCP is defined. + 2018-10-23 Adhemerval Zanella <adhemerval.zanella@linaro.org> * misc/tst-preadvwritev2-common.c (IOV_MAX): Define if not diff --git a/benchtests/Makefile b/benchtests/Makefile index bcd6a9c26d..45aeb5febe 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -131,6 +131,12 @@ CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC # HP_TIMING if it is available. ifdef USE_CLOCK_GETTIME CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME +else +# On x86 processors, use RDTSCP, instead of RDTSC, to measure performance +# of functions. All x86 processors since 2010 support the RDTSCP instruction. +ifdef USE_RDTSCP +CPPFLAGS-nonlib += -DUSE_RDTSCP +endif endif DETAILED_OPT := diff --git a/benchtests/README b/benchtests/README index 4ddff794d1..aaf0b659e2 100644 --- a/benchtests/README +++ b/benchtests/README @@ -34,6 +34,15 @@ the benchmark to use clock_gettime by invoking make as follows: Again, one must run `make bench-clean' before changing the measurement method. +On x86 processors, the RDTSCP instruction provides more precise timing data +than the RDTSC instruction. All x86 processors since 2010 support the RDTSCP +instruction. One can force the benchmark to use RDTSCP by invoking make +as follows: + + $ make USE_RDTSCP=1 bench + +One must run `make bench-clean' before changing the measurement method. + Running benchmarks on another target: ==================================== diff --git a/sysdeps/x86/hp-timing.h b/sysdeps/x86/hp-timing.h index 77a1360748..0aa6f5e3f8 100644 --- a/sysdeps/x86/hp-timing.h +++ b/sysdeps/x86/hp-timing.h @@ -40,7 +40,19 @@ typedef unsigned long long int hp_timing_t; NB: Use __builtin_ia32_rdtsc directly since including <x86intrin.h> makes building glibc very slow. 
*/ -# define HP_TIMING_NOW(Var) ((Var) = __builtin_ia32_rdtsc ()) +# ifdef USE_RDTSCP +/* RDTSCP waits until all previous instructions have executed and all + previous loads are globally visible before reading the counter. + RDTSC doesn't wait until all previous instructions have been executed + before reading the counter. */ +# define HP_TIMING_NOW(Var) \ + (__extension__ ({ \ + unsigned int __aux; \ + (Var) = __builtin_ia32_rdtscp (&__aux); \ + })) +# else +# define HP_TIMING_NOW(Var) ((Var) = __builtin_ia32_rdtsc ()) +# endif # include <hp-timing-common.h> #else |