aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- benchtests/Makefile 6
-rw-r--r-- benchtests/README 9
-rw-r--r-- sysdeps/x86/hp-timing.h 14
4 files changed, 35 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 6cb7d604ce..cda75db202 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2018-10-24 H.J. Lu <hongjiu.lu@intel.com>
+
+ * benchtests/Makefile (CPPFLAGS-nonlib): Add -DUSE_RDTSCP if
+ USE_RDTSCP is defined.
+ * sysdeps/x86/hp-timing.h (HP_TIMING_NOW): Use RDTSCP if
+ USE_RDTSCP is defined.
+
2018-10-23 Adhemerval Zanella <adhemerval.zanella@linaro.org>
* misc/tst-preadvwritev2-common.c (IOV_MAX): Define if not
diff --git a/benchtests/Makefile b/benchtests/Makefile
index bcd6a9c26d..45aeb5febe 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -131,6 +131,12 @@ CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC
# HP_TIMING if it is available.
ifdef USE_CLOCK_GETTIME
CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
+else
+# On x86 processors, use RDTSCP, instead of RDTSC, to measure performance
+# of functions. All x86 processors since 2010 support the RDTSCP instruction.
+ifdef USE_RDTSCP
+CPPFLAGS-nonlib += -DUSE_RDTSCP
+endif
endif
DETAILED_OPT :=
diff --git a/benchtests/README b/benchtests/README
index 4ddff794d1..aaf0b659e2 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -34,6 +34,15 @@ the benchmark to use clock_gettime by invoking make as follows:
Again, one must run `make bench-clean' before changing the measurement method.
+On x86 processors, the RDTSCP instruction provides more precise timing
+data than the RDTSC instruction. All x86 processors since 2010 support
+the RDTSCP instruction. One can force the benchmark to use RDTSCP by
+invoking make as follows:
+
+ $ make USE_RDTSCP=1 bench
+
+One must run `make bench-clean' before changing the measurement method.
+
Running benchmarks on another target:
====================================
diff --git a/sysdeps/x86/hp-timing.h b/sysdeps/x86/hp-timing.h
index 77a1360748..0aa6f5e3f8 100644
--- a/sysdeps/x86/hp-timing.h
+++ b/sysdeps/x86/hp-timing.h
@@ -40,7 +40,19 @@ typedef unsigned long long int hp_timing_t;
NB: Use __builtin_ia32_rdtsc directly since including <x86intrin.h>
makes building glibc very slow. */
-# define HP_TIMING_NOW(Var) ((Var) = __builtin_ia32_rdtsc ())
+# ifdef USE_RDTSCP
+/* RDTSCP waits until all previous instructions have executed and all
+ previous loads are globally visible before reading the counter, while
+ RDTSC may read the counter before previous instructions have executed.
+ __aux only receives the builtin's output argument and is never read. */
+# define HP_TIMING_NOW(Var) \
+ (__extension__ ({ \
+ unsigned int __aux; \
+ (Var) = __builtin_ia32_rdtscp (&__aux); \
+ }))
+# else
+# define HP_TIMING_NOW(Var) ((Var) = __builtin_ia32_rdtsc ())
+# endif
# include <hp-timing-common.h>
#else