diff options
Diffstat (limited to 'nptl/perf.c')
-rw-r--r-- | nptl/perf.c | 749 |
1 files changed, 749 insertions, 0 deletions
diff --git a/nptl/perf.c b/nptl/perf.c new file mode 100644 index 0000000000..e94ccf8a6b --- /dev/null +++ b/nptl/perf.c @@ -0,0 +1,749 @@ +/* Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#define _GNU_SOURCE 1 +#include <argp.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <pthread.h> +#include <signal.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <sys/param.h> +#include <sys/types.h> + +#ifndef MAX_THREADS +# define MAX_THREADS 100000 +#endif +#ifndef DEFAULT_THREADS +# define DEFAULT_THREADS 50 +#endif + + +#define OPT_TO_THREAD 300 +#define OPT_TO_PROCESS 301 +#define OPT_SYNC_SIGNAL 302 +#define OPT_SYNC_JOIN 303 +#define OPT_TOPLEVEL 304 + + +static const struct argp_option options[] = + { + { NULL, 0, NULL, 0, "\ +This is a test for threads so we allow ther user to selection the number of \ +threads which are used at any one time. Independently the total number of \ +rounds can be selected. This is the total number of threads which will have \ +run when the process terminates:" }, + { "threads", 't', "NUMBER", 0, "Number of threads used at once" }, + { "starts", 's', "NUMBER", 0, "Total number of working threads" }, + { "toplevel", OPT_TOPLEVEL, "NUMBER", 0, + "Number of toplevel threads which start the other threads; this \ +implies --sync-join" }, + + { NULL, 0, NULL, 0, "\ +Each thread can do one of two things: sleep or do work. The latter is 100% \ +CPU bound. The work load is the probability a thread does work. All values \ +from zero to 100 (inclusive) are valid. How often each thread repeats this \ +can be determined by the number of rounds. The work cost determines how long \ +each work session (not sleeping) takes. If it is zero a thread would \ +effectively nothing. By setting the number of rounds to zero the thread \ +does no work at all and pure thread creation times can be measured." }, + { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" }, + { "workcost", 'c', "NUMBER", 0, + "Factor in the cost of each round of working" }, + { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" }, + + { NULL, 0, NULL, 0, "\ +There are a number of different methods how thread creation can be \ +synchronized. Synchronization is necessary since the number of concurrently \ +running threads is limited." }, + { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0, + "Synchronize using a signal (default)" }, + { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" }, + + { NULL, 0, NULL, 0, "\ +One parameter for each threads execution is the size of the stack. If this \ +parameter is not used the system's default stack size is used. If many \ +threads are used the stack size should be chosen quite small." }, + { "stacksize", 'S', "BYTES", 0, "Size of threads stack" }, + { "guardsize", 'g', "BYTES", 0, + "Size of stack guard area; must fit into the stack" }, + + { NULL, 0, NULL, 0, "Signal options:" }, + { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" }, + { "to-process", OPT_TO_PROCESS, NULL, 0, + "Send signal to process (default)" }, + + { NULL, 0, NULL, 0, "Administrative options:" }, + { "progress", 'p', NULL, 0, "Show signs of progress" }, + { "timing", 'T', NULL, 0, + "Measure time from startup to the last thread finishing" }, + { NULL, 0, NULL, 0, NULL } + }; + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt +}; + + +static unsigned long int threads = DEFAULT_THREADS; +static unsigned long int workload = 75; +static unsigned long int workcost = 20; +static unsigned long int rounds = 10; +static long int starts = 5000; +static unsigned long int stacksize; +static long int guardsize = -1; +static bool progress; +static bool timing; +static bool to_thread; +static unsigned long int toplevel = 1; + + +static long int running; +static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER; + +static pid_t pid; +static pthread_t tmain; + +static clockid_t cl; +static struct timespec start_time; + + +static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER; +unsigned int sum; + +static enum + { + sync_signal, + sync_join + } +sync_method; + + +/* We use 64bit values for the times. */ +typedef unsigned long long int hp_timing_t; + + +/* Attributes for all created threads. */ +static pthread_attr_t attr; + + +static void * +work (void *arg) +{ + unsigned long int i; + unsigned int state = (unsigned long int) arg; + + for (i = 0; i < rounds; ++i) + { + /* Determine what to do. */ + unsigned int rnum; + + /* Equal distribution. */ + do + rnum = rand_r (&state); + while (rnum >= UINT_MAX - (UINT_MAX % 100)); + + rnum %= 100; + + if (rnum < workload) + { + int j; + int a[4] = { i, rnum, i + rnum, rnum - i }; + + if (progress) + write (STDERR_FILENO, "c", 1); + + for (j = 0; j < workcost; ++j) + { + a[0] += a[3] >> 12; + a[1] += a[2] >> 20; + a[2] += a[1] ^ 0x3423423; + a[3] += a[0] - a[1]; + } + + pthread_mutex_lock (&sum_mutex); + sum += a[0] + a[1] + a[2] + a[3]; + pthread_mutex_unlock (&sum_mutex); + } + else + { + /* Just sleep. */ + struct timespec tv; + + tv.tv_sec = 0; + tv.tv_nsec = 10000000; + + if (progress) + write (STDERR_FILENO, "w", 1); + + nanosleep (&tv, NULL); + } + } + + return NULL; +} + + +static void * +thread_function (void *arg) +{ + work (arg); + + pthread_mutex_lock (&running_mutex); + if (--running <= 0 && starts <= 0) + { + /* We are done. */ + if (progress) + write (STDERR_FILENO, "\n", 1); + + if (timing) + { + struct timespec end_time; + + if (clock_gettime (cl, &end_time) == 0) + { + end_time.tv_sec -= start_time.tv_sec; + end_time.tv_nsec -= start_time.tv_nsec; + if (end_time.tv_nsec < 0) + { + end_time.tv_nsec += 1000000000; + --end_time.tv_sec; + } + + printf ("\nRuntime: %lu.%09lu seconds\n", + (unsigned long int) end_time.tv_sec, + (unsigned long int) end_time.tv_nsec); + } + } + + printf ("Result: %08x\n", sum); + + exit (0); + } + pthread_mutex_unlock (&running_mutex); + + if (sync_method == sync_signal) + { + if (to_thread) + /* This code sends a signal to the main thread. */ + pthread_kill (tmain, SIGUSR1); + else + /* Use this code to test sending a signal to the process. */ + kill (pid, SIGUSR1); + } + + if (progress) + write (STDERR_FILENO, "f", 1); + + return NULL; +} + + +struct start_info +{ + unsigned int starts; + unsigned int threads; +}; + + +static void * +start_threads (void *arg) +{ + struct start_info *si = arg; + unsigned int starts = si->starts; + pthread_t ths[si->threads]; + unsigned int state = starts; + unsigned int n; + unsigned int i = 0; + int err; + + if (progress) + write (STDERR_FILENO, "T", 1); + + memset (ths, '\0', sizeof (pthread_t) * si->threads); + + while (starts-- > 0) + { + if (ths[i] != 0) + { + /* Wait for the threads in the order they were created. */ + err = pthread_join (ths[i], NULL); + if (err != 0) + error (EXIT_FAILURE, err, "cannot join thread"); + + if (progress) + write (STDERR_FILENO, "f", 1); + } + + err = pthread_create (&ths[i], &attr, work, + (void *) (rand_r (&state) + starts + i)); + + if (err != 0) + error (EXIT_FAILURE, err, "cannot start thread"); + + if (progress) + write (STDERR_FILENO, "t", 1); + + if (++i == si->threads) + i = 0; + } + + n = i; + do + { + if (ths[i] != 0) + { + err = pthread_join (ths[i], NULL); + if (err != 0) + error (EXIT_FAILURE, err, "cannot join thread"); + + if (progress) + write (STDERR_FILENO, "f", 1); + } + + if (++i == si->threads) + i = 0; + } + while (i != n); + + if (progress) + write (STDERR_FILENO, "F", 1); + + return NULL; +} + + +int +main (int argc, char *argv[]) +{ + int remaining; + sigset_t ss; + pthread_t th; + pthread_t *ths = NULL; + int empty = 0; + int last; + bool cont = true; + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + if (sync_method == sync_join) + { + ths = (pthread_t *) calloc (threads, sizeof (pthread_t)); + if (ths == NULL) + error (EXIT_FAILURE, errno, + "cannot allocate memory for thread descriptor array"); + + last = threads; + } + else + { + ths = &th; + last = 1; + } + + if (toplevel > threads) + { + printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n", + threads); + toplevel = threads; + } + + if (timing) + { + if (clock_getcpuclockid (0, &cl) != 0 + || clock_gettime (cl, &start_time) != 0) + timing = false; + } + + /* We need this later. */ + pid = getpid (); + tmain = pthread_self (); + + /* We use signal SIGUSR1 for communication between the threads and + the main thread. We only want sychronous notification. */ + if (sync_method == sync_signal) + { + sigemptyset (&ss); + sigaddset (&ss, SIGUSR1); + if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0) + error (EXIT_FAILURE, errno, "cannot set signal mask"); + } + + /* Create the thread attributes. */ + pthread_attr_init (&attr); + + /* If the user provided a stack size use it. */ + if (stacksize != 0 + && pthread_attr_setstacksize (&attr, stacksize) != 0) + puts ("could not set stack size; will use default"); + /* And stack guard size. */ + if (guardsize != -1 + && pthread_attr_setguardsize (&attr, guardsize) != 0) + puts ("invalid stack guard size; will use default"); + + /* All threads are created detached if we are not using pthread_join + to synchronize. */ + if (sync_method != sync_join) + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); + + if (sync_method == sync_signal) + { + while (1) + { + int err; + bool do_wait = false; + + pthread_mutex_lock (&running_mutex); + if (starts-- < 0) + cont = false; + else + do_wait = ++running >= threads && starts > 0; + + pthread_mutex_unlock (&running_mutex); + + if (! cont) + break; + + if (progress) + write (STDERR_FILENO, "t", 1); + + err = pthread_create (&ths[empty], &attr, thread_function, + (void *) starts); + if (err != 0) + error (EXIT_FAILURE, err, "cannot start thread %lu", starts); + + if (++empty == last) + empty = 0; + + if (do_wait) + sigwaitinfo (&ss, NULL); + } + + /* Do nothing anymore. On of the threads will terminate the program. */ + sigfillset (&ss); + sigdelset (&ss, SIGINT); + while (1) + sigsuspend (&ss); + } + else + { + pthread_t ths[toplevel]; + struct start_info si[toplevel]; + unsigned int i; + + for (i = 0; i < toplevel; ++i) + { + unsigned int child_starts = starts / (toplevel - i); + unsigned int child_threads = threads / (toplevel - i); + int err; + + si[i].starts = child_starts; + si[i].threads = child_threads; + + err = pthread_create (&ths[i], &attr, start_threads, &si[i]); + if (err != 0) + error (EXIT_FAILURE, err, "cannot start thread"); + + starts -= child_starts; + threads -= child_threads; + } + + for (i = 0; i < toplevel; ++i) + { + int err = pthread_join (ths[i], NULL); + + if (err != 0) + error (EXIT_FAILURE, err, "cannot join thread"); + } + + /* We are done. */ + if (progress) + write (STDERR_FILENO, "\n", 1); + + if (timing) + { + struct timespec end_time; + + if (clock_gettime (cl, &end_time) == 0) + { + end_time.tv_sec -= start_time.tv_sec; + end_time.tv_nsec -= start_time.tv_nsec; + if (end_time.tv_nsec < 0) + { + end_time.tv_nsec += 1000000000; + --end_time.tv_sec; + } + + printf ("\nRuntime: %lu.%09lu seconds\n", + (unsigned long int) end_time.tv_sec, + (unsigned long int) end_time.tv_nsec); + } + } + + printf ("Result: %08x\n", sum); + + exit (0); + } + + /* NOTREACHED */ + return 0; +} + + +/* Handle program arguments. */ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + unsigned long int num; + long int snum; + + switch (key) + { + case 't': + num = strtoul (arg, NULL, 0); + if (num < MAX_THREADS) + threads = num; + else + printf ("\ +number of threads limited to %u; recompile with a higher limit if necessary", + MAX_THREADS); + break; + + case 'w': + num = strtoul (arg, NULL, 0); + if (num <= 100) + workload = num; + else + puts ("workload must be between 0 and 100 percent"); + break; + + case 'c': + workcost = strtoul (arg, NULL, 0); + break; + + case 'r': + rounds = strtoul (arg, NULL, 0); + break; + + case 's': + starts = strtoul (arg, NULL, 0); + break; + + case 'S': + num = strtoul (arg, NULL, 0); + if (num >= PTHREAD_STACK_MIN) + stacksize = num; + else + printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN); + break; + + case 'g': + snum = strtol (arg, NULL, 0); + if (snum < 0) + printf ("invalid guard size %s\n", arg); + else + guardsize = snum; + break; + + case 'p': + progress = true; + break; + + case 'T': + timing = true; + break; + + case OPT_TO_THREAD: + to_thread = true; + break; + + case OPT_TO_PROCESS: + to_thread = false; + break; + + case OPT_SYNC_SIGNAL: + sync_method = sync_signal; + break; + + case OPT_SYNC_JOIN: + sync_method = sync_join; + break; + + case OPT_TOPLEVEL: + num = strtoul (arg, NULL, 0); + if (num < MAX_THREADS) + toplevel = num; + else + printf ("\ +number of threads limited to %u; recompile with a higher limit if necessary", + MAX_THREADS); + sync_method = sync_join; + break; + + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + + +static hp_timing_t +get_clockfreq (void) +{ + /* We read the information from the /proc filesystem. It contains at + least one line like + cpu MHz : 497.840237 + or also + cpu MHz : 497.841 + We search for this line and convert the number in an integer. */ + static hp_timing_t result; + int fd; + + /* If this function was called before, we know the result. */ + if (result != 0) + return result; + + fd = open ("/proc/cpuinfo", O_RDONLY); + if (__builtin_expect (fd != -1, 1)) + { + /* XXX AFAIK the /proc filesystem can generate "files" only up + to a size of 4096 bytes. */ + char buf[4096]; + ssize_t n; + + n = read (fd, buf, sizeof buf); + if (__builtin_expect (n, 1) > 0) + { + char *mhz = memmem (buf, n, "cpu MHz", 7); + + if (__builtin_expect (mhz != NULL, 1)) + { + char *endp = buf + n; + int seen_decpoint = 0; + int ndigits = 0; + + /* Search for the beginning of the string. */ + while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n') + ++mhz; + + while (mhz < endp && *mhz != '\n') + { + if (*mhz >= '0' && *mhz <= '9') + { + result *= 10; + result += *mhz - '0'; + if (seen_decpoint) + ++ndigits; + } + else if (*mhz == '.') + seen_decpoint = 1; + + ++mhz; + } + + /* Compensate for missing digits at the end. */ + while (ndigits++ < 6) + result *= 10; + } + } + + close (fd); + } + + return result; +} + + +int +clock_getcpuclockid (pid_t pid, clockid_t *clock_id) +{ + /* We don't allow any process ID but our own. */ + if (pid != 0 && pid != getpid ()) + return EPERM; + +#ifdef CLOCK_PROCESS_CPUTIME_ID + /* Store the number. */ + *clock_id = CLOCK_PROCESS_CPUTIME_ID; + + return 0; +#else + /* We don't have a timer for that. */ + return ENOENT; +#endif +} + + +#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var)) + +/* Get current value of CLOCK and store it in TP. */ +int +clock_gettime (clockid_t clock_id, struct timespec *tp) +{ + int retval = -1; + + switch (clock_id) + { + case CLOCK_PROCESS_CPUTIME_ID: + { + + static hp_timing_t freq; + hp_timing_t tsc; + + /* Get the current counter. */ + HP_TIMING_NOW (tsc); + + if (freq == 0) + { + freq = get_clockfreq (); + if (freq == 0) + return EINVAL; + } + + /* Compute the seconds. */ + tp->tv_sec = tsc / freq; + + /* And the nanoseconds. This computation should be stable until + we get machines with about 16GHz frequency. */ + tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq; + + retval = 0; + } + break; + + default: + errno = EINVAL; + break; + } + + return retval; +} |