diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/Makefile | 1 | ||||
-rw-r--r-- | stdlib/qsort.c | 470 | ||||
-rw-r--r-- | stdlib/tst-qsort4.c | 25 | ||||
-rw-r--r-- | stdlib/tst-qsort5.c | 171 |
4 files changed, 226 insertions, 441 deletions
diff --git a/stdlib/Makefile b/stdlib/Makefile index bed39b8258..d587f054d1 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -287,7 +287,6 @@ tests := \ tst-qsort \ tst-qsort2 \ tst-qsort3 \ - tst-qsort5 \ tst-qsort6 \ tst-quick_exit \ tst-rand48 \ diff --git a/stdlib/qsort.c b/stdlib/qsort.c index 0b3c638aa2..b29882388e 100644 --- a/stdlib/qsort.c +++ b/stdlib/qsort.c @@ -19,6 +19,7 @@ Engineering a sort function; Jon Bentley and M. Douglas McIlroy; Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ +#include <errno.h> #include <limits.h> #include <memswap.h> #include <stdlib.h> @@ -32,9 +33,13 @@ enum swap_type_t { SWAP_WORDS_64, SWAP_WORDS_32, + SWAP_VOID_ARG, SWAP_BYTES }; +typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; +typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; + /* If this function returns true, elements can be safely copied using word loads and stores. Otherwise, it might not be safe. BASE (as an integer) must be a multiple of the word alignment. SIZE must be a multiple of @@ -51,7 +56,6 @@ is_aligned (const void *base, size_t size, size_t wordsize) static inline void swap_words_64 (void * restrict a, void * restrict b, size_t n) { - typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; do { n -= 8; @@ -64,7 +68,6 @@ swap_words_64 (void * restrict a, void * restrict b, size_t n) static inline void swap_words_32 (void * restrict a, void * restrict b, size_t n) { - typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; do { n -= 4; @@ -88,43 +91,6 @@ do_swap (void * restrict a, void * restrict b, size_t size, __memswap (a, b, size); } -/* Discontinue quicksort algorithm when partition gets below this size. - This particular magic number was chosen to work best on a Sun 4/260. */ -#define MAX_THRESH 4 - -/* Stack node declarations used to store unfulfilled partition obligations. */ -typedef struct - { - char *lo; - char *hi; - size_t depth; - } stack_node; - -/* The stack needs log (total_elements) entries (we could even subtract - log(MAX_THRESH)). Since total_elements has type size_t, we get as - upper bound for log (total_elements): - bits per byte (CHAR_BIT) * sizeof(size_t). */ -enum { STACK_SIZE = CHAR_BIT * sizeof (size_t) }; - -static inline stack_node * -push (stack_node *top, char *lo, char *hi, size_t depth) -{ - top->lo = lo; - top->hi = hi; - top->depth = depth; - return ++top; -} - -static inline stack_node * -pop (stack_node *top, char **lo, char **hi, size_t *depth) -{ - --top; - *lo = top->lo; - *hi = top->hi; - *depth = top->depth; - return top; -} - /* Establish the heap condition at index K, that is, the key at K will not be less than either of its children, at 2 * K + 1 and 2 * K + 2 (if they exist). N is the last valid index. */ @@ -172,21 +138,35 @@ heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type, } } -/* A non-recursive heapsort, used on introsort implementation as a - fallback routine with worst-case performance of O(nlog n) and - worst-case space complexity of O(1). It sorts the array starting - at BASE and ending at END (inclusive), with each element of SIZE - bytes. The SWAP_TYPE is the callback function used to swap - elements, and CMP is the function used to compare elements. */ +static enum swap_type_t +get_swap_type (void *const pbase, size_t size) +{ + if ((size & (sizeof (uint32_t) - 1)) == 0 + && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0) + { + if (size == sizeof (uint32_t)) + return SWAP_WORDS_32; + else if (size == sizeof (uint64_t) + && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0) + return SWAP_WORDS_64; + } + return SWAP_BYTES; +} + + +/* A non-recursive heapsort with worst-case performance of O(nlog n) and + worst-case space complexity of O(1). It sorts the array starting at + BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback + function used to swap elements, and CMP is the function used to compare + elements. */ static void -heapsort_r (void *base, void *end, size_t size, enum swap_type_t swap_type, - __compar_d_fn_t cmp, void *arg) +heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg) { - size_t n = ((uintptr_t) end - (uintptr_t) base) / size; if (n <= 1) - /* Handled by insertion sort. */ return; + enum swap_type_t swap_type = get_swap_type (base, size); + /* Build the binary heap, largest value at the base[0]. */ heapify (base, size, n, swap_type, cmp, arg); @@ -208,226 +188,226 @@ heapsort_r (void *base, void *end, size_t size, enum swap_type_t swap_type, } } -static inline void -insertion_sort_qsort_partitions (void *const pbase, size_t total_elems, - size_t size, enum swap_type_t swap_type, - __compar_d_fn_t cmp, void *arg) +/* The maximum size in bytes required by mergesort that will be provided + through a buffer allocated in the stack. */ +#define QSORT_STACK_SIZE 1024 + +/* Elements larger than this value will be sorted through indirect sorting + to minimize the need to memory swap calls. */ +#define INDIRECT_SORT_SIZE_THRES 32 + +struct msort_param { - char *base_ptr = (char *) pbase; - char *const end_ptr = &base_ptr[size * (total_elems - 1)]; - char *tmp_ptr = base_ptr; -#define min(x, y) ((x) < (y) ? (x) : (y)) - const size_t max_thresh = MAX_THRESH * size; - char *thresh = min(end_ptr, base_ptr + max_thresh); - char *run_ptr; + size_t s; + enum swap_type_t var; + __compar_d_fn_t cmp; + void *arg; + char *t; +}; - /* Find smallest element in first threshold and place it at the - array's beginning. This is the smallest array element, - and the operation speeds up insertion sort's inner loop. */ +static void +msort_with_tmp (const struct msort_param *p, void *b, size_t n) +{ + char *b1, *b2; + size_t n1, n2; - for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) - if (cmp (run_ptr, tmp_ptr, arg) < 0) - tmp_ptr = run_ptr; + if (n <= 1) + return; - if (tmp_ptr != base_ptr) - do_swap (tmp_ptr, base_ptr, size, swap_type); + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = (char *) b + (n1 * p->s); - /* Insertion sort, running from left-hand-side up to right-hand-side. */ + msort_with_tmp (p, b1, n1); + msort_with_tmp (p, b2, n2); - run_ptr = base_ptr + size; - while ((run_ptr += size) <= end_ptr) + char *tmp = p->t; + const size_t s = p->s; + __compar_d_fn_t cmp = p->cmp; + void *arg = p->arg; + switch (p->var) { - tmp_ptr = run_ptr - size; - while (tmp_ptr != base_ptr && cmp (run_ptr, tmp_ptr, arg) < 0) - tmp_ptr -= size; - - tmp_ptr += size; - if (tmp_ptr != run_ptr) - { - char *trav; - - trav = run_ptr + size; - while (--trav >= run_ptr) - { - char c = *trav; - char *hi, *lo; - - for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) - *hi = *lo; - *hi = c; - } - } + case SWAP_WORDS_32: + while (n1 > 0 && n2 > 0) + { + if (cmp (b1, b2, arg) <= 0) + { + *(u32_alias_t *) tmp = *(u32_alias_t *) b1; + b1 += sizeof (u32_alias_t); + --n1; + } + else + { + *(u32_alias_t *) tmp = *(u32_alias_t *) b2; + b2 += sizeof (u32_alias_t); + --n2; + } + tmp += sizeof (u32_alias_t); + } + break; + case SWAP_WORDS_64: + while (n1 > 0 && n2 > 0) + { + if (cmp (b1, b2, arg) <= 0) + { + *(u64_alias_t *) tmp = *(u64_alias_t *) b1; + b1 += sizeof (u64_alias_t); + --n1; + } + else + { + *(u64_alias_t *) tmp = *(u64_alias_t *) b2; + b2 += sizeof (u64_alias_t); + --n2; + } + tmp += sizeof (u64_alias_t); + } + break; + case SWAP_VOID_ARG: + while (n1 > 0 && n2 > 0) + { + if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) + { + *(void **) tmp = *(void **) b1; + b1 += sizeof (void *); + --n1; + } + else + { + *(void **) tmp = *(void **) b2; + b2 += sizeof (void *); + --n2; + } + tmp += sizeof (void *); + } + break; + default: + while (n1 > 0 && n2 > 0) + { + if (cmp (b1, b2, arg) <= 0) + { + tmp = (char *) __mempcpy (tmp, b1, s); + b1 += s; + --n1; + } + else + { + tmp = (char *) __mempcpy (tmp, b2, s); + b2 += s; + --n2; + } + } + break; } -} - -/* Order size using quicksort. This implementation incorporates - four optimizations discussed in Sedgewick: - 1. Non-recursive, using an explicit stack of pointer that store the - next array partition to sort. To save time, this maximum amount - of space required to store an array of SIZE_MAX is allocated on the - stack. Assuming a 32-bit (64 bit) integer for size_t, this needs - only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). - Pretty cheap, actually. - - 2. Chose the pivot element using a median-of-three decision tree. - This reduces the probability of selecting a bad pivot value and - eliminates certain extraneous comparisons. + if (n1 > 0) + memcpy (tmp, b1, n1 * s); + memcpy (b, p->t, (n - n2) * s); +} - 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving - insertion sort to order the MAX_THRESH items within each partition. - This is a big win, since insertion sort is faster for small, mostly - sorted array segments. +static void +__attribute_used__ +indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n, + size_t s) +{ + /* Indirect sorting. */ + char *ip = (char *) b; + void **tp = (void **) (p->t + n * sizeof (void *)); + void **t = tp; + void *tmp_storage = (void *) (tp + n); - 4. The larger of the two sub-partitions is always pushed onto the - stack first, with the algorithm then concentrating on the - smaller partition. This *guarantees* no more than log (total_elems) - stack size is needed (actually O(1) in this case)! */ + while ((void *) t < tmp_storage) + { + *t++ = ip; + ip += s; + } + msort_with_tmp (p, p->t + n * sizeof (void *), n); + + /* tp[0] .. tp[n - 1] is now sorted, copy around entries of + the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ + char *kp; + size_t i; + for (i = 0, ip = (char *) b; i < n; i++, ip += s) + if ((kp = tp[i]) != ip) + { + size_t j = i; + char *jp = ip; + memcpy (tmp_storage, ip, s); + + do + { + size_t k = (kp - (char *) b) / s; + tp[j] = jp; + memcpy (jp, kp, s); + j = k; + jp = kp; + kp = tp[k]; + } + while (kp != ip); + + tp[j] = jp; + memcpy (jp, tmp_storage, s); + } +} void __qsort_r (void *const pbase, size_t total_elems, size_t size, __compar_d_fn_t cmp, void *arg) { - char *base_ptr = (char *) pbase; - - const size_t max_thresh = MAX_THRESH * size; - if (total_elems <= 1) - /* Avoid lossage with unsigned arithmetic below. */ return; - enum swap_type_t swap_type; - if (is_aligned (pbase, size, 8)) - swap_type = SWAP_WORDS_64; - else if (is_aligned (pbase, size, 4)) - swap_type = SWAP_WORDS_32; - else - swap_type = SWAP_BYTES; + /* Align to the maximum size used by the swap optimization. */ + _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE]; + size_t total_size = total_elems * size; + char *buf; - /* Maximum depth before quicksort switches to heapsort. */ - size_t depth = 2 * (sizeof (size_t) * CHAR_BIT - 1 - - __builtin_clzl (total_elems)); + if (size > INDIRECT_SORT_SIZE_THRES) + total_size = 2 * total_elems * sizeof (void *) + size; - if (total_elems > MAX_THRESH) + if (total_size < sizeof buf) + buf = tmp; + else { - char *lo = base_ptr; - char *hi = &lo[size * (total_elems - 1)]; - stack_node stack[STACK_SIZE]; - stack_node *top = push (stack, NULL, NULL, depth); - - while (stack < top) - { - if (depth == 0) - { - heapsort_r (lo, hi, size, swap_type, cmp, arg); - top = pop (top, &lo, &hi, &depth); - continue; - } - - char *left_ptr; - char *right_ptr; - - /* Select median value from among LO, MID, and HI. Rearrange - LO and HI so the three values are sorted. This lowers the - probability of picking a pathological pivot value and - skips a comparison for both the LEFT_PTR and RIGHT_PTR in - the while loops. */ - - char *mid = lo + size * ((hi - lo) / size >> 1); - - if ((*cmp) ((void *) mid, (void *) lo, arg) < 0) - do_swap (mid, lo, size, swap_type); - if ((*cmp) ((void *) hi, (void *) mid, arg) < 0) - do_swap (mid, hi, size, swap_type); - else - goto jump_over; - if ((*cmp) ((void *) mid, (void *) lo, arg) < 0) - do_swap (mid, lo, size, swap_type); - jump_over:; - - left_ptr = lo + size; - right_ptr = hi - size; - - /* Here's the famous ``collapse the walls'' section of quicksort. - Gotta like those tight inner loops! They are the main reason - that this algorithm runs much faster than others. */ - do - { - while (left_ptr != mid - && (*cmp) ((void *) left_ptr, (void *) mid, arg) < 0) - left_ptr += size; - - while (right_ptr != mid - && (*cmp) ((void *) mid, (void *) right_ptr, arg) < 0) - right_ptr -= size; - - if (left_ptr < right_ptr) - { - do_swap (left_ptr, right_ptr, size, swap_type); - if (mid == left_ptr) - mid = right_ptr; - else if (mid == right_ptr) - mid = left_ptr; - left_ptr += size; - right_ptr -= size; - } - else if (left_ptr == right_ptr) - { - left_ptr += size; - right_ptr -= size; - break; - } - } - while (left_ptr <= right_ptr); - - /* Set up pointers for next iteration. First determine whether - left and right partitions are below the threshold size. If so, - ignore one or both. Otherwise, push the larger partition's - bounds on the stack and continue sorting the smaller one. */ - - if ((size_t) (right_ptr - lo) <= max_thresh) - { - if ((size_t) (hi - left_ptr) <= max_thresh) - /* Ignore both small partitions. */ - { - top = pop (top, &lo, &hi, &depth); - --depth; - } - else - { - /* Ignore small left partition. */ - lo = left_ptr; - --depth; - } - } - else if ((size_t) (hi - left_ptr) <= max_thresh) - /* Ignore small right partition. */ - { - hi = right_ptr; - --depth; - } - else if ((right_ptr - lo) > (hi - left_ptr)) - { - /* Push larger left partition indices. */ - top = push (top, lo, right_ptr, depth - 1); - lo = left_ptr; - } - else - { - /* Push larger right partition indices. */ - top = push (top, left_ptr, hi, depth - 1); - hi = right_ptr; - } - } + int save = errno; + buf = malloc (total_size); + __set_errno (save); + if (buf == NULL) + { + /* Fallback to heapsort in case of memory failure. */ + heapsort_r (pbase, total_elems - 1, size, cmp, arg); + return; + } + } + + if (size > INDIRECT_SORT_SIZE_THRES) + { + const struct msort_param msort_param = + { + .s = sizeof (void *), + .cmp = cmp, + .arg = arg, + .var = SWAP_VOID_ARG, + .t = buf, + }; + indirect_msort_with_tmp (&msort_param, pbase, total_elems, size); + } + else + { + const struct msort_param msort_param = + { + .s = size, + .cmp = cmp, + .arg = arg, + .var = get_swap_type (pbase, size), + .t = buf, + }; + msort_with_tmp (&msort_param, pbase, total_elems); } - /* Once the BASE_PTR array is partially sorted by quicksort the rest - is completely sorted using insertion sort, since this is efficient - for partitions below MAX_THRESH size. BASE_PTR points to the beginning - of the array to sort, and END_PTR points at the very last element in - the array (*not* one beyond it!). */ - insertion_sort_qsort_partitions (pbase, total_elems, size, swap_type, cmp, - arg); + if (buf != tmp) + free (buf); } libc_hidden_def (__qsort_r) weak_alias (__qsort_r, qsort_r) diff --git a/stdlib/tst-qsort4.c b/stdlib/tst-qsort4.c index 5e631c65dc..4635275419 100644 --- a/stdlib/tst-qsort4.c +++ b/stdlib/tst-qsort4.c @@ -30,35 +30,12 @@ cmp (const void *a1, const void *b1, void *closure) return *a - *b; } -/* Wrapper around heapsort_r that set ups the required variables. */ -static void -heapsort_wrapper (void *const pbase, size_t total_elems, size_t size, - __compar_d_fn_t cmp, void *arg) -{ - char *base_ptr = (char *) pbase; - char *lo = base_ptr; - char *hi = &lo[size * (total_elems - 1)]; - - if (total_elems <= 1) - /* Avoid lossage with unsigned arithmetic below. */ - return; - - enum swap_type_t swap_type; - if (is_aligned (pbase, size, 8)) - swap_type = SWAP_WORDS_64; - else if (is_aligned (pbase, size, 4)) - swap_type = SWAP_WORDS_32; - else - swap_type = SWAP_BYTES; - heapsort_r (lo, hi, size, swap_type, cmp, arg); -} - static void check_one_sort (signed char *array, int length) { signed char *copy = xmalloc (length); memcpy (copy, array, length); - heapsort_wrapper (copy, length, 1, cmp, NULL); + heapsort_r (copy, length - 1, 1, cmp, NULL); /* Verify that the result is sorted. */ for (int i = 1; i < length; ++i) diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c deleted file mode 100644 index ad0a4b0fd5..0000000000 --- a/stdlib/tst-qsort5.c +++ /dev/null @@ -1,171 +0,0 @@ -/* Adversarial test for qsort_r. - Copyright (C) 2023-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* The approach follows Douglas McIlroy, A Killer Adversary for - Quicksort. Software—Practice and Experience 29 (1999) 341-344. - Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf> - (2023-11-17). */ - -#include <math.h> -#include <stdlib.h> -#include <stdio.h> -#include <support/check.h> -#include <support/support.h> - -struct context -{ - /* Called the gas value in the paper. This value is larger than all - other values (length minus one will do), so comparison with any - decided value has a known result. */ - int undecided_value; - - /* If comparing undecided values, one of them as to be assigned a - value to ensure consistency with future comparisons. This is the - value that will be used. Starts out at zero. */ - int next_decided; - - /* Used to trick pivot selection. Deciding the value for the last - seen undcided value in a decided/undecided comparison happens - to trick the many qsort implementations. */ - int last_undecided_index; - - /* This array contains the actually asigned values. The call to - qsort_r sorts a different array that contains indices into this - array. */ - int *decided_values; -}; - -static int -compare_opponent (const void *l1, const void *r1, void *ctx1) -{ - const int *l = l1; - const int *r = r1; - struct context *ctx = ctx1; - int rvalue = ctx->decided_values[*r]; - int lvalue = ctx->decided_values[*l]; - - if (lvalue == ctx->undecided_value) - { - if (rvalue == ctx->undecided_value) - { - /* Both values are undecided. In this case, make a decision - for the last-used undecided value. This is tweak is very - specific to quicksort. */ - if (*l == ctx->last_undecided_index) - { - ctx->decided_values[*l] = ctx->next_decided; - ++ctx->next_decided; - /* The undecided value or *r is greater. */ - return -1; - } - else - { - ctx->decided_values[*r] = ctx->next_decided; - ++ctx->next_decided; - /* The undecided value for *l is greater. */ - return 1; - } - } - else - { - ctx->last_undecided_index = *l; - return 1; - } - } - else - { - /* *l is a decided value. */ - if (rvalue == ctx->undecided_value) - { - ctx->last_undecided_index = *r; - /* The undecided value for *r is greater. */ - return -1; - } - else - return lvalue - rvalue; - } -} - -/* Return a pointer to the adversarial permutation of length N. */ -static int * -create_permutation (size_t n) -{ - struct context ctx = - { - .undecided_value = n - 1, /* Larger than all other values. */ - .decided_values = xcalloc (n, sizeof (int)), - }; - for (size_t i = 0; i < n; ++i) - ctx.decided_values[i] = ctx.undecided_value; - int *scratch = xcalloc (n, sizeof (int)); - for (size_t i = 0; i < n; ++i) - scratch[i] = i; - qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx); - free (scratch); - return ctx.decided_values; -} - -/* Callback function for qsort which counts the number of invocations - in *CLOSURE. */ -static int -compare_counter (const void *l1, const void *r1, void *closure) -{ - const int *l = l1; - const int *r = r1; - unsigned long long int *counter = closure; - ++*counter; - return *l - *r; -} - -/* Count the comparisons required for an adversarial permutation of - length N. */ -static unsigned long long int -count_comparisons (size_t n) -{ - int *array = create_permutation (n); - unsigned long long int counter = 0; - qsort_r (array, n, sizeof (*array), compare_counter, &counter); - free (array); - return counter; -} - -/* Check the scaling factor for one adversarial permutation of length - N, and report some statistics. */ -static void -check_one_n (size_t n) -{ - unsigned long long int count = count_comparisons (n); - double factor = count / (n * log (count)); - printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n", - n, count, factor); - /* This is an arbitrary factor which is true for the current - implementation across a wide range of sizes. */ - TEST_VERIFY (factor <= 4.5); -} - -static int -do_test (void) -{ - check_one_n (100); - check_one_n (1000); - for (int i = 1; i <= 15; ++i) - check_one_n (i * 10 * 1000); - return 0; -} - -#include <support/test-driver.c> |