diff options
Diffstat (limited to 'db/btree')
-rw-r--r-- | db/btree/bt_close.c | 182 | ||||
-rw-r--r-- | db/btree/bt_conv.c | 221 | ||||
-rw-r--r-- | db/btree/bt_debug.c | 329 | ||||
-rw-r--r-- | db/btree/bt_delete.c | 657 | ||||
-rw-r--r-- | db/btree/bt_get.c | 105 | ||||
-rw-r--r-- | db/btree/bt_open.c | 458 | ||||
-rw-r--r-- | db/btree/bt_overflow.c | 228 | ||||
-rw-r--r-- | db/btree/bt_page.c | 100 | ||||
-rw-r--r-- | db/btree/bt_put.c | 321 | ||||
-rw-r--r-- | db/btree/bt_search.c | 213 | ||||
-rw-r--r-- | db/btree/bt_seq.c | 460 | ||||
-rw-r--r-- | db/btree/bt_split.c | 829 | ||||
-rw-r--r-- | db/btree/bt_utils.c | 260 | ||||
-rw-r--r-- | db/btree/btree.h | 395 | ||||
-rw-r--r-- | db/btree/extern.h | 70 |
15 files changed, 0 insertions, 4828 deletions
diff --git a/db/btree/bt_close.c b/db/btree/bt_close.c deleted file mode 100644 index 27f9ab660f..0000000000 --- a/db/btree/bt_close.c +++ /dev/null @@ -1,182 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_close.c 8.7 (Berkeley) 8/17/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/param.h> - -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <db.h> -#include "btree.h" - -static int bt_meta __P((BTREE *)); - -/* - * BT_CLOSE -- Close a btree. - * - * Parameters: - * dbp: pointer to access method - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -int -__bt_close(dbp) - DB *dbp; -{ - BTREE *t; - int fd; - - t = dbp->internal; - - /* Toss any page pinned across calls. */ - if (t->bt_pinned != NULL) { - mpool_put(t->bt_mp, t->bt_pinned, 0); - t->bt_pinned = NULL; - } - - /* Sync the tree. */ - if (__bt_sync(dbp, 0) == RET_ERROR) - return (RET_ERROR); - - /* Close the memory pool. */ - if (mpool_close(t->bt_mp) == RET_ERROR) - return (RET_ERROR); - - /* Free random memory. */ - if (t->bt_cursor.key.data != NULL) { - free(t->bt_cursor.key.data); - t->bt_cursor.key.size = 0; - t->bt_cursor.key.data = NULL; - } - if (t->bt_rkey.data) { - free(t->bt_rkey.data); - t->bt_rkey.size = 0; - t->bt_rkey.data = NULL; - } - if (t->bt_rdata.data) { - free(t->bt_rdata.data); - t->bt_rdata.size = 0; - t->bt_rdata.data = NULL; - } - - fd = t->bt_fd; - free(t); - free(dbp); - return (close(fd) ? RET_ERROR : RET_SUCCESS); -} - -/* - * BT_SYNC -- sync the btree to disk. - * - * Parameters: - * dbp: pointer to access method - * - * Returns: - * RET_SUCCESS, RET_ERROR. - */ -int -__bt_sync(dbp, flags) - const DB *dbp; - u_int flags; -{ - BTREE *t; - int status; - - t = dbp->internal; - - /* Toss any page pinned across calls. */ - if (t->bt_pinned != NULL) { - mpool_put(t->bt_mp, t->bt_pinned, 0); - t->bt_pinned = NULL; - } - - /* Sync doesn't currently take any flags. */ - if (flags != 0) { - errno = EINVAL; - return (RET_ERROR); - } - - if (F_ISSET(t, B_INMEM | B_RDONLY) || !F_ISSET(t, B_MODIFIED)) - return (RET_SUCCESS); - - if (F_ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) - return (RET_ERROR); - - if ((status = mpool_sync(t->bt_mp)) == RET_SUCCESS) - F_CLR(t, B_MODIFIED); - - return (status); -} - -/* - * BT_META -- write the tree meta data to disk. - * - * Parameters: - * t: tree - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -static int -bt_meta(t) - BTREE *t; -{ - BTMETA m; - void *p; - - if ((p = mpool_get(t->bt_mp, P_META, 0)) == NULL) - return (RET_ERROR); - - /* Fill in metadata. */ - m.magic = BTREEMAGIC; - m.version = BTREEVERSION; - m.psize = t->bt_psize; - m.free = t->bt_free; - m.nrecs = t->bt_nrecs; - m.flags = F_ISSET(t, SAVEMETA); - - memmove(p, &m, sizeof(BTMETA)); - mpool_put(t->bt_mp, p, MPOOL_DIRTY); - return (RET_SUCCESS); -} diff --git a/db/btree/bt_conv.c b/db/btree/bt_conv.c deleted file mode 100644 index 1cb208b14d..0000000000 --- a/db/btree/bt_conv.c +++ /dev/null @@ -1,221 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_conv.c 8.5 (Berkeley) 8/17/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/param.h> - -#include <stdio.h> - -#include <db.h> -#include "btree.h" - -static void mswap __P((PAGE *)); - -/* - * __BT_BPGIN, __BT_BPGOUT -- - * Convert host-specific number layout to/from the host-independent - * format stored on disk. - * - * Parameters: - * t: tree - * pg: page number - * h: page to convert - */ -void -__bt_pgin(t, pg, pp) - void *t; - pgno_t pg; - void *pp; -{ - PAGE *h; - indx_t i, top; - u_char flags; - char *p; - - if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) - return; - if (pg == P_META) { - mswap(pp); - return; - } - - h = pp; - M_32_SWAP(h->pgno); - M_32_SWAP(h->prevpg); - M_32_SWAP(h->nextpg); - M_32_SWAP(h->flags); - M_16_SWAP(h->lower); - M_16_SWAP(h->upper); - - top = NEXTINDEX(h); - if ((h->flags & P_TYPE) == P_BINTERNAL) - for (i = 0; i < top; i++) { - M_16_SWAP(h->linp[i]); - p = (char *)GETBINTERNAL(h, i); - P_32_SWAP(p); - p += sizeof(u_int32_t); - P_32_SWAP(p); - p += sizeof(pgno_t); - if (*(u_char *)p & P_BIGKEY) { - p += sizeof(u_char); - P_32_SWAP(p); - p += sizeof(pgno_t); - P_32_SWAP(p); - } - } - else if ((h->flags & P_TYPE) == P_BLEAF) - for (i = 0; i < top; i++) { - M_16_SWAP(h->linp[i]); - p = (char *)GETBLEAF(h, i); - P_32_SWAP(p); - p += sizeof(u_int32_t); - P_32_SWAP(p); - p += sizeof(u_int32_t); - flags = *(u_char *)p; - if (flags & (P_BIGKEY | P_BIGDATA)) { - p += sizeof(u_char); - if (flags & P_BIGKEY) { - P_32_SWAP(p); - p += sizeof(pgno_t); - P_32_SWAP(p); - } - if (flags & P_BIGDATA) { - p += sizeof(u_int32_t); - P_32_SWAP(p); - p += sizeof(pgno_t); - P_32_SWAP(p); - } - } - } -} - -void -__bt_pgout(t, pg, pp) - void *t; - pgno_t pg; - void *pp; -{ - PAGE *h; - indx_t i, top; - u_char flags; - char *p; - - if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) - return; - if (pg == P_META) { - mswap(pp); - return; - } - - h = pp; - top = NEXTINDEX(h); - if ((h->flags & P_TYPE) == P_BINTERNAL) - for (i = 0; i < top; i++) { - p = (char *)GETBINTERNAL(h, i); - P_32_SWAP(p); - p += sizeof(u_int32_t); - P_32_SWAP(p); - p += sizeof(pgno_t); - if (*(u_char *)p & P_BIGKEY) { - p += sizeof(u_char); - P_32_SWAP(p); - p += sizeof(pgno_t); - P_32_SWAP(p); - } - M_16_SWAP(h->linp[i]); - } - else if ((h->flags & P_TYPE) == P_BLEAF) - for (i = 0; i < top; i++) { - p = (char *)GETBLEAF(h, i); - P_32_SWAP(p); - p += sizeof(u_int32_t); - P_32_SWAP(p); - p += sizeof(u_int32_t); - flags = *(u_char *)p; - if (flags & (P_BIGKEY | P_BIGDATA)) { - p += sizeof(u_char); - if (flags & P_BIGKEY) { - P_32_SWAP(p); - p += sizeof(pgno_t); - P_32_SWAP(p); - } - if (flags & P_BIGDATA) { - p += sizeof(u_int32_t); - P_32_SWAP(p); - p += sizeof(pgno_t); - P_32_SWAP(p); - } - } - M_16_SWAP(h->linp[i]); - } - - M_32_SWAP(h->pgno); - M_32_SWAP(h->prevpg); - M_32_SWAP(h->nextpg); - M_32_SWAP(h->flags); - M_16_SWAP(h->lower); - M_16_SWAP(h->upper); -} - -/* - * MSWAP -- Actually swap the bytes on the meta page. - * - * Parameters: - * p: page to convert - */ -static void -mswap(pg) - PAGE *pg; -{ - char *p; - - p = (char *)pg; - P_32_SWAP(p); /* magic */ - p += sizeof(u_int32_t); - P_32_SWAP(p); /* version */ - p += sizeof(u_int32_t); - P_32_SWAP(p); /* psize */ - p += sizeof(u_int32_t); - P_32_SWAP(p); /* free */ - p += sizeof(u_int32_t); - P_32_SWAP(p); /* nrecs */ - p += sizeof(u_int32_t); - P_32_SWAP(p); /* flags */ - p += sizeof(u_int32_t); -} diff --git a/db/btree/bt_debug.c b/db/btree/bt_debug.c deleted file mode 100644 index 3aefbe7622..0000000000 --- a/db/btree/bt_debug.c +++ /dev/null @@ -1,329 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_debug.c 8.5 (Berkeley) 8/17/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/param.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <db.h> -#include "btree.h" - -#ifdef DEBUG -/* - * BT_DUMP -- Dump the tree - * - * Parameters: - * dbp: pointer to the DB - */ -void -__bt_dump(dbp) - DB *dbp; -{ - BTREE *t; - PAGE *h; - pgno_t i; - char *sep; - - t = dbp->internal; - (void)fprintf(stderr, "%s: pgsz %d", - F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); - if (F_ISSET(t, R_RECNO)) - (void)fprintf(stderr, " keys %lu", t->bt_nrecs); -#undef X -#define X(flag, name) \ - if (F_ISSET(t, flag)) { \ - (void)fprintf(stderr, "%s%s", sep, name); \ - sep = ", "; \ - } - if (t->flags != 0) { - sep = " flags ("; - X(R_FIXLEN, "FIXLEN"); - X(B_INMEM, "INMEM"); - X(B_NODUPS, "NODUPS"); - X(B_RDONLY, "RDONLY"); - X(R_RECNO, "RECNO"); - X(B_METADIRTY,"METADIRTY"); - (void)fprintf(stderr, ")\n"); - } -#undef X - - for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { - __bt_dpage(h); - (void)mpool_put(t->bt_mp, h, 0); - } -} - -/* - * BT_DMPAGE -- Dump the meta page - * - * Parameters: - * h: pointer to the PAGE - */ -void -__bt_dmpage(h) - PAGE *h; -{ - BTMETA *m; - char *sep; - - m = (BTMETA *)h; - (void)fprintf(stderr, "magic %lx\n", m->magic); - (void)fprintf(stderr, "version %lu\n", m->version); - (void)fprintf(stderr, "psize %lu\n", m->psize); - (void)fprintf(stderr, "free %lu\n", m->free); - (void)fprintf(stderr, "nrecs %lu\n", m->nrecs); - (void)fprintf(stderr, "flags %lu", m->flags); -#undef X -#define X(flag, name) \ - if (m->flags & flag) { \ - (void)fprintf(stderr, "%s%s", sep, name); \ - sep = ", "; \ - } - if (m->flags) { - sep = " ("; - X(B_NODUPS, "NODUPS"); - X(R_RECNO, "RECNO"); - (void)fprintf(stderr, ")"); - } -} - -/* - * BT_DNPAGE -- Dump the page - * - * Parameters: - * n: page number to dump. - */ -void -__bt_dnpage(dbp, pgno) - DB *dbp; - pgno_t pgno; -{ - BTREE *t; - PAGE *h; - - t = dbp->internal; - if ((h = mpool_get(t->bt_mp, pgno, 0)) != NULL) { - __bt_dpage(h); - (void)mpool_put(t->bt_mp, h, 0); - } -} - -/* - * BT_DPAGE -- Dump the page - * - * Parameters: - * h: pointer to the PAGE - */ -void -__bt_dpage(h) - PAGE *h; -{ - BINTERNAL *bi; - BLEAF *bl; - RINTERNAL *ri; - RLEAF *rl; - indx_t cur, top; - char *sep; - - (void)fprintf(stderr, " page %d: (", h->pgno); -#undef X -#define X(flag, name) \ - if (h->flags & flag) { \ - (void)fprintf(stderr, "%s%s", sep, name); \ - sep = ", "; \ - } - sep = ""; - X(P_BINTERNAL, "BINTERNAL") /* types */ - X(P_BLEAF, "BLEAF") - X(P_RINTERNAL, "RINTERNAL") /* types */ - X(P_RLEAF, "RLEAF") - X(P_OVERFLOW, "OVERFLOW") - X(P_PRESERVE, "PRESERVE"); - (void)fprintf(stderr, ")\n"); -#undef X - - (void)fprintf(stderr, "\tprev %2d next %2d", h->prevpg, h->nextpg); - if (h->flags & P_OVERFLOW) - return; - - top = NEXTINDEX(h); - (void)fprintf(stderr, " lower %3d upper %3d nextind %d\n", - h->lower, h->upper, top); - for (cur = 0; cur < top; cur++) { - (void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]); - switch (h->flags & P_TYPE) { - case P_BINTERNAL: - bi = GETBINTERNAL(h, cur); - (void)fprintf(stderr, - "size %03d pgno %03d", bi->ksize, bi->pgno); - if (bi->flags & P_BIGKEY) - (void)fprintf(stderr, " (indirect)"); - else if (bi->ksize) - (void)fprintf(stderr, - " {%.*s}", (int)bi->ksize, bi->bytes); - break; - case P_RINTERNAL: - ri = GETRINTERNAL(h, cur); - (void)fprintf(stderr, "entries %03d pgno %03d", - ri->nrecs, ri->pgno); - break; - case P_BLEAF: - bl = GETBLEAF(h, cur); - if (bl->flags & P_BIGKEY) - (void)fprintf(stderr, - "big key page %lu size %u/", - *(pgno_t *)bl->bytes, - *(u_int32_t *)(bl->bytes + sizeof(pgno_t))); - else if (bl->ksize) - (void)fprintf(stderr, "%s/", bl->bytes); - if (bl->flags & P_BIGDATA) - (void)fprintf(stderr, - "big data page %lu size %u", - *(pgno_t *)(bl->bytes + bl->ksize), - *(u_int32_t *)(bl->bytes + bl->ksize + - sizeof(pgno_t))); - else if (bl->dsize) - (void)fprintf(stderr, "%.*s", - (int)bl->dsize, bl->bytes + bl->ksize); - break; - case P_RLEAF: - rl = GETRLEAF(h, cur); - if (rl->flags & P_BIGDATA) - (void)fprintf(stderr, - "big data page %lu size %u", - *(pgno_t *)rl->bytes, - *(u_int32_t *)(rl->bytes + sizeof(pgno_t))); - else if (rl->dsize) - (void)fprintf(stderr, - "%.*s", (int)rl->dsize, rl->bytes); - break; - } - (void)fprintf(stderr, "\n"); - } -} -#endif - -#ifdef STATISTICS -/* - * BT_STAT -- Gather/print the tree statistics - * - * Parameters: - * dbp: pointer to the DB - */ -void -__bt_stat(dbp) - DB *dbp; -{ - extern u_long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit; - extern u_long bt_sortsplit, bt_split; - BTREE *t; - PAGE *h; - pgno_t i, pcont, pinternal, pleaf; - u_long ifree, lfree, nkeys; - int levels; - - t = dbp->internal; - pcont = pinternal = pleaf = 0; - nkeys = ifree = lfree = 0; - for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { - switch (h->flags & P_TYPE) { - case P_BINTERNAL: - case P_RINTERNAL: - ++pinternal; - ifree += h->upper - h->lower; - break; - case P_BLEAF: - case P_RLEAF: - ++pleaf; - lfree += h->upper - h->lower; - nkeys += NEXTINDEX(h); - break; - case P_OVERFLOW: - ++pcont; - break; - } - (void)mpool_put(t->bt_mp, h, 0); - } - - /* Count the levels of the tree. */ - for (i = P_ROOT, levels = 0 ;; ++levels) { - h = mpool_get(t->bt_mp, i, 0); - if (h->flags & (P_BLEAF|P_RLEAF)) { - if (levels == 0) - levels = 1; - (void)mpool_put(t->bt_mp, h, 0); - break; - } - i = F_ISSET(t, R_RECNO) ? - GETRINTERNAL(h, 0)->pgno : - GETBINTERNAL(h, 0)->pgno; - (void)mpool_put(t->bt_mp, h, 0); - } - - (void)fprintf(stderr, "%d level%s with %ld keys", - levels, levels == 1 ? "" : "s", nkeys); - if (F_ISSET(t, R_RECNO)) - (void)fprintf(stderr, " (%ld header count)", t->bt_nrecs); - (void)fprintf(stderr, - "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n", - pinternal + pleaf + pcont, pleaf, pinternal, pcont); - (void)fprintf(stderr, "%ld cache hits, %ld cache misses\n", - bt_cache_hit, bt_cache_miss); - (void)fprintf(stderr, "%ld splits (%ld root splits, %ld sort splits)\n", - bt_split, bt_rootsplit, bt_sortsplit); - pleaf *= t->bt_psize - BTDATAOFF; - if (pleaf) - (void)fprintf(stderr, - "%.0f%% leaf fill (%ld bytes used, %ld bytes free)\n", - ((double)(pleaf - lfree) / pleaf) * 100, - pleaf - lfree, lfree); - pinternal *= t->bt_psize - BTDATAOFF; - if (pinternal) - (void)fprintf(stderr, - "%.0f%% internal fill (%ld bytes used, %ld bytes free\n", - ((double)(pinternal - ifree) / pinternal) * 100, - pinternal - ifree, ifree); - if (bt_pfxsaved) - (void)fprintf(stderr, "prefix checking removed %lu bytes.\n", - bt_pfxsaved); -} -#endif diff --git a/db/btree/bt_delete.c b/db/btree/bt_delete.c deleted file mode 100644 index b654c9f1f8..0000000000 --- a/db/btree/bt_delete.c +++ /dev/null @@ -1,657 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <errno.h> -#include <stdio.h> -#include <string.h> - -#include <db.h> -#include "btree.h" - -static int __bt_bdelete __P((BTREE *, const DBT *)); -static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int)); -static int __bt_pdelete __P((BTREE *, PAGE *)); -static int __bt_relink __P((BTREE *, PAGE *)); -static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *)); - -/* - * __bt_delete - * Delete the item(s) referenced by a key. - * - * Return RET_SPECIAL if the key is not found. - */ -int -__bt_delete(dbp, key, flags) - const DB *dbp; - const DBT *key; - u_int flags; -{ - BTREE *t; - CURSOR *c; - PAGE *h; - int status; - - t = dbp->internal; - - /* Toss any page pinned across calls. */ - if (t->bt_pinned != NULL) { - mpool_put(t->bt_mp, t->bt_pinned, 0); - t->bt_pinned = NULL; - } - - /* Check for change to a read-only tree. */ - if (F_ISSET(t, B_RDONLY)) { - errno = EPERM; - return (RET_ERROR); - } - - switch (flags) { - case 0: - status = __bt_bdelete(t, key); - break; - case R_CURSOR: - /* - * If flags is R_CURSOR, delete the cursor. Must already - * have started a scan and not have already deleted it. - */ - c = &t->bt_cursor; - if (F_ISSET(c, CURS_INIT)) { - if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) - return (RET_ERROR); - - /* - * If the page is about to be emptied, we'll need to - * delete it, which means we have to acquire a stack. - */ - if (NEXTINDEX(h) == 1) - if (__bt_stkacq(t, &h, &t->bt_cursor)) - return (RET_ERROR); - - status = __bt_dleaf(t, NULL, h, c->pg.index); - - if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { - if (__bt_pdelete(t, h)) - return (RET_ERROR); - } else - mpool_put(t->bt_mp, - h, status == RET_SUCCESS ? MPOOL_DIRTY : 0); - break; - } - /* FALLTHROUGH */ - default: - errno = EINVAL; - return (RET_ERROR); - } - if (status == RET_SUCCESS) - F_SET(t, B_MODIFIED); - return (status); -} - -/* - * __bt_stkacq -- - * Acquire a stack so we can delete a cursor entry. - * - * Parameters: - * t: tree - * hp: pointer to current, pinned PAGE pointer - * c: pointer to the cursor - * - * Returns: - * 0 on success, 1 on failure - */ -static int -__bt_stkacq(t, hp, c) - BTREE *t; - PAGE **hp; - CURSOR *c; -{ - BINTERNAL *bi; - EPG *e; - EPGNO *parent; - PAGE *h; - indx_t index; - pgno_t pgno; - recno_t nextpg, prevpg; - int exact, level; - - /* - * Find the first occurrence of the key in the tree. Toss the - * currently locked page so we don't hit an already-locked page. - */ - h = *hp; - mpool_put(t->bt_mp, h, 0); - if ((e = __bt_search(t, &c->key, &exact)) == NULL) - return (1); - h = e->page; - - /* See if we got it in one shot. */ - if (h->pgno == c->pg.pgno) - goto ret; - - /* - * Move right, looking for the page. At each move we have to move - * up the stack until we don't have to move to the next page. If - * we have to change pages at an internal level, we have to fix the - * stack back up. - */ - while (h->pgno != c->pg.pgno) { - if ((nextpg = h->nextpg) == P_INVALID) - break; - mpool_put(t->bt_mp, h, 0); - - /* Move up the stack. */ - for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { - /* Get the parent page. */ - if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) - return (1); - - /* Move to the next index. */ - if (parent->index != NEXTINDEX(h) - 1) { - index = parent->index + 1; - BT_PUSH(t, h->pgno, index); - break; - } - mpool_put(t->bt_mp, h, 0); - } - - /* Restore the stack. */ - while (level--) { - /* Push the next level down onto the stack. */ - bi = GETBINTERNAL(h, index); - pgno = bi->pgno; - BT_PUSH(t, pgno, 0); - - /* Lose the currently pinned page. */ - mpool_put(t->bt_mp, h, 0); - - /* Get the next level down. */ - if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) - return (1); - index = 0; - } - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) - return (1); - } - - if (h->pgno == c->pg.pgno) - goto ret; - - /* Reacquire the original stack. */ - mpool_put(t->bt_mp, h, 0); - if ((e = __bt_search(t, &c->key, &exact)) == NULL) - return (1); - h = e->page; - - /* - * Move left, looking for the page. At each move we have to move - * up the stack until we don't have to change pages to move to the - * next page. If we have to change pages at an internal level, we - * have to fix the stack back up. - */ - while (h->pgno != c->pg.pgno) { - if ((prevpg = h->prevpg) == P_INVALID) - break; - mpool_put(t->bt_mp, h, 0); - - /* Move up the stack. */ - for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { - /* Get the parent page. */ - if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) - return (1); - - /* Move to the next index. */ - if (parent->index != 0) { - index = parent->index - 1; - BT_PUSH(t, h->pgno, index); - break; - } - mpool_put(t->bt_mp, h, 0); - } - - /* Restore the stack. */ - while (level--) { - /* Push the next level down onto the stack. */ - bi = GETBINTERNAL(h, index); - pgno = bi->pgno; - - /* Lose the currently pinned page. */ - mpool_put(t->bt_mp, h, 0); - - /* Get the next level down. */ - if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) - return (1); - - index = NEXTINDEX(h) - 1; - BT_PUSH(t, pgno, index); - } - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) - return (1); - } - - -ret: mpool_put(t->bt_mp, h, 0); - return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); -} - -/* - * __bt_bdelete -- - * Delete all key/data pairs matching the specified key. - * - * Parameters: - * t: tree - * key: key to delete - * - * Returns: - * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. - */ -static int -__bt_bdelete(t, key) - BTREE *t; - const DBT *key; -{ - EPG *e; - PAGE *h; - int deleted, exact, redo; - - deleted = 0; - - /* Find any matching record; __bt_search pins the page. */ -loop: if ((e = __bt_search(t, key, &exact)) == NULL) - return (deleted ? RET_SUCCESS : RET_ERROR); - if (!exact) { - mpool_put(t->bt_mp, e->page, 0); - return (deleted ? RET_SUCCESS : RET_SPECIAL); - } - - /* - * Delete forward, then delete backward, from the found key. If - * there are duplicates and we reach either side of the page, do - * the key search again, so that we get them all. - */ - redo = 0; - h = e->page; - do { - if (__bt_dleaf(t, key, h, e->index)) { - mpool_put(t->bt_mp, h, 0); - return (RET_ERROR); - } - if (F_ISSET(t, B_NODUPS)) { - if (NEXTINDEX(h) == 0) { - if (__bt_pdelete(t, h)) - return (RET_ERROR); - } else - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - return (RET_SUCCESS); - } - deleted = 1; - } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); - - /* Check for right-hand edge of the page. */ - if (e->index == NEXTINDEX(h)) - redo = 1; - - /* Delete from the key to the beginning of the page. */ - while (e->index-- > 0) { - if (__bt_cmp(t, key, e) != 0) - break; - if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) { - mpool_put(t->bt_mp, h, 0); - return (RET_ERROR); - } - if (e->index == 0) - redo = 1; - } - - /* Check for an empty page. */ - if (NEXTINDEX(h) == 0) { - if (__bt_pdelete(t, h)) - return (RET_ERROR); - goto loop; - } - - /* Put the page. */ - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - - if (redo) - goto loop; - return (RET_SUCCESS); -} - -/* - * __bt_pdelete -- - * Delete a single page from the tree. - * - * Parameters: - * t: tree - * h: leaf page - * - * Returns: - * RET_SUCCESS, RET_ERROR. - * - * Side-effects: - * mpool_put's the page - */ -static int -__bt_pdelete(t, h) - BTREE *t; - PAGE *h; -{ - BINTERNAL *bi; - PAGE *pg; - EPGNO *parent; - indx_t cnt, index, *ip, offset; - u_int32_t nksize; - char *from; - - /* - * Walk the parent page stack -- a LIFO stack of the pages that were - * traversed when we searched for the page where the delete occurred. - * Each stack entry is a page number and a page index offset. The - * offset is for the page traversed on the search. We've just deleted - * a page, so we have to delete the key from the parent page. - * - * If the delete from the parent page makes it empty, this process may - * continue all the way up the tree. We stop if we reach the root page - * (which is never deleted, it's just not worth the effort) or if the - * delete does not empty the page. - */ - while ((parent = BT_POP(t)) != NULL) { - /* Get the parent page. */ - if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) - return (RET_ERROR); - - index = parent->index; - bi = GETBINTERNAL(pg, index); - - /* Free any overflow pages. */ - if (bi->flags & P_BIGKEY && - __ovfl_delete(t, bi->bytes) == RET_ERROR) { - mpool_put(t->bt_mp, pg, 0); - return (RET_ERROR); - } - - /* - * Free the parent if it has only the one key and it's not the - * root page. If it's the rootpage, turn it back into an empty - * leaf page. - */ - if (NEXTINDEX(pg) == 1) { - if (pg->pgno == P_ROOT) { - pg->lower = BTDATAOFF; - pg->upper = t->bt_psize; - pg->flags = P_BLEAF; - } else { - if (__bt_relink(t, pg) || __bt_free(t, pg)) - return (RET_ERROR); - continue; - } - } else { - /* Pack remaining key items at the end of the page. */ - nksize = NBINTERNAL(bi->ksize); - from = (char *)pg + pg->upper; - memmove(from + nksize, from, (char *)bi - from); - pg->upper += nksize; - - /* Adjust indices' offsets, shift the indices down. */ - offset = pg->linp[index]; - for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip) - if (ip[0] < offset) - ip[0] += nksize; - for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip) - ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; - pg->lower -= sizeof(indx_t); - } - - mpool_put(t->bt_mp, pg, MPOOL_DIRTY); - break; - } - - /* Free the leaf page, as long as it wasn't the root. */ - if (h->pgno == P_ROOT) { - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - return (RET_SUCCESS); - } - return (__bt_relink(t, h) || __bt_free(t, h)); -} - -/* - * __bt_dleaf -- - * Delete a single record from a leaf page. - * - * Parameters: - * t: tree - * key: referenced key - * h: page - * index: index on page to delete - * - * Returns: - * RET_SUCCESS, RET_ERROR. - */ -int -__bt_dleaf(t, key, h, index) - BTREE *t; - const DBT *key; - PAGE *h; - u_int index; -{ - BLEAF *bl; - indx_t cnt, *ip, offset; - u_int32_t nbytes; - void *to; - char *from; - - /* If this record is referenced by the cursor, delete the cursor. */ - if (F_ISSET(&t->bt_cursor, CURS_INIT) && - !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && - t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index && - __bt_curdel(t, key, h, index)) - return (RET_ERROR); - - /* If the entry uses overflow pages, make them available for reuse. */ - to = bl = GETBLEAF(h, index); - if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) - return (RET_ERROR); - if (bl->flags & P_BIGDATA && - __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) - return (RET_ERROR); - - /* Pack the remaining key/data items at the end of the page. */ - nbytes = NBLEAF(bl); - from = (char *)h + h->upper; - memmove(from + nbytes, from, (char *)to - from); - h->upper += nbytes; - - /* Adjust the indices' offsets, shift the indices down. */ - offset = h->linp[index]; - for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) - if (ip[0] < offset) - ip[0] += nbytes; - for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) - ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; - h->lower -= sizeof(indx_t); - - /* If the cursor is on this page, adjust it as necessary. */ - if (F_ISSET(&t->bt_cursor, CURS_INIT) && - !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && - t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index) - --t->bt_cursor.pg.index; - - return (RET_SUCCESS); -} - -/* - * __bt_curdel -- - * Delete the cursor. - * - * Parameters: - * t: tree - * key: referenced key (or NULL) - * h: page - * index: index on page to delete - * - * Returns: - * RET_SUCCESS, RET_ERROR. - */ -static int -__bt_curdel(t, key, h, index) - BTREE *t; - const DBT *key; - PAGE *h; - u_int index; -{ - CURSOR *c; - EPG e; - PAGE *pg; - int curcopy, status; - - /* - * If there are duplicates, move forward or backward to one. - * Otherwise, copy the key into the cursor area. - */ - c = &t->bt_cursor; - F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); - - curcopy = 0; - if (!F_ISSET(t, B_NODUPS)) { - /* - * We're going to have to do comparisons. If we weren't - * provided a copy of the key, i.e. the user is deleting - * the current cursor position, get one. - */ - if (key == NULL) { - e.page = h; - e.index = index; - if ((status = __bt_ret(t, &e, - &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) - return (status); - curcopy = 1; - key = &c->key; - } - /* Check previous key, if not at the beginning of the page. */ - if (index > 0) { - e.page = h; - e.index = index - 1; - if (__bt_cmp(t, key, &e) == 0) { - F_SET(c, CURS_BEFORE); - goto dup2; - } - } - /* Check next key, if not at the end of the page. */ - if (index < NEXTINDEX(h) - 1) { - e.page = h; - e.index = index + 1; - if (__bt_cmp(t, key, &e) == 0) { - F_SET(c, CURS_AFTER); - goto dup2; - } - } - /* Check previous key if at the beginning of the page. */ - if (index == 0 && h->prevpg != P_INVALID) { - if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) - return (RET_ERROR); - e.page = pg; - e.index = NEXTINDEX(pg) - 1; - if (__bt_cmp(t, key, &e) == 0) { - F_SET(c, CURS_BEFORE); - goto dup1; - } - mpool_put(t->bt_mp, pg, 0); - } - /* Check next key if at the end of the page. */ - if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) { - if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) - return (RET_ERROR); - e.page = pg; - e.index = 0; - if (__bt_cmp(t, key, &e) == 0) { - F_SET(c, CURS_AFTER); -dup1: mpool_put(t->bt_mp, pg, 0); -dup2: c->pg.pgno = e.page->pgno; - c->pg.index = e.index; - return (RET_SUCCESS); - } - mpool_put(t->bt_mp, pg, 0); - } - } - e.page = h; - e.index = index; - if (curcopy || (status = - __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { - F_SET(c, CURS_ACQUIRE); - return (RET_SUCCESS); - } - return (status); -} - -/* - * __bt_relink -- - * Link around a deleted page. - * - * Parameters: - * t: tree - * h: page to be deleted - */ -static int -__bt_relink(t, h) - BTREE *t; - PAGE *h; -{ - PAGE *pg; - - if (h->nextpg != P_INVALID) { - if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) - return (RET_ERROR); - pg->prevpg = h->prevpg; - mpool_put(t->bt_mp, pg, MPOOL_DIRTY); - } - if (h->prevpg != P_INVALID) { - if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) - return (RET_ERROR); - pg->nextpg = h->nextpg; - mpool_put(t->bt_mp, pg, MPOOL_DIRTY); - } - return (0); -} diff --git a/db/btree/bt_get.c b/db/btree/bt_get.c deleted file mode 100644 index 74824c73f4..0000000000 --- a/db/btree/bt_get.c +++ /dev/null @@ -1,105 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_get.c 8.6 (Berkeley) 7/20/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <errno.h> -#include <stddef.h> -#include <stdio.h> - -#include <db.h> -#include "btree.h" - -/* - * __BT_GET -- Get a record from the btree. - * - * Parameters: - * dbp: pointer to access method - * key: key to find - * data: data to return - * flag: currently unused - * - * Returns: - * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. - */ -int -__bt_get(dbp, key, data, flags) - const DB *dbp; - const DBT *key; - DBT *data; - u_int flags; -{ - BTREE *t; - EPG *e; - int exact, status; - - t = dbp->internal; - - /* Toss any page pinned across calls. */ - if (t->bt_pinned != NULL) { - mpool_put(t->bt_mp, t->bt_pinned, 0); - t->bt_pinned = NULL; - } - - /* Get currently doesn't take any flags. */ - if (flags) { - errno = EINVAL; - return (RET_ERROR); - } - - if ((e = __bt_search(t, key, &exact)) == NULL) - return (RET_ERROR); - if (!exact) { - mpool_put(t->bt_mp, e->page, 0); - return (RET_SPECIAL); - } - - status = __bt_ret(t, e, NULL, NULL, data, &t->bt_rdata, 0); - - /* - * If the user is doing concurrent access, we copied the - * key/data, toss the page. - */ - if (F_ISSET(t, B_DB_LOCK)) - mpool_put(t->bt_mp, e->page, 0); - else - t->bt_pinned = e->page; - return (status); -} diff --git a/db/btree/bt_open.c b/db/btree/bt_open.c deleted file mode 100644 index 8c2f48ebf5..0000000000 --- a/db/btree/bt_open.c +++ /dev/null @@ -1,458 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_open.c 8.10 (Berkeley) 8/17/94"; -#endif /* LIBC_SCCS and not lint */ - -/* - * Implementation of btree access method for 4.4BSD. - * - * The design here was originally based on that of the btree access method - * used in the Postgres database system at UC Berkeley. This implementation - * is wholly independent of the Postgres code. - */ - -#include <sys/param.h> -#include <sys/stat.h> - -#include <errno.h> -#include <fcntl.h> -#include <limits.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <db.h> -#include "btree.h" - -#ifdef DEBUG -#undef MINPSIZE -#define MINPSIZE 128 -#endif - -static int byteorder __P((void)); -static int nroot __P((BTREE *)); -static int tmp __P((void)); - -/* - * __BT_OPEN -- Open a btree. - * - * Creates and fills a DB struct, and calls the routine that actually - * opens the btree. - * - * Parameters: - * fname: filename (NULL for in-memory trees) - * flags: open flag bits - * mode: open permission bits - * b: BTREEINFO pointer - * - * Returns: - * NULL on failure, pointer to DB on success. - * - */ -DB * -__bt_open(fname, flags, mode, openinfo, dflags) - const char *fname; - int flags, mode, dflags; - const BTREEINFO *openinfo; -{ - struct stat sb; - BTMETA m; - BTREE *t; - BTREEINFO b; - DB *dbp; - pgno_t ncache; - ssize_t nr; - int machine_lorder; - - t = NULL; - - /* - * Intention is to make sure all of the user's selections are okay - * here and then use them without checking. Can't be complete, since - * we don't know the right page size, lorder or flags until the backing - * file is opened. Also, the file's page size can cause the cachesize - * to change. - */ - machine_lorder = byteorder(); - if (openinfo) { - b = *openinfo; - - /* Flags: R_DUP. */ - if (b.flags & ~(R_DUP)) - goto einval; - - /* - * Page size must be indx_t aligned and >= MINPSIZE. Default - * page size is set farther on, based on the underlying file - * transfer size. - */ - if (b.psize && - (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 || - b.psize & (sizeof(indx_t) - 1))) - goto einval; - - /* Minimum number of keys per page; absolute minimum is 2. */ - if (b.minkeypage) { - if (b.minkeypage < 2) - goto einval; - } else - b.minkeypage = DEFMINKEYPAGE; - - /* If no comparison, use default comparison and prefix. */ - if (b.compare == NULL) { - b.compare = __bt_defcmp; - if (b.prefix == NULL) - b.prefix = __bt_defpfx; - } - - if (b.lorder == 0) - b.lorder = machine_lorder; - } else { - b.compare = __bt_defcmp; - b.cachesize = 0; - b.flags = 0; - b.lorder = machine_lorder; - b.minkeypage = DEFMINKEYPAGE; - b.prefix = __bt_defpfx; - b.psize = 0; - } - - /* Check for the ubiquitous PDP-11. */ - if (b.lorder != BIG_ENDIAN && b.lorder != LITTLE_ENDIAN) - goto einval; - - /* Allocate and initialize DB and BTREE structures. */ - if ((t = (BTREE *)malloc(sizeof(BTREE))) == NULL) - goto err; - memset(t, 0, sizeof(BTREE)); - t->bt_fd = -1; /* Don't close unopened fd on error. */ - t->bt_lorder = b.lorder; - t->bt_order = NOT; - t->bt_cmp = b.compare; - t->bt_pfx = b.prefix; - t->bt_rfd = -1; - - if ((t->bt_dbp = dbp = (DB *)malloc(sizeof(DB))) == NULL) - goto err; - memset(t->bt_dbp, 0, sizeof(DB)); - if (t->bt_lorder != machine_lorder) - F_SET(t, B_NEEDSWAP); - - dbp->type = DB_BTREE; - dbp->internal = t; - dbp->close = __bt_close; - dbp->del = __bt_delete; - dbp->fd = __bt_fd; - dbp->get = __bt_get; - dbp->put = __bt_put; - dbp->seq = __bt_seq; - dbp->sync = __bt_sync; - - /* - * If no file name was supplied, this is an in-memory btree and we - * open a backing temporary file. Otherwise, it's a disk-based tree. - */ - if (fname) { - switch (flags & O_ACCMODE) { - case O_RDONLY: - F_SET(t, B_RDONLY); - break; - case O_RDWR: - break; - case O_WRONLY: - default: - goto einval; - } - - if ((t->bt_fd = open(fname, flags, mode)) < 0) - goto err; - - } else { - if ((flags & O_ACCMODE) != O_RDWR) - goto einval; - if ((t->bt_fd = tmp()) == -1) - goto err; - F_SET(t, B_INMEM); - } - - if (fcntl(t->bt_fd, F_SETFD, 1) == -1) - goto err; - - if (fstat(t->bt_fd, &sb)) - goto err; - if (sb.st_size) { - if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0) - goto err; - if (nr != sizeof(BTMETA)) - goto eftype; - - /* - * Read in the meta-data. This can change the notion of what - * the lorder, page size and flags are, and, when the page size - * changes, the cachesize value can change too. If the user - * specified the wrong byte order for an existing database, we - * don't bother to return an error, we just clear the NEEDSWAP - * bit. - */ - if (m.magic == BTREEMAGIC) - F_CLR(t, B_NEEDSWAP); - else { - F_SET(t, B_NEEDSWAP); - M_32_SWAP(m.magic); - M_32_SWAP(m.version); - M_32_SWAP(m.psize); - M_32_SWAP(m.free); - M_32_SWAP(m.nrecs); - M_32_SWAP(m.flags); - } - if (m.magic != BTREEMAGIC || m.version != BTREEVERSION) - goto eftype; - if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 || - m.psize & (sizeof(indx_t) - 1)) - goto eftype; - if (m.flags & ~SAVEMETA) - goto eftype; - b.psize = m.psize; - F_SET(t, m.flags); - t->bt_free = m.free; - t->bt_nrecs = m.nrecs; - } else { - /* - * Set the page size to the best value for I/O to this file. - * Don't overflow the page offset type. - */ - if (b.psize == 0) { -#ifdef _STATBUF_ST_BLKSIZE - b.psize = sb.st_blksize; -#endif - if (b.psize < MINPSIZE) - b.psize = MINPSIZE; - if (b.psize > MAX_PAGE_OFFSET + 1) - b.psize = MAX_PAGE_OFFSET + 1; - } - - /* Set flag if duplicates permitted. */ - if (!(b.flags & R_DUP)) - F_SET(t, B_NODUPS); - - t->bt_free = P_INVALID; - t->bt_nrecs = 0; - F_SET(t, B_METADIRTY); - } - - t->bt_psize = b.psize; - - /* Set the cache size; must be a multiple of the page size. */ - if (b.cachesize && b.cachesize & (b.psize - 1)) - b.cachesize += (~b.cachesize & (b.psize - 1)) + 1; - if (b.cachesize < b.psize * MINCACHE) - b.cachesize = b.psize * MINCACHE; - - /* Calculate number of pages to cache. */ - ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize; - - /* - * The btree data structure requires that at least two keys can fit on - * a page, but other than that there's no fixed requirement. The user - * specified a minimum number per page, and we translated that into the - * number of bytes a key/data pair can use before being placed on an - * overflow page. This calculation includes the page header, the size - * of the index referencing the leaf item and the size of the leaf item - * structure. Also, don't let the user specify a minkeypage such that - * a key/data pair won't fit even if both key and data are on overflow - * pages. - */ - t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage - - (sizeof(indx_t) + NBLEAFDBT(0, 0)); - if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t)) - t->bt_ovflsize = - NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t); - - /* Initialize the buffer pool. */ - if ((t->bt_mp = - mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL) - goto err; - if (!F_ISSET(t, B_INMEM)) - mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t); - - /* Create a root page if new tree. */ - if (nroot(t) == RET_ERROR) - goto err; - - /* Global flags. */ - if (dflags & DB_LOCK) - F_SET(t, B_DB_LOCK); - if (dflags & DB_SHMEM) - F_SET(t, B_DB_SHMEM); - if (dflags & DB_TXN) - F_SET(t, B_DB_TXN); - - return (dbp); - -einval: errno = EINVAL; - goto err; - -eftype: errno = EFTYPE; - goto err; - -err: if (t) { - if (t->bt_dbp) - free(t->bt_dbp); - if (t->bt_fd != -1) - (void)close(t->bt_fd); - free(t); - } - return (NULL); -} - -/* - * NROOT -- Create the root of a new tree. - * - * Parameters: - * t: tree - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -static int -nroot(t) - BTREE *t; -{ - PAGE *meta, *root; - pgno_t npg; - - if ((meta = mpool_get(t->bt_mp, 0, 0)) != NULL) { - mpool_put(t->bt_mp, meta, 0); - return (RET_SUCCESS); - } - if (errno != EINVAL) /* It's OK to not exist. */ - return (RET_ERROR); - errno = 0; - - if ((meta = mpool_new(t->bt_mp, &npg)) == NULL) - return (RET_ERROR); - - if ((root = mpool_new(t->bt_mp, &npg)) == NULL) - return (RET_ERROR); - - if (npg != P_ROOT) - return (RET_ERROR); - root->pgno = npg; - root->prevpg = root->nextpg = P_INVALID; - root->lower = BTDATAOFF; - root->upper = t->bt_psize; - root->flags = P_BLEAF; - memset(meta, 0, t->bt_psize); - mpool_put(t->bt_mp, meta, MPOOL_DIRTY); - mpool_put(t->bt_mp, root, MPOOL_DIRTY); - return (RET_SUCCESS); -} - -static int -tmp() -{ - sigset_t set, oset; - int fd; - const char *envtmp; - char *path; - static const char fmt[] = "%s/bt.XXXXXX"; - size_t n; - - envtmp = getenv("TMPDIR"); - if (!envtmp) - envtmp = "/tmp"; - n = strlen (envtmp) + sizeof fmt; -#ifdef __GNUC__ - path = __builtin_alloca(n); -#else - path = malloc(n); -#endif - (void)snprintf(path, n, fmt, envtmp ? envtmp : "/tmp"); - - (void)sigfillset(&set); - (void)sigprocmask(SIG_BLOCK, &set, &oset); - if ((fd = mkstemp(path)) != -1) - (void)unlink(path); - (void)sigprocmask(SIG_SETMASK, &oset, NULL); -#ifndef __GNUC__ - free(path); -#endif - return(fd); -} - -static int -byteorder() -{ - u_int32_t x; - u_char *p; - - x = 0x01020304; - p = (u_char *)&x; - switch (*p) { - case 1: - return (BIG_ENDIAN); - case 4: - return (LITTLE_ENDIAN); - default: - return (0); - } -} - -int -__bt_fd(dbp) - const DB *dbp; -{ - BTREE *t; - - t = dbp->internal; - - /* Toss any page pinned across calls. */ - if (t->bt_pinned != NULL) { - mpool_put(t->bt_mp, t->bt_pinned, 0); - t->bt_pinned = NULL; - } - - /* In-memory database can't have a file descriptor. */ - if (F_ISSET(t, B_INMEM)) { - errno = ENOENT; - return (-1); - } - return (t->bt_fd); -} diff --git a/db/btree/bt_overflow.c b/db/btree/bt_overflow.c deleted file mode 100644 index b28b8e0471..0000000000 --- a/db/btree/bt_overflow.c +++ /dev/null @@ -1,228 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/param.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <db.h> -#include "btree.h" - -/* - * Big key/data code. - * - * Big key and data entries are stored on linked lists of pages. The initial - * reference is byte string stored with the key or data and is the page number - * and size. The actual record is stored in a chain of pages linked by the - * nextpg field of the PAGE header. - * - * The first page of the chain has a special property. If the record is used - * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set - * in the header. - * - * XXX - * A single DBT is written to each chain, so a lot of space on the last page - * is wasted. This is a fairly major bug for some data sets. - */ - -/* - * __OVFL_GET -- Get an overflow key/data item. - * - * Parameters: - * t: tree - * p: pointer to { pgno_t, u_int32_t } - * buf: storage address - * bufsz: storage size - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -int -__ovfl_get(t, p, ssz, buf, bufsz) - BTREE *t; - void *p; - size_t *ssz; - void **buf; - size_t *bufsz; -{ - PAGE *h; - pgno_t pg; - size_t nb, plen; - u_int32_t sz; - - memmove(&pg, p, sizeof(pgno_t)); - memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); - *ssz = sz; - -#ifdef DEBUG - if (pg == P_INVALID || sz == 0) - abort(); -#endif - /* Make the buffer bigger as necessary. */ - if (*bufsz < sz) { - *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz)); - if (*buf == NULL) - return (RET_ERROR); - *bufsz = sz; - } - - /* - * Step through the linked list of pages, copying the data on each one - * into the buffer. Never copy more than the data's length. - */ - plen = t->bt_psize - BTDATAOFF; - for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) { - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - - nb = MIN(sz, plen); - memmove(p, (char *)h + BTDATAOFF, nb); - mpool_put(t->bt_mp, h, 0); - - if ((sz -= nb) == 0) - break; - } - return (RET_SUCCESS); -} - -/* - * __OVFL_PUT -- Store an overflow key/data item. - * - * Parameters: - * t: tree - * data: DBT to store - * pgno: storage page number - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -int -__ovfl_put(t, dbt, pg) - BTREE *t; - const DBT *dbt; - pgno_t *pg; -{ - PAGE *h, *last; - void *p; - pgno_t npg; - size_t nb, plen; - u_int32_t sz; - - /* - * Allocate pages and copy the key/data record into them. Store the - * number of the first page in the chain. - */ - plen = t->bt_psize - BTDATAOFF; - for (last = NULL, p = dbt->data, sz = dbt->size;; - p = (char *)p + plen, last = h) { - if ((h = __bt_new(t, &npg)) == NULL) - return (RET_ERROR); - - h->pgno = npg; - h->nextpg = h->prevpg = P_INVALID; - h->flags = P_OVERFLOW; - h->lower = h->upper = 0; - - nb = MIN(sz, plen); - memmove((char *)h + BTDATAOFF, p, nb); - - if (last) { - last->nextpg = h->pgno; - mpool_put(t->bt_mp, last, MPOOL_DIRTY); - } else - *pg = h->pgno; - - if ((sz -= nb) == 0) { - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - break; - } - } - return (RET_SUCCESS); -} - -/* - * __OVFL_DELETE -- Delete an overflow chain. - * - * Parameters: - * t: tree - * p: pointer to { pgno_t, u_int32_t } - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -int -__ovfl_delete(t, p) - BTREE *t; - void *p; -{ - PAGE *h; - pgno_t pg; - size_t plen; - u_int32_t sz; - - memmove(&pg, p, sizeof(pgno_t)); - memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); - -#ifdef DEBUG - if (pg == P_INVALID || sz == 0) - abort(); -#endif - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - - /* Don't delete chains used by internal pages. */ - if (h->flags & P_PRESERVE) { - mpool_put(t->bt_mp, h, 0); - return (RET_SUCCESS); - } - - /* Step through the chain, calling the free routine for each page. */ - for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) { - pg = h->nextpg; - __bt_free(t, h); - if (sz <= plen) - break; - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } - return (RET_SUCCESS); -} diff --git a/db/btree/bt_page.c b/db/btree/bt_page.c deleted file mode 100644 index ce9cbf15a6..0000000000 --- a/db/btree/bt_page.c +++ /dev/null @@ -1,100 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_page.c 8.3 (Berkeley) 7/14/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <stdio.h> - -#include <db.h> -#include "btree.h" - -/* - * __bt_free -- - * Put a page on the freelist. - * - * Parameters: - * t: tree - * h: page to free - * - * Returns: - * RET_ERROR, RET_SUCCESS - * - * Side-effect: - * mpool_put's the page. - */ -int -__bt_free(t, h) - BTREE *t; - PAGE *h; -{ - /* Insert the page at the head of the free list. */ - h->prevpg = P_INVALID; - h->nextpg = t->bt_free; - t->bt_free = h->pgno; - F_SET(t, B_METADIRTY); - - /* Make sure the page gets written back. */ - return (mpool_put(t->bt_mp, h, MPOOL_DIRTY)); -} - -/* - * __bt_new -- - * Get a new page, preferably from the freelist. - * - * Parameters: - * t: tree - * npg: storage for page number. - * - * Returns: - * Pointer to a page, NULL on error. - */ -PAGE * -__bt_new(t, npg) - BTREE *t; - pgno_t *npg; -{ - PAGE *h; - - if (t->bt_free != P_INVALID && - (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) { - *npg = t->bt_free; - t->bt_free = h->nextpg; - F_SET(t, B_METADIRTY); - return (h); - } - return (mpool_new(t->bt_mp, npg)); -} diff --git a/db/btree/bt_put.c b/db/btree/bt_put.c deleted file mode 100644 index 15309c66e2..0000000000 --- a/db/btree/bt_put.c +++ /dev/null @@ -1,321 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <db.h> -#include "btree.h" - -static EPG *bt_fast __P((BTREE *, const DBT *, const DBT *, int *)); - -/* - * __BT_PUT -- Add a btree item to the tree. - * - * Parameters: - * dbp: pointer to access method - * key: key - * data: data - * flag: R_NOOVERWRITE - * - * Returns: - * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the - * tree and R_NOOVERWRITE specified. - */ -int -__bt_put(dbp, key, data, flags) - const DB *dbp; - DBT *key; - const DBT *data; - u_int flags; -{ - BTREE *t; - DBT tkey, tdata; - EPG *e; - PAGE *h; - indx_t index, nxtindex; - pgno_t pg; - u_int32_t nbytes; - int dflags, exact, status; - char *dest, db[NOVFLSIZE], kb[NOVFLSIZE]; - - t = dbp->internal; - - /* Toss any page pinned across calls. */ - if (t->bt_pinned != NULL) { - mpool_put(t->bt_mp, t->bt_pinned, 0); - t->bt_pinned = NULL; - } - - /* Check for change to a read-only tree. */ - if (F_ISSET(t, B_RDONLY)) { - errno = EPERM; - return (RET_ERROR); - } - - switch (flags) { - case 0: - case R_NOOVERWRITE: - break; - case R_CURSOR: - /* - * If flags is R_CURSOR, put the cursor. Must already - * have started a scan and not have already deleted it. - */ - if (F_ISSET(&t->bt_cursor, CURS_INIT) && - !F_ISSET(&t->bt_cursor, - CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) - break; - /* FALLTHROUGH */ - default: - errno = EINVAL; - return (RET_ERROR); - } - - /* - * If the key/data pair won't fit on a page, store it on overflow - * pages. Only put the key on the overflow page if the pair are - * still too big after moving the data to an overflow page. - * - * XXX - * If the insert fails later on, the overflow pages aren't recovered. - */ - dflags = 0; - if (key->size + data->size > t->bt_ovflsize) { - if (key->size > t->bt_ovflsize) { -storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) - return (RET_ERROR); - tkey.data = kb; - tkey.size = NOVFLSIZE; - memmove(kb, &pg, sizeof(pgno_t)); - memmove(kb + sizeof(pgno_t), - &key->size, sizeof(u_int32_t)); - dflags |= P_BIGKEY; - key = &tkey; - } - if (key->size + data->size > t->bt_ovflsize) { - if (__ovfl_put(t, data, &pg) == RET_ERROR) - return (RET_ERROR); - tdata.data = db; - tdata.size = NOVFLSIZE; - memmove(db, &pg, sizeof(pgno_t)); - memmove(db + sizeof(pgno_t), - &data->size, sizeof(u_int32_t)); - dflags |= P_BIGDATA; - data = &tdata; - } - if (key->size + data->size > t->bt_ovflsize) - goto storekey; - } - - /* Replace the cursor. */ - if (flags == R_CURSOR) { - if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL) - return (RET_ERROR); - index = t->bt_cursor.pg.index; - goto delete; - } - - /* - * Find the key to delete, or, the location at which to insert. - * Bt_fast and __bt_search both pin the returned page. - */ - if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL) - if ((e = __bt_search(t, key, &exact)) == NULL) - return (RET_ERROR); - h = e->page; - index = e->index; - - /* - * Add the key/data pair to the tree. If an identical key is already - * in the tree, and R_NOOVERWRITE is set, an error is returned. If - * R_NOOVERWRITE is not set, the key is either added (if duplicates are - * permitted) or an error is returned. - */ - switch (flags) { - case R_NOOVERWRITE: - if (!exact) - break; - mpool_put(t->bt_mp, h, 0); - return (RET_SPECIAL); - default: - if (!exact || !F_ISSET(t, B_NODUPS)) - break; - /* - * !!! - * Note, the delete may empty the page, so we need to put a - * new entry into the page immediately. - */ -delete: if (__bt_dleaf(t, key, h, index) == RET_ERROR) { - mpool_put(t->bt_mp, h, 0); - return (RET_ERROR); - } - break; - } - - /* - * If not enough room, or the user has put a ceiling on the number of - * keys permitted in the page, split the page. The split code will - * insert the key and data and unpin the current page. If inserting - * into the offset array, shift the pointers up. - */ - nbytes = NBLEAFDBT(key->size, data->size); - if ((u_int32_t) (h->upper - h->lower) < nbytes + sizeof(indx_t)) { - if ((status = __bt_split(t, h, key, - data, dflags, nbytes, index)) != RET_SUCCESS) - return (status); - goto success; - } - - if (index < (nxtindex = NEXTINDEX(h))) - memmove(h->linp + index + 1, h->linp + index, - (nxtindex - index) * sizeof(indx_t)); - h->lower += sizeof(indx_t); - - h->linp[index] = h->upper -= nbytes; - dest = (char *)h + h->upper; - WR_BLEAF(dest, key, data, dflags); - - /* If the cursor is on this page, adjust it as necessary. */ - if (F_ISSET(&t->bt_cursor, CURS_INIT) && - !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && - t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= index) - ++t->bt_cursor.pg.index; - - if (t->bt_order == NOT) { - if (h->nextpg == P_INVALID) { - if (index == NEXTINDEX(h) - 1) { - t->bt_order = FORWARD; - t->bt_last.index = index; - t->bt_last.pgno = h->pgno; - } - } else if (h->prevpg == P_INVALID) { - if (index == 0) { - t->bt_order = BACK; - t->bt_last.index = 0; - t->bt_last.pgno = h->pgno; - } - } - } - - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - -success: - if (flags == R_SETCURSOR) - __bt_setcur(t, e->page->pgno, e->index); - - F_SET(t, B_MODIFIED); - return (RET_SUCCESS); -} - -#ifdef STATISTICS -u_long bt_cache_hit, bt_cache_miss; -#endif - -/* - * BT_FAST -- Do a quick check for sorted data. - * - * Parameters: - * t: tree - * key: key to insert - * - * Returns: - * EPG for new record or NULL if not found. - */ -static EPG * -bt_fast(t, key, data, exactp) - BTREE *t; - const DBT *key, *data; - int *exactp; -{ - PAGE *h; - u_int32_t nbytes; - int cmp; - - if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) { - t->bt_order = NOT; - return (NULL); - } - t->bt_cur.page = h; - t->bt_cur.index = t->bt_last.index; - - /* - * If won't fit in this page or have too many keys in this page, - * have to search to get split stack. - */ - nbytes = NBLEAFDBT(key->size, data->size); - if ((u_int32_t) (h->upper - h->lower) < nbytes + sizeof(indx_t)) - goto miss; - - if (t->bt_order == FORWARD) { - if (t->bt_cur.page->nextpg != P_INVALID) - goto miss; - if (t->bt_cur.index != NEXTINDEX(h) - 1) - goto miss; - if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0) - goto miss; - t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index; - } else { - if (t->bt_cur.page->prevpg != P_INVALID) - goto miss; - if (t->bt_cur.index != 0) - goto miss; - if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0) - goto miss; - t->bt_last.index = 0; - } - *exactp = cmp == 0; -#ifdef STATISTICS - ++bt_cache_hit; -#endif - return (&t->bt_cur); - -miss: -#ifdef STATISTICS - ++bt_cache_miss; -#endif - t->bt_order = NOT; - mpool_put(t->bt_mp, h, 0); - return (NULL); -} diff --git a/db/btree/bt_search.c b/db/btree/bt_search.c deleted file mode 100644 index 485afcbbf0..0000000000 --- a/db/btree/bt_search.c +++ /dev/null @@ -1,213 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_search.c 8.8 (Berkeley) 7/31/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <stdio.h> - -#include <db.h> -#include "btree.h" - -static int __bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); -static int __bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); - -/* - * __bt_search -- - * Search a btree for a key. - * - * Parameters: - * t: tree to search - * key: key to find - * exactp: pointer to exact match flag - * - * Returns: - * The EPG for matching record, if any, or the EPG for the location - * of the key, if it were inserted into the tree, is entered into - * the bt_cur field of the tree. A pointer to the field is returned. - */ -EPG * -__bt_search(t, key, exactp) - BTREE *t; - const DBT *key; - int *exactp; -{ - PAGE *h; - indx_t base, index, lim; - pgno_t pg; - int cmp; - - BT_CLR(t); - for (pg = P_ROOT;;) { - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (NULL); - - /* Do a binary search on the current page. */ - t->bt_cur.page = h; - for (base = 0, lim = NEXTINDEX(h); lim; lim >>= 1) { - t->bt_cur.index = index = base + (lim >> 1); - if ((cmp = __bt_cmp(t, key, &t->bt_cur)) == 0) { - if (h->flags & P_BLEAF) { - *exactp = 1; - return (&t->bt_cur); - } - goto next; - } - if (cmp > 0) { - base = index + 1; - --lim; - } - } - - /* - * If it's a leaf page, we're almost done. If no duplicates - * are allowed, or we have an exact match, we're done. Else, - * it's possible that there were matching keys on this page, - * which later deleted, and we're on a page with no matches - * while there are matches on other pages. If at the start or - * end of a page, check the adjacent page. - */ - if (h->flags & P_BLEAF) { - if (!F_ISSET(t, B_NODUPS)) { - if (base == 0 && - h->prevpg != P_INVALID && - __bt_sprev(t, h, key, exactp)) - return (&t->bt_cur); - if (base == NEXTINDEX(h) && - h->nextpg != P_INVALID && - __bt_snext(t, h, key, exactp)) - return (&t->bt_cur); - } - *exactp = 0; - t->bt_cur.index = base; - return (&t->bt_cur); - } - - /* - * No match found. Base is the smallest index greater than - * key and may be zero or a last + 1 index. If it's non-zero, - * decrement by one, and record the internal page which should - * be a parent page for the key. If a split later occurs, the - * inserted page will be to the right of the saved page. - */ - index = base ? base - 1 : base; - -next: BT_PUSH(t, h->pgno, index); - pg = GETBINTERNAL(h, index)->pgno; - mpool_put(t->bt_mp, h, 0); - } -} - -/* - * __bt_snext -- - * Check for an exact match after the key. - * - * Parameters: - * t: tree - * h: current page - * key: key - * exactp: pointer to exact match flag - * - * Returns: - * If an exact match found. - */ -static int -__bt_snext(t, h, key, exactp) - BTREE *t; - PAGE *h; - const DBT *key; - int *exactp; -{ - EPG e; - - /* - * Get the next page. The key is either an exact - * match, or not as good as the one we already have. - */ - if ((e.page = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) - return (0); - e.index = 0; - if (__bt_cmp(t, key, &e) == 0) { - mpool_put(t->bt_mp, h, 0); - t->bt_cur = e; - *exactp = 1; - return (1); - } - mpool_put(t->bt_mp, e.page, 0); - return (0); -} - -/* - * __bt_sprev -- - * Check for an exact match before the key. - * - * Parameters: - * t: tree - * h: current page - * key: key - * exactp: pointer to exact match flag - * - * Returns: - * If an exact match found. - */ -static int -__bt_sprev(t, h, key, exactp) - BTREE *t; - PAGE *h; - const DBT *key; - int *exactp; -{ - EPG e; - - /* - * Get the previous page. The key is either an exact - * match, or not as good as the one we already have. - */ - if ((e.page = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) - return (0); - e.index = NEXTINDEX(e.page) - 1; - if (__bt_cmp(t, key, &e) == 0) { - mpool_put(t->bt_mp, h, 0); - t->bt_cur = e; - *exactp = 1; - return (1); - } - mpool_put(t->bt_mp, e.page, 0); - return (0); -} diff --git a/db/btree/bt_seq.c b/db/btree/bt_seq.c deleted file mode 100644 index 90f896036f..0000000000 --- a/db/btree/bt_seq.c +++ /dev/null @@ -1,460 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <errno.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> - -#include <db.h> -#include "btree.h" - -static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); -static int __bt_seqadv __P((BTREE *, EPG *, int)); -static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); - -/* - * Sequential scan support. - * - * The tree can be scanned sequentially, starting from either end of the - * tree or from any specific key. A scan request before any scanning is - * done is initialized as starting from the least node. - */ - -/* - * __bt_seq -- - * Btree sequential scan interface. - * - * Parameters: - * dbp: pointer to access method - * key: key for positioning and return value - * data: data return value - * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. - * - * Returns: - * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. - */ -int -__bt_seq(dbp, key, data, flags) - const DB *dbp; - DBT *key, *data; - u_int flags; -{ - BTREE *t; - EPG e; - int status; - - t = dbp->internal; - - /* Toss any page pinned across calls. */ - if (t->bt_pinned != NULL) { - mpool_put(t->bt_mp, t->bt_pinned, 0); - t->bt_pinned = NULL; - } - - /* - * If scan uninitialized as yet, or starting at a specific record, set - * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin - * the page the cursor references if they're successful. - */ - switch (flags) { - case R_NEXT: - case R_PREV: - if (F_ISSET(&t->bt_cursor, CURS_INIT)) { - status = __bt_seqadv(t, &e, flags); - break; - } - /* FALLTHROUGH */ - case R_FIRST: - case R_LAST: - case R_CURSOR: - status = __bt_seqset(t, &e, key, flags); - break; - default: - errno = EINVAL; - return (RET_ERROR); - } - - if (status == RET_SUCCESS) { - __bt_setcur(t, e.page->pgno, e.index); - - status = - __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); - - /* - * If the user is doing concurrent access, we copied the - * key/data, toss the page. - */ - if (F_ISSET(t, B_DB_LOCK)) - mpool_put(t->bt_mp, e.page, 0); - else - t->bt_pinned = e.page; - } - return (status); -} - -/* - * __bt_seqset -- - * Set the sequential scan to a specific key. - * - * Parameters: - * t: tree - * ep: storage for returned key - * key: key for initial scan position - * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV - * - * Side effects: - * Pins the page the cursor references. - * - * Returns: - * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. - */ -static int -__bt_seqset(t, ep, key, flags) - BTREE *t; - EPG *ep; - DBT *key; - int flags; -{ - PAGE *h; - pgno_t pg; - int exact; - - /* - * Find the first, last or specific key in the tree and point the - * cursor at it. The cursor may not be moved until a new key has - * been found. - */ - switch (flags) { - case R_CURSOR: /* Keyed scan. */ - /* - * Find the first instance of the key or the smallest key - * which is greater than or equal to the specified key. - */ - if (key->data == NULL || key->size == 0) { - errno = EINVAL; - return (RET_ERROR); - } - return (__bt_first(t, key, ep, &exact)); - case R_FIRST: /* First record. */ - case R_NEXT: - /* Walk down the left-hand side of the tree. */ - for (pg = P_ROOT;;) { - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - - /* Check for an empty tree. */ - if (NEXTINDEX(h) == 0) { - mpool_put(t->bt_mp, h, 0); - return (RET_SPECIAL); - } - - if (h->flags & (P_BLEAF | P_RLEAF)) - break; - pg = GETBINTERNAL(h, 0)->pgno; - mpool_put(t->bt_mp, h, 0); - } - ep->page = h; - ep->index = 0; - break; - case R_LAST: /* Last record. */ - case R_PREV: - /* Walk down the right-hand side of the tree. */ - for (pg = P_ROOT;;) { - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - - /* Check for an empty tree. */ - if (NEXTINDEX(h) == 0) { - mpool_put(t->bt_mp, h, 0); - return (RET_SPECIAL); - } - - if (h->flags & (P_BLEAF | P_RLEAF)) - break; - pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; - mpool_put(t->bt_mp, h, 0); - } - - ep->page = h; - ep->index = NEXTINDEX(h) - 1; - break; - } - return (RET_SUCCESS); -} - -/* - * __bt_seqadvance -- - * Advance the sequential scan. - * - * Parameters: - * t: tree - * flags: R_NEXT, R_PREV - * - * Side effects: - * Pins the page the new key/data record is on. - * - * Returns: - * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. - */ -static int -__bt_seqadv(t, ep, flags) - BTREE *t; - EPG *ep; - int flags; -{ - CURSOR *c; - PAGE *h; - indx_t index; - pgno_t pg; - int exact; - - /* - * There are a couple of states that we can be in. The cursor has - * been initialized by the time we get here, but that's all we know. - */ - c = &t->bt_cursor; - - /* - * The cursor was deleted where there weren't any duplicate records, - * so the key was saved. Find out where that key would go in the - * current tree. It doesn't matter if the returned key is an exact - * match or not -- if it's an exact match, the record was added after - * the delete so we can just return it. If not, as long as there's - * a record there, return it. - */ - if (F_ISSET(c, CURS_ACQUIRE)) - return (__bt_first(t, &c->key, ep, &exact)); - - /* Get the page referenced by the cursor. */ - if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) - return (RET_ERROR); - - /* - * Find the next/previous record in the tree and point the cursor at - * it. The cursor may not be moved until a new key has been found. - */ - switch (flags) { - case R_NEXT: /* Next record. */ - /* - * The cursor was deleted in duplicate records, and moved - * forward to a record that has yet to be returned. Clear - * that flag, and return the record. - */ - if (F_ISSET(c, CURS_AFTER)) - goto usecurrent; - index = c->pg.index; - if (++index == NEXTINDEX(h)) { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - index = 0; - } - break; - case R_PREV: /* Previous record. */ - /* - * The cursor was deleted in duplicate records, and moved - * backward to a record that has yet to be returned. Clear - * that flag, and return the record. - */ - if (F_ISSET(c, CURS_BEFORE)) { -usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); - ep->page = h; - ep->index = c->pg.index; - return (RET_SUCCESS); - } - index = c->pg.index; - if (index == 0) { - pg = h->prevpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - index = NEXTINDEX(h) - 1; - } else - --index; - break; - } - - ep->page = h; - ep->index = index; - return (RET_SUCCESS); -} - -/* - * __bt_first -- - * Find the first entry. - * - * Parameters: - * t: the tree - * key: the key - * erval: return EPG - * exactp: pointer to exact match flag - * - * Returns: - * The first entry in the tree greater than or equal to key, - * or RET_SPECIAL if no such key exists. - */ -static int -__bt_first(t, key, erval, exactp) - BTREE *t; - const DBT *key; - EPG *erval; - int *exactp; -{ - PAGE *h; - EPG *ep, save; - pgno_t pg; - - /* - * Find any matching record; __bt_search pins the page. - * - * If it's an exact match and duplicates are possible, walk backwards - * in the tree until we find the first one. Otherwise, make sure it's - * a valid key (__bt_search may return an index just past the end of a - * page) and return it. - */ - if ((ep = __bt_search(t, key, exactp)) == NULL) - return (RET_SPECIAL); - if (*exactp) { - if (F_ISSET(t, B_NODUPS)) { - *erval = *ep; - return (RET_SUCCESS); - } - - /* - * Walk backwards, as long as the entry matches and there are - * keys left in the tree. Save a copy of each match in case - * we go too far. - */ - save = *ep; - h = ep->page; - do { - if (save.page->pgno != ep->page->pgno) { - mpool_put(t->bt_mp, save.page, 0); - save = *ep; - } else - save.index = ep->index; - - /* - * Don't unpin the page the last (or original) match - * was on, but make sure it's unpinned if an error - * occurs. - */ - if (ep->index == 0) { - if (h->prevpg == P_INVALID) - break; - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, - h->prevpg, 0)) == NULL) { - if (h->pgno == save.page->pgno) - mpool_put(t->bt_mp, - save.page, 0); - return (RET_ERROR); - } - ep->page = h; - ep->index = NEXTINDEX(h); - } - --ep->index; - } while (__bt_cmp(t, key, ep) == 0); - - /* - * Reach here with the last page that was looked at pinned, - * which may or may not be the same as the last (or original) - * match page. If it's not useful, release it. - */ - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, 0); - - *erval = save; - return (RET_SUCCESS); - } - - /* If at the end of a page, find the next entry. */ - if (ep->index == NEXTINDEX(ep->page)) { - h = ep->page; - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - ep->index = 0; - ep->page = h; - } - *erval = *ep; - return (RET_SUCCESS); -} - -/* - * __bt_setcur -- - * Set the cursor to an entry in the tree. - * - * Parameters: - * t: the tree - * pgno: page number - * index: page index - */ -void -__bt_setcur(t, pgno, index) - BTREE *t; - pgno_t pgno; - u_int index; -{ - /* Lose any already deleted key. */ - if (t->bt_cursor.key.data != NULL) { - free(t->bt_cursor.key.data); - t->bt_cursor.key.size = 0; - t->bt_cursor.key.data = NULL; - } - F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); - - /* Update the cursor. */ - t->bt_cursor.pg.pgno = pgno; - t->bt_cursor.pg.index = index; - F_SET(&t->bt_cursor, CURS_INIT); -} diff --git a/db/btree/bt_split.c b/db/btree/bt_split.c deleted file mode 100644 index 4119ccbf79..0000000000 --- a/db/btree/bt_split.c +++ /dev/null @@ -1,829 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_split.c 8.9 (Berkeley) 7/26/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <db.h> -#include "btree.h" - -static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); -static PAGE *bt_page - __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); -static int bt_preserve __P((BTREE *, pgno_t)); -static PAGE *bt_psplit - __P((BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t)); -static PAGE *bt_root - __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); -static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); -static recno_t rec_total __P((PAGE *)); - -#ifdef STATISTICS -u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; -#endif - -/* - * __BT_SPLIT -- Split the tree. - * - * Parameters: - * t: tree - * sp: page to split - * key: key to insert - * data: data to insert - * flags: BIGKEY/BIGDATA flags - * ilen: insert length - * skip: index to leave open - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -int -__bt_split(t, sp, key, data, flags, ilen, argskip) - BTREE *t; - PAGE *sp; - const DBT *key, *data; - int flags; - size_t ilen; - u_int32_t argskip; -{ - BINTERNAL *bi; - BLEAF *bl, *tbl; - DBT a, b; - EPGNO *parent; - PAGE *h, *l, *r, *lchild, *rchild; - indx_t nxtindex; - u_int16_t skip; - u_int32_t n, nbytes, nksize; - int parentsplit; - char *dest; - - /* - * Split the page into two pages, l and r. The split routines return - * a pointer to the page into which the key should be inserted and with - * skip set to the offset which should be used. Additionally, l and r - * are pinned. - */ - skip = argskip; - h = sp->pgno == P_ROOT ? - bt_root(t, sp, &l, &r, &skip, ilen) : - bt_page(t, sp, &l, &r, &skip, ilen); - if (h == NULL) - return (RET_ERROR); - - /* - * Insert the new key/data pair into the leaf page. (Key inserts - * always cause a leaf page to split first.) - */ - h->linp[skip] = h->upper -= ilen; - dest = (char *)h + h->upper; - if (F_ISSET(t, R_RECNO)) - WR_RLEAF(dest, data, flags) - else - WR_BLEAF(dest, key, data, flags) - - /* If the root page was split, make it look right. */ - if (sp->pgno == P_ROOT && - (F_ISSET(t, R_RECNO) ? - bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) - goto err2; - - /* - * Now we walk the parent page stack -- a LIFO stack of the pages that - * were traversed when we searched for the page that split. Each stack - * entry is a page number and a page index offset. The offset is for - * the page traversed on the search. We've just split a page, so we - * have to insert a new key into the parent page. - * - * If the insert into the parent page causes it to split, may have to - * continue splitting all the way up the tree. We stop if the root - * splits or the page inserted into didn't have to split to hold the - * new key. Some algorithms replace the key for the old page as well - * as the new page. We don't, as there's no reason to believe that the - * first key on the old page is any better than the key we have, and, - * in the case of a key being placed at index 0 causing the split, the - * key is unavailable. - * - * There are a maximum of 5 pages pinned at any time. We keep the left - * and right pages pinned while working on the parent. The 5 are the - * two children, left parent and right parent (when the parent splits) - * and the root page or the overflow key page when calling bt_preserve. - * This code must make sure that all pins are released other than the - * root page or overflow page which is unlocked elsewhere. - */ - while ((parent = BT_POP(t)) != NULL) { - lchild = l; - rchild = r; - - /* Get the parent page. */ - if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) - goto err2; - - /* - * The new key goes ONE AFTER the index, because the split - * was to the right. - */ - skip = parent->index + 1; - - /* - * Calculate the space needed on the parent page. - * - * Prefix trees: space hack when inserting into BINTERNAL - * pages. Retain only what's needed to distinguish between - * the new entry and the LAST entry on the page to its left. - * If the keys compare equal, retain the entire key. Note, - * we don't touch overflow keys, and the entire key must be - * retained for the next-to-left most key on the leftmost - * page of each level, or the search will fail. Applicable - * ONLY to internal pages that have leaf pages as children. - * Further reduction of the key between pairs of internal - * pages loses too much information. - */ - switch (rchild->flags & P_TYPE) { - case P_BINTERNAL: - bi = GETBINTERNAL(rchild, 0); - nbytes = NBINTERNAL(bi->ksize); - break; - case P_BLEAF: - bl = GETBLEAF(rchild, 0); - nbytes = NBINTERNAL(bl->ksize); - if (t->bt_pfx && !(bl->flags & P_BIGKEY) && - (h->prevpg != P_INVALID || skip > 1)) { - tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); - a.size = tbl->ksize; - a.data = tbl->bytes; - b.size = bl->ksize; - b.data = bl->bytes; - nksize = t->bt_pfx(&a, &b); - n = NBINTERNAL(nksize); - if (n < nbytes) { -#ifdef STATISTICS - bt_pfxsaved += nbytes - n; -#endif - nbytes = n; - } else - nksize = 0; - } else - nksize = 0; - break; - case P_RINTERNAL: - case P_RLEAF: - nbytes = NRINTERNAL; - break; - default: - abort(); - } - - /* Split the parent page if necessary or shift the indices. */ - if ((u_int32_t) (h->upper - h->lower) - < nbytes + sizeof(indx_t)) { - sp = h; - h = h->pgno == P_ROOT ? - bt_root(t, h, &l, &r, &skip, nbytes) : - bt_page(t, h, &l, &r, &skip, nbytes); - if (h == NULL) - goto err1; - parentsplit = 1; - } else { - if (skip < (nxtindex = NEXTINDEX(h))) - memmove(h->linp + skip + 1, h->linp + skip, - (nxtindex - skip) * sizeof(indx_t)); - h->lower += sizeof(indx_t); - parentsplit = 0; - } - - /* Insert the key into the parent page. */ - switch (rchild->flags & P_TYPE) { - case P_BINTERNAL: - h->linp[skip] = h->upper -= nbytes; - dest = (char *)h + h->linp[skip]; - memmove(dest, bi, nbytes); - ((BINTERNAL *)dest)->pgno = rchild->pgno; - break; - case P_BLEAF: - h->linp[skip] = h->upper -= nbytes; - dest = (char *)h + h->linp[skip]; - WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, - rchild->pgno, bl->flags & P_BIGKEY); - memmove(dest, bl->bytes, nksize ? nksize : bl->ksize); - if (bl->flags & P_BIGKEY && - bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) - goto err1; - break; - case P_RINTERNAL: - /* - * Update the left page count. If split - * added at index 0, fix the correct page. - */ - if (skip > 0) - dest = (char *)h + h->linp[skip - 1]; - else - dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; - ((RINTERNAL *)dest)->nrecs = rec_total(lchild); - ((RINTERNAL *)dest)->pgno = lchild->pgno; - - /* Update the right page count. */ - h->linp[skip] = h->upper -= nbytes; - dest = (char *)h + h->linp[skip]; - ((RINTERNAL *)dest)->nrecs = rec_total(rchild); - ((RINTERNAL *)dest)->pgno = rchild->pgno; - break; - case P_RLEAF: - /* - * Update the left page count. If split - * added at index 0, fix the correct page. - */ - if (skip > 0) - dest = (char *)h + h->linp[skip - 1]; - else - dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; - ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); - ((RINTERNAL *)dest)->pgno = lchild->pgno; - - /* Update the right page count. */ - h->linp[skip] = h->upper -= nbytes; - dest = (char *)h + h->linp[skip]; - ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); - ((RINTERNAL *)dest)->pgno = rchild->pgno; - break; - default: - abort(); - } - - /* Unpin the held pages. */ - if (!parentsplit) { - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - break; - } - - /* If the root page was split, make it look right. */ - if (sp->pgno == P_ROOT && - (F_ISSET(t, R_RECNO) ? - bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) - goto err1; - - mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); - mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); - } - - /* Unpin the held pages. */ - mpool_put(t->bt_mp, l, MPOOL_DIRTY); - mpool_put(t->bt_mp, r, MPOOL_DIRTY); - - /* Clear any pages left on the stack. */ - return (RET_SUCCESS); - - /* - * If something fails in the above loop we were already walking back - * up the tree and the tree is now inconsistent. Nothing much we can - * do about it but release any memory we're holding. - */ -err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); - mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); - -err2: mpool_put(t->bt_mp, l, 0); - mpool_put(t->bt_mp, r, 0); - __dbpanic(t->bt_dbp); - return (RET_ERROR); -} - -/* - * BT_PAGE -- Split a non-root page of a btree. - * - * Parameters: - * t: tree - * h: root page - * lp: pointer to left page pointer - * rp: pointer to right page pointer - * skip: pointer to index to leave open - * ilen: insert length - * - * Returns: - * Pointer to page in which to insert or NULL on error. - */ -static PAGE * -bt_page(t, h, lp, rp, skip, ilen) - BTREE *t; - PAGE *h, **lp, **rp; - indx_t *skip; - size_t ilen; -{ - PAGE *l, *r, *tp; - pgno_t npg; - -#ifdef STATISTICS - ++bt_split; -#endif - /* Put the new right page for the split into place. */ - if ((r = __bt_new(t, &npg)) == NULL) - return (NULL); - r->pgno = npg; - r->lower = BTDATAOFF; - r->upper = t->bt_psize; - r->nextpg = h->nextpg; - r->prevpg = h->pgno; - r->flags = h->flags & P_TYPE; - - /* - * If we're splitting the last page on a level because we're appending - * a key to it (skip is NEXTINDEX()), it's likely that the data is - * sorted. Adding an empty page on the side of the level is less work - * and can push the fill factor much higher than normal. If we're - * wrong it's no big deal, we'll just do the split the right way next - * time. It may look like it's equally easy to do a similar hack for - * reverse sorted data, that is, split the tree left, but it's not. - * Don't even try. - */ - if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { -#ifdef STATISTICS - ++bt_sortsplit; -#endif - h->nextpg = r->pgno; - r->lower = BTDATAOFF + sizeof(indx_t); - *skip = 0; - *lp = h; - *rp = r; - return (r); - } - - /* Put the new left page for the split into place. */ - if ((l = (PAGE *)malloc(t->bt_psize)) == NULL) { - mpool_put(t->bt_mp, r, 0); - return (NULL); - } -#ifdef PURIFY - memset(l, 0xff, t->bt_psize); -#endif - l->pgno = h->pgno; - l->nextpg = r->pgno; - l->prevpg = h->prevpg; - l->lower = BTDATAOFF; - l->upper = t->bt_psize; - l->flags = h->flags & P_TYPE; - - /* Fix up the previous pointer of the page after the split page. */ - if (h->nextpg != P_INVALID) { - if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { - free(l); - /* XXX mpool_free(t->bt_mp, r->pgno); */ - return (NULL); - } - tp->prevpg = r->pgno; - mpool_put(t->bt_mp, tp, MPOOL_DIRTY); - } - - /* - * Split right. The key/data pairs aren't sorted in the btree page so - * it's simpler to copy the data from the split page onto two new pages - * instead of copying half the data to the right page and compacting - * the left page in place. Since the left page can't change, we have - * to swap the original and the allocated left page after the split. - */ - tp = bt_psplit(t, h, l, r, skip, ilen); - - /* Move the new left page onto the old left page. */ - memmove(h, l, t->bt_psize); - if (tp == l) - tp = h; - free(l); - - *lp = h; - *rp = r; - return (tp); -} - -/* - * BT_ROOT -- Split the root page of a btree. - * - * Parameters: - * t: tree - * h: root page - * lp: pointer to left page pointer - * rp: pointer to right page pointer - * skip: pointer to index to leave open - * ilen: insert length - * - * Returns: - * Pointer to page in which to insert or NULL on error. - */ -static PAGE * -bt_root(t, h, lp, rp, skip, ilen) - BTREE *t; - PAGE *h, **lp, **rp; - indx_t *skip; - size_t ilen; -{ - PAGE *l, *r, *tp; - pgno_t lnpg, rnpg; - -#ifdef STATISTICS - ++bt_split; - ++bt_rootsplit; -#endif - /* Put the new left and right pages for the split into place. */ - if ((l = __bt_new(t, &lnpg)) == NULL || - (r = __bt_new(t, &rnpg)) == NULL) - return (NULL); - l->pgno = lnpg; - r->pgno = rnpg; - l->nextpg = r->pgno; - r->prevpg = l->pgno; - l->prevpg = r->nextpg = P_INVALID; - l->lower = r->lower = BTDATAOFF; - l->upper = r->upper = t->bt_psize; - l->flags = r->flags = h->flags & P_TYPE; - - /* Split the root page. */ - tp = bt_psplit(t, h, l, r, skip, ilen); - - *lp = l; - *rp = r; - return (tp); -} - -/* - * BT_RROOT -- Fix up the recno root page after it has been split. - * - * Parameters: - * t: tree - * h: root page - * l: left page - * r: right page - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -static int -bt_rroot(t, h, l, r) - BTREE *t; - PAGE *h, *l, *r; -{ - char *dest; - - /* Insert the left and right keys, set the header information. */ - h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; - dest = (char *)h + h->upper; - WR_RINTERNAL(dest, - l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); - - h->linp[1] = h->upper -= NRINTERNAL; - dest = (char *)h + h->upper; - WR_RINTERNAL(dest, - r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); - - h->lower = BTDATAOFF + 2 * sizeof(indx_t); - - /* Unpin the root page, set to recno internal page. */ - h->flags &= ~P_TYPE; - h->flags |= P_RINTERNAL; - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - - return (RET_SUCCESS); -} - -/* - * BT_BROOT -- Fix up the btree root page after it has been split. - * - * Parameters: - * t: tree - * h: root page - * l: left page - * r: right page - * - * Returns: - * RET_ERROR, RET_SUCCESS - */ -static int -bt_broot(t, h, l, r) - BTREE *t; - PAGE *h, *l, *r; -{ - BINTERNAL *bi; - BLEAF *bl; - u_int32_t nbytes; - char *dest; - - /* - * If the root page was a leaf page, change it into an internal page. - * We copy the key we split on (but not the key's data, in the case of - * a leaf page) to the new root page. - * - * The btree comparison code guarantees that the left-most key on any - * level of the tree is never used, so it doesn't need to be filled in. - */ - nbytes = NBINTERNAL(0); - h->linp[0] = h->upper = t->bt_psize - nbytes; - dest = (char *)h + h->upper; - WR_BINTERNAL(dest, 0, l->pgno, 0); - - switch (h->flags & P_TYPE) { - case P_BLEAF: - bl = GETBLEAF(r, 0); - nbytes = NBINTERNAL(bl->ksize); - h->linp[1] = h->upper -= nbytes; - dest = (char *)h + h->upper; - WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); - memmove(dest, bl->bytes, bl->ksize); - - /* - * If the key is on an overflow page, mark the overflow chain - * so it isn't deleted when the leaf copy of the key is deleted. - */ - if (bl->flags & P_BIGKEY && - bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) - return (RET_ERROR); - break; - case P_BINTERNAL: - bi = GETBINTERNAL(r, 0); - nbytes = NBINTERNAL(bi->ksize); - h->linp[1] = h->upper -= nbytes; - dest = (char *)h + h->upper; - memmove(dest, bi, nbytes); - ((BINTERNAL *)dest)->pgno = r->pgno; - break; - default: - abort(); - } - - /* There are two keys on the page. */ - h->lower = BTDATAOFF + 2 * sizeof(indx_t); - - /* Unpin the root page, set to btree internal page. */ - h->flags &= ~P_TYPE; - h->flags |= P_BINTERNAL; - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - - return (RET_SUCCESS); -} - -/* - * BT_PSPLIT -- Do the real work of splitting the page. - * - * Parameters: - * t: tree - * h: page to be split - * l: page to put lower half of data - * r: page to put upper half of data - * pskip: pointer to index to leave open - * ilen: insert length - * - * Returns: - * Pointer to page in which to insert. - */ -static PAGE * -bt_psplit(t, h, l, r, pskip, ilen) - BTREE *t; - PAGE *h, *l, *r; - indx_t *pskip; - size_t ilen; -{ - BINTERNAL *bi; - BLEAF *bl; - CURSOR *c; - RLEAF *rl; - PAGE *rval; - void *src; - indx_t full, half, nxt, off, skip, top, used; - u_int32_t nbytes; - int bigkeycnt, isbigkey; - - /* - * Split the data to the left and right pages. Leave the skip index - * open. Additionally, make some effort not to split on an overflow - * key. This makes internal page processing faster and can save - * space as overflow keys used by internal pages are never deleted. - */ - bigkeycnt = 0; - skip = *pskip; - full = t->bt_psize - BTDATAOFF; - half = full / 2; - used = 0; - for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { - if (skip == off) { - nbytes = ilen; - isbigkey = 0; /* XXX: not really known. */ - } else - switch (h->flags & P_TYPE) { - case P_BINTERNAL: - src = bi = GETBINTERNAL(h, nxt); - nbytes = NBINTERNAL(bi->ksize); - isbigkey = bi->flags & P_BIGKEY; - break; - case P_BLEAF: - src = bl = GETBLEAF(h, nxt); - nbytes = NBLEAF(bl); - isbigkey = bl->flags & P_BIGKEY; - break; - case P_RINTERNAL: - src = GETRINTERNAL(h, nxt); - nbytes = NRINTERNAL; - isbigkey = 0; - break; - case P_RLEAF: - src = rl = GETRLEAF(h, nxt); - nbytes = NRLEAF(rl); - isbigkey = 0; - break; - default: - abort(); - } - - /* - * If the key/data pairs are substantial fractions of the max - * possible size for the page, it's possible to get situations - * where we decide to try and copy too much onto the left page. - * Make sure that doesn't happen. - */ - if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) - || nxt == top - 1) { - --off; - break; - } - - /* Copy the key/data pair, if not the skipped index. */ - if (skip != off) { - ++nxt; - - l->linp[off] = l->upper -= nbytes; - memmove((char *)l + l->upper, src, nbytes); - } - - used += nbytes + sizeof(indx_t); - if (used >= half) { - if (!isbigkey || bigkeycnt == 3) - break; - else - ++bigkeycnt; - } - } - - /* - * Off is the last offset that's valid for the left page. - * Nxt is the first offset to be placed on the right page. - */ - l->lower += (off + 1) * sizeof(indx_t); - - /* - * If splitting the page that the cursor was on, the cursor has to be - * adjusted to point to the same record as before the split. If the - * cursor is at or past the skipped slot, the cursor is incremented by - * one. If the cursor is on the right page, it is decremented by the - * number of records split to the left page. - */ - c = &t->bt_cursor; - if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) { - if (c->pg.index >= skip) - ++c->pg.index; - if (c->pg.index < nxt) /* Left page. */ - c->pg.pgno = l->pgno; - else { /* Right page. */ - c->pg.pgno = r->pgno; - c->pg.index -= nxt; - } - } - - /* - * If the skipped index was on the left page, just return that page. - * Otherwise, adjust the skip index to reflect the new position on - * the right page. - */ - if (skip <= off) { - skip = 0; - rval = l; - } else { - rval = r; - *pskip -= nxt; - } - - for (off = 0; nxt < top; ++off) { - if (skip == nxt) { - ++off; - skip = 0; - } - switch (h->flags & P_TYPE) { - case P_BINTERNAL: - src = bi = GETBINTERNAL(h, nxt); - nbytes = NBINTERNAL(bi->ksize); - break; - case P_BLEAF: - src = bl = GETBLEAF(h, nxt); - nbytes = NBLEAF(bl); - break; - case P_RINTERNAL: - src = GETRINTERNAL(h, nxt); - nbytes = NRINTERNAL; - break; - case P_RLEAF: - src = rl = GETRLEAF(h, nxt); - nbytes = NRLEAF(rl); - break; - default: - abort(); - } - ++nxt; - r->linp[off] = r->upper -= nbytes; - memmove((char *)r + r->upper, src, nbytes); - } - r->lower += off * sizeof(indx_t); - - /* If the key is being appended to the page, adjust the index. */ - if (skip == top) - r->lower += sizeof(indx_t); - - return (rval); -} - -/* - * BT_PRESERVE -- Mark a chain of pages as used by an internal node. - * - * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the - * record that references them gets deleted. Chains pointed to by internal - * pages never get deleted. This routine marks a chain as pointed to by an - * internal page. - * - * Parameters: - * t: tree - * pg: page number of first page in the chain. - * - * Returns: - * RET_SUCCESS, RET_ERROR. - */ -static int -bt_preserve(t, pg) - BTREE *t; - pgno_t pg; -{ - PAGE *h; - - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - h->flags |= P_PRESERVE; - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - return (RET_SUCCESS); -} - -/* - * REC_TOTAL -- Return the number of recno entries below a page. - * - * Parameters: - * h: page - * - * Returns: - * The number of recno entries below a page. - * - * XXX - * These values could be set by the bt_psplit routine. The problem is that the - * entry has to be popped off of the stack etc. or the values have to be passed - * all the way back to bt_split/bt_rroot and it's not very clean. - */ -static recno_t -rec_total(h) - PAGE *h; -{ - recno_t recs; - indx_t nxt, top; - - for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) - recs += GETRINTERNAL(h, nxt)->nrecs; - return (recs); -} diff --git a/db/btree/bt_utils.c b/db/btree/bt_utils.c deleted file mode 100644 index 1416c7862d..0000000000 --- a/db/btree/bt_utils.c +++ /dev/null @@ -1,260 +0,0 @@ -/*- - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_utils.c 8.8 (Berkeley) 7/20/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/param.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <db.h> -#include "btree.h" - -/* - * __bt_ret -- - * Build return key/data pair. - * - * Parameters: - * t: tree - * e: key/data pair to be returned - * key: user's key structure (NULL if not to be filled in) - * rkey: memory area to hold key - * data: user's data structure (NULL if not to be filled in) - * rdata: memory area to hold data - * copy: always copy the key/data item - * - * Returns: - * RET_SUCCESS, RET_ERROR. - */ -int -__bt_ret(t, e, key, rkey, data, rdata, copy) - BTREE *t; - EPG *e; - DBT *key, *rkey, *data, *rdata; - int copy; -{ - BLEAF *bl; - void *p; - - bl = GETBLEAF(e->page, e->index); - - /* - * We must copy big keys/data to make them contiguous. Otherwise, - * leave the page pinned and don't copy unless the user specified - * concurrent access. - */ - if (key == NULL) - goto dataonly; - - if (bl->flags & P_BIGKEY) { - if (__ovfl_get(t, bl->bytes, - &key->size, &rkey->data, &rkey->size)) - return (RET_ERROR); - key->data = rkey->data; - } else if (copy || F_ISSET(t, B_DB_LOCK)) { - if (bl->ksize > rkey->size) { - p = (void *)(rkey->data == NULL ? - malloc(bl->ksize) : realloc(rkey->data, bl->ksize)); - if (p == NULL) - return (RET_ERROR); - rkey->data = p; - rkey->size = bl->ksize; - } - memmove(rkey->data, bl->bytes, bl->ksize); - key->size = bl->ksize; - key->data = rkey->data; - } else { - key->size = bl->ksize; - key->data = bl->bytes; - } - -dataonly: - if (data == NULL) - return (RET_SUCCESS); - - if (bl->flags & P_BIGDATA) { - if (__ovfl_get(t, bl->bytes + bl->ksize, - &data->size, &rdata->data, &rdata->size)) - return (RET_ERROR); - data->data = rdata->data; - } else if (copy || F_ISSET(t, B_DB_LOCK)) { - /* Use +1 in case the first record retrieved is 0 length. */ - if (bl->dsize + 1 > rdata->size) { - p = (void *)(rdata->data == NULL ? - malloc(bl->dsize + 1) : - realloc(rdata->data, bl->dsize + 1)); - if (p == NULL) - return (RET_ERROR); - rdata->data = p; - rdata->size = bl->dsize + 1; - } - memmove(rdata->data, bl->bytes + bl->ksize, bl->dsize); - data->size = bl->dsize; - data->data = rdata->data; - } else { - data->size = bl->dsize; - data->data = bl->bytes + bl->ksize; - } - - return (RET_SUCCESS); -} - -/* - * __BT_CMP -- Compare a key to a given record. - * - * Parameters: - * t: tree - * k1: DBT pointer of first arg to comparison - * e: pointer to EPG for comparison - * - * Returns: - * < 0 if k1 is < record - * = 0 if k1 is = record - * > 0 if k1 is > record - */ -int -__bt_cmp(t, k1, e) - BTREE *t; - const DBT *k1; - EPG *e; -{ - BINTERNAL *bi; - BLEAF *bl; - DBT k2; - PAGE *h; - void *bigkey; - - /* - * The left-most key on internal pages, at any level of the tree, is - * guaranteed by the following code to be less than any user key. - * This saves us from having to update the leftmost key on an internal - * page when the user inserts a new key in the tree smaller than - * anything we've yet seen. - */ - h = e->page; - if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & P_BLEAF)) - return (1); - - bigkey = NULL; - if (h->flags & P_BLEAF) { - bl = GETBLEAF(h, e->index); - if (bl->flags & P_BIGKEY) - bigkey = bl->bytes; - else { - k2.data = bl->bytes; - k2.size = bl->ksize; - } - } else { - bi = GETBINTERNAL(h, e->index); - if (bi->flags & P_BIGKEY) - bigkey = bi->bytes; - else { - k2.data = bi->bytes; - k2.size = bi->ksize; - } - } - - if (bigkey) { - if (__ovfl_get(t, bigkey, - &k2.size, &t->bt_rdata.data, &t->bt_rdata.size)) - return (RET_ERROR); - k2.data = t->bt_rdata.data; - } - return ((*t->bt_cmp)(k1, &k2)); -} - -/* - * __BT_DEFCMP -- Default comparison routine. - * - * Parameters: - * a: DBT #1 - * b: DBT #2 - * - * Returns: - * < 0 if a is < b - * = 0 if a is = b - * > 0 if a is > b - */ -int -__bt_defcmp(a, b) - const DBT *a, *b; -{ - register size_t len; - register u_char *p1, *p2; - - /* - * XXX - * If a size_t doesn't fit in an int, this routine can lose. - * What we need is a integral type which is guaranteed to be - * larger than a size_t, and there is no such thing. - */ - len = MIN(a->size, b->size); - for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2) - if (*p1 != *p2) - return ((int)*p1 - (int)*p2); - return ((int)a->size - (int)b->size); -} - -/* - * __BT_DEFPFX -- Default prefix routine. - * - * Parameters: - * a: DBT #1 - * b: DBT #2 - * - * Returns: - * Number of bytes needed to distinguish b from a. - */ -size_t -__bt_defpfx(a, b) - const DBT *a, *b; -{ - register u_char *p1, *p2; - register size_t cnt, len; - - cnt = 1; - len = MIN(a->size, b->size); - for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt) - if (*p1 != *p2) - return (cnt); - - /* a->size must be <= b->size, or they wouldn't be in this order. */ - return (a->size < b->size ? a->size + 1 : a->size); -} diff --git a/db/btree/btree.h b/db/btree/btree.h deleted file mode 100644 index 45f7c94ed4..0000000000 --- a/db/btree/btree.h +++ /dev/null @@ -1,395 +0,0 @@ -/*- - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)btree.h 8.11 (Berkeley) 8/17/94 - */ - -/* Macros to set/clear/test flags. */ -#define F_SET(p, f) (p)->flags |= (f) -#define F_CLR(p, f) (p)->flags &= ~(f) -#define F_ISSET(p, f) ((p)->flags & (f)) - -#include <mpool.h> - -#ifdef _LIBC -/* In the GNU C library we must not pollute the namespace because libdb is - needed by libnss_db. */ -#define mpool_open __mpool_open -#define mpool_filter __mpool_filter -#define mpool_new __mpool_new -#define mpool_get __mpool_get -#define mpool_put __mpool_put -#define mpool_sync __mpool_sync -#define mpool_close __mpool_close -#endif - -#define DEFMINKEYPAGE (2) /* Minimum keys per page */ -#define MINCACHE (5) /* Minimum cached pages */ -#define MINPSIZE (512) /* Minimum page size */ - -/* - * Page 0 of a btree file contains a copy of the meta-data. This page is also - * used as an out-of-band page, i.e. page pointers that point to nowhere point - * to page 0. Page 1 is the root of the btree. - */ -#define P_INVALID 0 /* Invalid tree page number. */ -#define P_META 0 /* Tree metadata page number. */ -#define P_ROOT 1 /* Tree root page number. */ - -/* - * There are five page layouts in the btree: btree internal pages (BINTERNAL), - * btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages - * (RLEAF) and overflow pages. All five page types have a page header (PAGE). - * This implementation requires that values within structures NOT be padded. - * (ANSI C permits random padding.) If your compiler pads randomly you'll have - * to do some work to get this package to run. - */ -typedef struct _page { - pgno_t pgno; /* this page's page number */ - pgno_t prevpg; /* left sibling */ - pgno_t nextpg; /* right sibling */ - -#define P_BINTERNAL 0x01 /* btree internal page */ -#define P_BLEAF 0x02 /* leaf page */ -#define P_OVERFLOW 0x04 /* overflow page */ -#define P_RINTERNAL 0x08 /* recno internal page */ -#define P_RLEAF 0x10 /* leaf page */ -#define P_TYPE 0x1f /* type mask */ -#define P_PRESERVE 0x20 /* never delete this chain of pages */ - u_int32_t flags; - - indx_t lower; /* lower bound of free space on page */ - indx_t upper; /* upper bound of free space on page */ - indx_t linp[1]; /* indx_t-aligned VAR. LENGTH DATA */ -} PAGE; - -/* First and next index. */ -#define BTDATAOFF \ - (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \ - sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) -#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t)) - -/* - * For pages other than overflow pages, there is an array of offsets into the - * rest of the page immediately following the page header. Each offset is to - * an item which is unique to the type of page. The h_lower offset is just - * past the last filled-in index. The h_upper offset is the first item on the - * page. Offsets are from the beginning of the page. - * - * If an item is too big to store on a single page, a flag is set and the item - * is a { page, size } pair such that the page is the first page of an overflow - * chain with size bytes of item. Overflow pages are simply bytes without any - * external structure. - * - * The page number and size fields in the items are pgno_t-aligned so they can - * be manipulated without copying. (This presumes that 32 bit items can be - * manipulated on this system.) - */ -#define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1)) -#define NOVFLSIZE (sizeof(pgno_t) + sizeof(u_int32_t)) - -/* - * For the btree internal pages, the item is a key. BINTERNALs are {key, pgno} - * pairs, such that the key compares less than or equal to all of the records - * on that page. For a tree without duplicate keys, an internal page with two - * consecutive keys, a and b, will have all records greater than or equal to a - * and less than b stored on the page associated with a. Duplicate keys are - * somewhat special and can cause duplicate internal and leaf page records and - * some minor modifications of the above rule. - */ -typedef struct _binternal { - u_int32_t ksize; /* key size */ - pgno_t pgno; /* page number stored on */ -#define P_BIGDATA 0x01 /* overflow data */ -#define P_BIGKEY 0x02 /* overflow key */ - u_char flags; - char bytes[1]; /* data */ -} BINTERNAL; - -/* Get the page's BINTERNAL structure at index indx. */ -#define GETBINTERNAL(pg, indx) \ - ((BINTERNAL *)((char *)(pg) + (pg)->linp[indx])) - -/* Get the number of bytes in the entry. */ -#define NBINTERNAL(len) \ - LALIGN(sizeof(u_int32_t) + sizeof(pgno_t) + sizeof(u_char) + (len)) - -/* Copy a BINTERNAL entry to the page. */ -#define WR_BINTERNAL(p, size, pgno, flags) { \ - *(u_int32_t *)p = size; \ - p += sizeof(u_int32_t); \ - *(pgno_t *)p = pgno; \ - p += sizeof(pgno_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ -} - -/* - * For the recno internal pages, the item is a page number with the number of - * keys found on that page and below. - */ -typedef struct _rinternal { - recno_t nrecs; /* number of records */ - pgno_t pgno; /* page number stored below */ -} RINTERNAL; - -/* Get the page's RINTERNAL structure at index indx. */ -#define GETRINTERNAL(pg, indx) \ - ((RINTERNAL *)((char *)(pg) + (pg)->linp[indx])) - -/* Get the number of bytes in the entry. */ -#define NRINTERNAL \ - LALIGN(sizeof(recno_t) + sizeof(pgno_t)) - -/* Copy a RINTERNAL entry to the page. */ -#define WR_RINTERNAL(p, nrecs, pgno) { \ - *(recno_t *)p = nrecs; \ - p += sizeof(recno_t); \ - *(pgno_t *)p = pgno; \ -} - -/* For the btree leaf pages, the item is a key and data pair. */ -typedef struct _bleaf { - u_int32_t ksize; /* size of key */ - u_int32_t dsize; /* size of data */ - u_char flags; /* P_BIGDATA, P_BIGKEY */ - char bytes[1]; /* data */ -} BLEAF; - -/* Get the page's BLEAF structure at index indx. */ -#define GETBLEAF(pg, indx) \ - ((BLEAF *)((char *)(pg) + (pg)->linp[indx])) - -/* Get the number of bytes in the entry. */ -#define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize) - -/* Get the number of bytes in the user's key/data pair. */ -#define NBLEAFDBT(ksize, dsize) \ - LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \ - (ksize) + (dsize)) - -/* Copy a BLEAF entry to the page. */ -#define WR_BLEAF(p, key, data, flags) { \ - *(u_int32_t *)p = key->size; \ - p += sizeof(u_int32_t); \ - *(u_int32_t *)p = data->size; \ - p += sizeof(u_int32_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ - memmove(p, key->data, key->size); \ - p += key->size; \ - memmove(p, data->data, data->size); \ -} - -/* For the recno leaf pages, the item is a data entry. */ -typedef struct _rleaf { - u_int32_t dsize; /* size of data */ - u_char flags; /* P_BIGDATA */ - char bytes[1]; -} RLEAF; - -/* Get the page's RLEAF structure at index indx. */ -#define GETRLEAF(pg, indx) \ - ((RLEAF *)((char *)(pg) + (pg)->linp[indx])) - -/* Get the number of bytes in the entry. */ -#define NRLEAF(p) NRLEAFDBT((p)->dsize) - -/* Get the number of bytes from the user's data. */ -#define NRLEAFDBT(dsize) \ - LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize)) - -/* Copy a RLEAF entry to the page. */ -#define WR_RLEAF(p, data, flags) { \ - *(u_int32_t *)p = data->size; \ - p += sizeof(u_int32_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ - memmove(p, data->data, data->size); \ -} - -/* - * A record in the tree is either a pointer to a page and an index in the page - * or a page number and an index. These structures are used as a cursor, stack - * entry and search returns as well as to pass records to other routines. - * - * One comment about searches. Internal page searches must find the largest - * record less than key in the tree so that descents work. Leaf page searches - * must find the smallest record greater than key so that the returned index - * is the record's correct position for insertion. - */ -typedef struct _epgno { - pgno_t pgno; /* the page number */ - indx_t index; /* the index on the page */ -} EPGNO; - -typedef struct _epg { - PAGE *page; /* the (pinned) page */ - indx_t index; /* the index on the page */ -} EPG; - -/* - * About cursors. The cursor (and the page that contained the key/data pair - * that it referenced) can be deleted, which makes things a bit tricky. If - * there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set - * or there simply aren't any duplicates of the key) we copy the key that it - * referenced when it's deleted, and reacquire a new cursor key if the cursor - * is used again. If there are duplicates keys, we move to the next/previous - * key, and set a flag so that we know what happened. NOTE: if duplicate (to - * the cursor) keys are added to the tree during this process, it is undefined - * if they will be returned or not in a cursor scan. - * - * The flags determine the possible states of the cursor: - * - * CURS_INIT The cursor references *something*. - * CURS_ACQUIRE The cursor was deleted, and a key has been saved so that - * we can reacquire the right position in the tree. - * CURS_AFTER, CURS_BEFORE - * The cursor was deleted, and now references a key/data pair - * that has not yet been returned, either before or after the - * deleted key/data pair. - * XXX - * This structure is broken out so that we can eventually offer multiple - * cursors as part of the DB interface. - */ -typedef struct _cursor { - EPGNO pg; /* B: Saved tree reference. */ - DBT key; /* B: Saved key, or key.data == NULL. */ - recno_t rcursor; /* R: recno cursor (1-based) */ - -#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */ -#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */ -#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */ -#define CURS_INIT 0x08 /* RB: Cursor initialized. */ - u_int8_t flags; -} CURSOR; - -/* - * The metadata of the tree. The nrecs field is used only by the RECNO code. - * This is because the btree doesn't really need it and it requires that every - * put or delete call modify the metadata. - */ -typedef struct _btmeta { - u_int32_t magic; /* magic number */ - u_int32_t version; /* version */ - u_int32_t psize; /* page size */ - u_int32_t free; /* page number of first free page */ - u_int32_t nrecs; /* R: number of records */ - -#define SAVEMETA (B_NODUPS | R_RECNO) - u_int32_t flags; /* bt_flags & SAVEMETA */ -} BTMETA; - -/* The in-memory btree/recno data structure. */ -typedef struct _btree { - MPOOL *bt_mp; /* memory pool cookie */ - - DB *bt_dbp; /* pointer to enclosing DB */ - - EPG bt_cur; /* current (pinned) page */ - PAGE *bt_pinned; /* page pinned across calls */ - - CURSOR bt_cursor; /* cursor */ - -#define BT_PUSH(t, p, i) { \ - t->bt_sp->pgno = p; \ - t->bt_sp->index = i; \ - ++t->bt_sp; \ -} -#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp) -#define BT_CLR(t) (t->bt_sp = t->bt_stack) - EPGNO bt_stack[50]; /* stack of parent pages */ - EPGNO *bt_sp; /* current stack pointer */ - - DBT bt_rkey; /* returned key */ - DBT bt_rdata; /* returned data */ - - int bt_fd; /* tree file descriptor */ - - pgno_t bt_free; /* next free page */ - u_int32_t bt_psize; /* page size */ - indx_t bt_ovflsize; /* cut-off for key/data overflow */ - int bt_lorder; /* byte order */ - /* sorted order */ - enum { NOT, BACK, FORWARD } bt_order; - EPGNO bt_last; /* last insert */ - - /* B: key comparison function */ - int (*bt_cmp) __P((const DBT *, const DBT *)); - /* B: prefix comparison function */ - size_t (*bt_pfx) __P((const DBT *, const DBT *)); - /* R: recno input function */ - int (*bt_irec) __P((struct _btree *, recno_t)); - - FILE *bt_rfp; /* R: record FILE pointer */ - int bt_rfd; /* R: record file descriptor */ - - caddr_t bt_cmap; /* R: current point in mapped space */ - caddr_t bt_smap; /* R: start of mapped space */ - caddr_t bt_emap; /* R: end of mapped space */ - size_t bt_msize; /* R: size of mapped region. */ - - recno_t bt_nrecs; /* R: number of records */ - size_t bt_reclen; /* R: fixed record length */ - u_char bt_bval; /* R: delimiting byte/pad character */ - -/* - * NB: - * B_NODUPS and R_RECNO are stored on disk, and may not be changed. - */ -#define B_INMEM 0x00001 /* in-memory tree */ -#define B_METADIRTY 0x00002 /* need to write metadata */ -#define B_MODIFIED 0x00004 /* tree modified */ -#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */ -#define B_RDONLY 0x00010 /* read-only tree */ - -#define B_NODUPS 0x00020 /* no duplicate keys permitted */ -#define R_RECNO 0x00080 /* record oriented tree */ - -#define R_CLOSEFP 0x00040 /* opened a file pointer */ -#define R_EOF 0x00100 /* end of input file reached. */ -#define R_FIXLEN 0x00200 /* fixed length records */ -#define R_MEMMAPPED 0x00400 /* memory mapped file. */ -#define R_INMEM 0x00800 /* in-memory file */ -#define R_MODIFIED 0x01000 /* modified file */ -#define R_RDONLY 0x02000 /* read-only file */ - -#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */ -#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */ -#define B_DB_TXN 0x10000 /* DB_TXN specified. */ - u_int32_t flags; -} BTREE; - -#include "extern.h" diff --git a/db/btree/extern.h b/db/btree/extern.h deleted file mode 100644 index ebd9c54923..0000000000 --- a/db/btree/extern.h +++ /dev/null @@ -1,70 +0,0 @@ -/*- - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)extern.h 8.10 (Berkeley) 7/20/94 - */ - -int __bt_close __P((DB *)); -int __bt_cmp __P((BTREE *, const DBT *, EPG *)); -int __bt_crsrdel __P((BTREE *, EPGNO *)); -int __bt_defcmp __P((const DBT *, const DBT *)); -size_t __bt_defpfx __P((const DBT *, const DBT *)); -int __bt_delete __P((const DB *, const DBT *, u_int)); -int __bt_dleaf __P((BTREE *, const DBT *, PAGE *, u_int)); -int __bt_fd __P((const DB *)); -int __bt_free __P((BTREE *, PAGE *)); -int __bt_get __P((const DB *, const DBT *, DBT *, u_int)); -PAGE *__bt_new __P((BTREE *, pgno_t *)); -void __bt_pgin __P((void *, pgno_t, void *)); -void __bt_pgout __P((void *, pgno_t, void *)); -int __bt_push __P((BTREE *, pgno_t, int)); -int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int)); -int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int)); -EPG *__bt_search __P((BTREE *, const DBT *, int *)); -int __bt_seq __P((const DB *, DBT *, DBT *, u_int)); -void __bt_setcur __P((BTREE *, pgno_t, u_int)); -int __bt_split __P((BTREE *, PAGE *, - const DBT *, const DBT *, int, size_t, u_int32_t)); -int __bt_sync __P((const DB *, u_int)); - -int __ovfl_delete __P((BTREE *, void *)); -int __ovfl_get __P((BTREE *, void *, size_t *, void **, size_t *)); -int __ovfl_put __P((BTREE *, const DBT *, pgno_t *)); - -#ifdef DEBUG -void __bt_dnpage __P((DB *, pgno_t)); -void __bt_dpage __P((PAGE *)); -void __bt_dump __P((DB *)); -#endif -#ifdef STATISTICS -void __bt_stat __P((DB *)); -#endif |