From 8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 2 Jan 2000 06:01:06 +0000 Subject: Update. * Makeconfig (all-subdirs): Remove db and db2. * db/*: Removed. * db2/*: Removed. --- db2/btree/bt_compare.c | 195 ----- db2/btree/bt_conv.c | 94 --- db2/btree/bt_curadj.c | 272 ------- db2/btree/bt_cursor.c | 1913 ------------------------------------------------ db2/btree/bt_delete.c | 589 --------------- db2/btree/bt_open.c | 310 -------- db2/btree/bt_page.c | 317 -------- db2/btree/bt_put.c | 831 --------------------- db2/btree/bt_rec.c | 903 ----------------------- db2/btree/bt_recno.c | 1356 ---------------------------------- db2/btree/bt_rsearch.c | 391 ---------- db2/btree/bt_search.c | 369 ---------- db2/btree/bt_split.c | 966 ------------------------ db2/btree/bt_stat.c | 198 ----- db2/btree/btree_auto.c | 1508 -------------------------------------- 15 files changed, 10212 deletions(-) delete mode 100644 db2/btree/bt_compare.c delete mode 100644 db2/btree/bt_conv.c delete mode 100644 db2/btree/bt_curadj.c delete mode 100644 db2/btree/bt_cursor.c delete mode 100644 db2/btree/bt_delete.c delete mode 100644 db2/btree/bt_open.c delete mode 100644 db2/btree/bt_page.c delete mode 100644 db2/btree/bt_put.c delete mode 100644 db2/btree/bt_rec.c delete mode 100644 db2/btree/bt_recno.c delete mode 100644 db2/btree/bt_rsearch.c delete mode 100644 db2/btree/bt_search.c delete mode 100644 db2/btree/bt_split.c delete mode 100644 db2/btree/bt_stat.c delete mode 100644 db2/btree/btree_auto.c (limited to 'db2/btree') diff --git a/db2/btree/bt_compare.c b/db2/btree/bt_compare.c deleted file mode 100644 index c60f920612..0000000000 --- a/db2/btree/bt_compare.c +++ /dev/null @@ -1,195 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. 
- */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_compare.c 10.14 (Sleepycat) 10/9/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -/* - * __bam_cmp -- - * Compare a key to a given record. - * - * PUBLIC: int __bam_cmp __P((DB *, const DBT *, - * PUBLIC: PAGE *, u_int32_t, int (*)(const DBT *, const DBT *))); - */ -int -__bam_cmp(dbp, dbt, h, indx, func) - DB *dbp; - const DBT *dbt; - PAGE *h; - u_int32_t indx; - int (*func)__P((const DBT *, const DBT *)); -{ - BINTERNAL *bi; - BKEYDATA *bk; - BOVERFLOW *bo; - DBT pg_dbt; - int ret; - - /* - * Returns: - * < 0 if dbt is < page record - * = 0 if dbt is = page record - * > 0 if dbt is > page record - * - * !!! - * We do not clear the pg_dbt DBT even though it's likely to contain - * random bits. That should be okay, because the app's comparison - * routine had better not be looking at fields other than data/size. - * We don't clear it because we go through this path a lot and it's - * expensive. - */ - if (TYPE(h) == P_LBTREE || TYPE(h) == P_DUPLICATE) { - bk = GET_BKEYDATA(h, indx); - if (B_TYPE(bk->type) == B_OVERFLOW) - bo = (BOVERFLOW *)bk; - else { - pg_dbt.data = bk->data; - pg_dbt.size = bk->len; - return (func(dbt, &pg_dbt)); - } - } else { - /* - * The following code guarantees that the left-most key on an - * internal page at any level of the btree is less than any - * user specified key. This saves us from having to update the - * leftmost key on an internal page when the user inserts a new - * key in the tree smaller than anything we've seen before. - */ - if (indx == 0 && h->prev_pgno == PGNO_INVALID) - return (1); - - bi = GET_BINTERNAL(h, indx); - if (B_TYPE(bi->type) == B_OVERFLOW) - bo = (BOVERFLOW *)(bi->data); - else { - pg_dbt.data = bi->data; - pg_dbt.size = bi->len; - return (func(dbt, &pg_dbt)); - } - } - - /* - * Overflow. 
- * - * XXX - * We ignore __db_moff() errors, because we have no way of returning - * them. - */ - (void) __db_moff(dbp, - dbt, bo->pgno, bo->tlen, func == __bam_defcmp ? NULL : func, &ret); - return (ret); -} - -/* - * __bam_defcmp -- - * Default comparison routine. - * - * PUBLIC: int __bam_defcmp __P((const DBT *, const DBT *)); - */ -int -__bam_defcmp(a, b) - const DBT *a, *b; -{ - size_t len; - u_int8_t *p1, *p2; - - /* - * Returns: - * < 0 if a is < b - * = 0 if a is = b - * > 0 if a is > b - * - * XXX - * If a size_t doesn't fit into a long, or if the difference between - * any two characters doesn't fit into an int, this routine can lose. - * What we need is a signed integral type that's guaranteed to be at - * least as large as a size_t, and there is no such thing. - */ - len = a->size > b->size ? b->size : a->size; - for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2) - if (*p1 != *p2) - return ((long)*p1 - (long)*p2); - return ((long)a->size - (long)b->size); -} - -/* - * __bam_defpfx -- - * Default prefix routine. - * - * PUBLIC: size_t __bam_defpfx __P((const DBT *, const DBT *)); - */ -size_t -__bam_defpfx(a, b) - const DBT *a, *b; -{ - size_t cnt, len; - u_int8_t *p1, *p2; - - cnt = 1; - len = a->size > b->size ? b->size : a->size; - for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt) - if (*p1 != *p2) - return (cnt); - - /* - * We know that a->size must be <= b->size, or they wouldn't be - * in this order. - */ - return (a->size < b->size ? a->size + 1 : a->size); -} diff --git a/db2/btree/bt_conv.c b/db2/btree/bt_conv.c deleted file mode 100644 index a3069082ae..0000000000 --- a/db2/btree/bt_conv.c +++ /dev/null @@ -1,94 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_conv.c 10.7 (Sleepycat) 9/20/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "db_swap.h" -#include "btree.h" - -/* - * __bam_pgin -- - * Convert host-specific page layout from the host-independent format - * stored on disk. - * - * PUBLIC: int __bam_pgin __P((db_pgno_t, void *, DBT *)); - */ -int -__bam_pgin(pg, pp, cookie) - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB_PGINFO *pginfo; - - pginfo = (DB_PGINFO *)cookie->data; - if (!pginfo->needswap) - return (0); - return (pg == PGNO_METADATA ? - __bam_mswap(pp) : __db_pgin(pg, pginfo->db_pagesize, pp)); -} - -/* - * __bam_pgout -- - * Convert host-specific page layout to the host-independent format - * stored on disk. - * - * PUBLIC: int __bam_pgout __P((db_pgno_t, void *, DBT *)); - */ -int -__bam_pgout(pg, pp, cookie) - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB_PGINFO *pginfo; - - pginfo = (DB_PGINFO *)cookie->data; - if (!pginfo->needswap) - return (0); - return (pg == PGNO_METADATA ? - __bam_mswap(pp) : __db_pgout(pg, pginfo->db_pagesize, pp)); -} - -/* - * __bam_mswap -- - * Swap the bytes on the btree metadata page. - * - * PUBLIC: int __bam_mswap __P((PAGE *)); - */ -int -__bam_mswap(pg) - PAGE *pg; -{ - u_int8_t *p; - - p = (u_int8_t *)pg; - - /* Swap the meta-data information. */ - SWAP32(p); /* lsn.file */ - SWAP32(p); /* lsn.offset */ - SWAP32(p); /* pgno */ - SWAP32(p); /* magic */ - SWAP32(p); /* version */ - SWAP32(p); /* pagesize */ - SWAP32(p); /* maxkey */ - SWAP32(p); /* minkey */ - SWAP32(p); /* free */ - SWAP32(p); /* flags */ - - return (0); -} diff --git a/db2/btree/bt_curadj.c b/db2/btree/bt_curadj.c deleted file mode 100644 index 9b86fbb6d7..0000000000 --- a/db2/btree/bt_curadj.c +++ /dev/null @@ -1,272 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. 
- * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_curadj.c 10.69 (Sleepycat) 12/2/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -#ifdef DEBUG -/* - * __bam_cprint -- - * Display the current cursor list. - * - * PUBLIC: int __bam_cprint __P((DB *)); - */ -int -__bam_cprint(dbp) - DB *dbp; -{ - CURSOR *cp; - DBC *dbc; - - DB_THREAD_LOCK(dbp); - for (dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = (CURSOR *)dbc->internal; - fprintf(stderr, - "%#0x->%#0x: page: %lu index: %lu dpage %lu dindex: %lu recno: %lu", - (u_int)dbc, (u_int)cp, (u_long)cp->pgno, (u_long)cp->indx, - (u_long)cp->dpgno, (u_long)cp->dindx, (u_long)cp->recno); - if (F_ISSET(cp, C_DELETED)) - fprintf(stderr, " (deleted)"); - fprintf(stderr, "\n"); - } - DB_THREAD_UNLOCK(dbp); - - return (0); -} -#endif /* DEBUG */ - -/* - * __bam_ca_delete -- - * Update the cursors when items are deleted and when already deleted - * items are overwritten. Return the number of relevant cursors found. - * - * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int)); - */ -int -__bam_ca_delete(dbp, pgno, indx, delete) - DB *dbp; - db_pgno_t pgno; - u_int32_t indx; - int delete; -{ - DBC *dbc; - CURSOR *cp; - int count; /* !!!: Has to contain max number of cursors. */ - - /* Recno is responsible for its own adjustments. */ - if (dbp->type == DB_RECNO) - return (0); - - /* - * Adjust the cursors. We don't have to review the cursors for any - * thread of control other than the current one, because we have the - * page write locked at this point, and any other thread of control - * had better be using a different locker ID, meaning only cursors in - * our thread of control can be on the page. 
- * - * It's possible for multiple cursors within the thread to have write - * locks on the same page, but, cursors within a thread must be single - * threaded, so all we're locking here is the cursor linked list. - */ - DB_THREAD_LOCK(dbp); - for (count = 0, dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = (CURSOR *)dbc->internal; - - if ((cp->pgno == pgno && cp->indx == indx) || - (cp->dpgno == pgno && cp->dindx == indx)) { - if (delete) - F_SET(cp, C_DELETED); - else - F_CLR(cp, C_DELETED); - ++count; - } - } - DB_THREAD_UNLOCK(dbp); - - return (count); -} - -/* - * __bam_ca_di -- - * Adjust the cursors during a delete or insert. - * - * PUBLIC: void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int)); - */ -void -__bam_ca_di(dbp, pgno, indx, adjust) - DB *dbp; - db_pgno_t pgno; - u_int32_t indx; - int adjust; -{ - CURSOR *cp; - DBC *dbc; - - /* Recno is responsible for its own adjustments. */ - if (dbp->type == DB_RECNO) - return; - - /* - * Adjust the cursors. See the comment in __bam_ca_delete(). - */ - DB_THREAD_LOCK(dbp); - for (dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = (CURSOR *)dbc->internal; - if (cp->pgno == pgno && cp->indx >= indx) - cp->indx += adjust; - if (cp->dpgno == pgno && cp->dindx >= indx) - cp->dindx += adjust; - } - DB_THREAD_UNLOCK(dbp); -} - -/* - * __bam_ca_dup -- - * Adjust the cursors when moving items from a leaf page to a duplicates - * page. - * - * PUBLIC: void __bam_ca_dup __P((DB *, - * PUBLIC: db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t)); - */ -void -__bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti) - DB *dbp; - db_pgno_t fpgno, tpgno; - u_int32_t first, fi, ti; -{ - CURSOR *cp; - DBC *dbc; - - /* Recno is responsible for its own adjustments. */ - if (dbp->type == DB_RECNO) - return; - - /* - * Adjust the cursors. See the comment in __bam_ca_delete(). 
- */ - DB_THREAD_LOCK(dbp); - for (dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = (CURSOR *)dbc->internal; - /* - * Ignore matching entries that have already been moved, - * we move from the same location on the leaf page more - * than once. - */ - if (cp->dpgno == PGNO_INVALID && - cp->pgno == fpgno && cp->indx == fi) { - cp->indx = first; - cp->dpgno = tpgno; - cp->dindx = ti; - } - } - DB_THREAD_UNLOCK(dbp); -} - -/* - * __bam_ca_rsplit -- - * Adjust the cursors when doing reverse splits. - * - * PUBLIC: void __bam_ca_rsplit __P((DB *, db_pgno_t, db_pgno_t)); - */ -void -__bam_ca_rsplit(dbp, fpgno, tpgno) - DB *dbp; - db_pgno_t fpgno, tpgno; -{ - CURSOR *cp; - DBC *dbc; - - /* Recno is responsible for its own adjustments. */ - if (dbp->type == DB_RECNO) - return; - - /* - * Adjust the cursors. See the comment in __bam_ca_delete(). - */ - DB_THREAD_LOCK(dbp); - for (dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = (CURSOR *)dbc->internal; - if (cp->pgno == fpgno) - cp->pgno = tpgno; - } - DB_THREAD_UNLOCK(dbp); -} - -/* - * __bam_ca_split -- - * Adjust the cursors when splitting a page. - * - * PUBLIC: void __bam_ca_split __P((DB *, - * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int)); - */ -void -__bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft) - DB *dbp; - db_pgno_t ppgno, lpgno, rpgno; - u_int32_t split_indx; - int cleft; -{ - DBC *dbc; - CURSOR *cp; - - /* Recno is responsible for its own adjustments. */ - if (dbp->type == DB_RECNO) - return; - - /* - * Adjust the cursors. See the comment in __bam_ca_delete(). - * - * If splitting the page that a cursor was on, the cursor has to be - * adjusted to point to the same record as before the split. Most - * of the time we don't adjust pointers to the left page, because - * we're going to copy its contents back over the original page. 
If - * the cursor is on the right page, it is decremented by the number of - * records split to the left page. - */ - DB_THREAD_LOCK(dbp); - for (dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = (CURSOR *)dbc->internal; - if (cp->pgno == ppgno) { - if (cp->indx < split_indx) { - if (cleft) - cp->pgno = lpgno; - } else { - cp->pgno = rpgno; - cp->indx -= split_indx; - } - } - if (cp->dpgno == ppgno) { - if (cp->dindx < split_indx) { - if (cleft) - cp->dpgno = lpgno; - } else { - cp->dpgno = rpgno; - cp->dindx -= split_indx; - } - } - } - DB_THREAD_UNLOCK(dbp); -} diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c deleted file mode 100644 index 10bc095c9d..0000000000 --- a/db2/btree/bt_cursor.c +++ /dev/null @@ -1,1913 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_cursor.c 10.81 (Sleepycat) 12/16/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" -#include "shqueue.h" -#include "db_shash.h" -#include "lock.h" -#include "lock_ext.h" - -static int __bam_c_close __P((DBC *)); -static int __bam_c_del __P((DBC *, u_int32_t)); -static int __bam_c_destroy __P((DBC *)); -static int __bam_c_first __P((DBC *, CURSOR *)); -static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); -static int __bam_c_getstack __P((DBC *, CURSOR *)); -static int __bam_c_last __P((DBC *, CURSOR *)); -static int __bam_c_next __P((DBC *, CURSOR *, int)); -static int __bam_c_physdel __P((DBC *, CURSOR *, PAGE *)); -static int __bam_c_prev __P((DBC *, CURSOR *)); -static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); -static void __bam_c_reset __P((CURSOR *)); -static int __bam_c_rget __P((DBC *, DBT *, u_int32_t)); -static int 
__bam_c_search __P((DBC *, CURSOR *, const DBT *, u_int32_t, int *)); -static int __bam_dsearch __P((DBC *, CURSOR *, DBT *, u_int32_t *)); - -/* Discard the current page/lock held by a cursor. */ -#undef DISCARD -#define DISCARD(dbc, cp) { \ - if ((cp)->page != NULL) { \ - (void)memp_fput((dbc)->dbp->mpf, (cp)->page, 0); \ - (cp)->page = NULL; \ - } \ - if ((cp)->lock != LOCK_INVALID) { \ - (void)__BT_TLPUT((dbc), (cp)->lock); \ - (cp)->lock = LOCK_INVALID; \ - } \ -} - -/* If the cursor references a deleted record. */ -#undef IS_CUR_DELETED -#define IS_CUR_DELETED(cp) \ - (((cp)->dpgno == PGNO_INVALID && \ - B_DISSET(GET_BKEYDATA((cp)->page, \ - (cp)->indx + O_INDX)->type)) || \ - ((cp)->dpgno != PGNO_INVALID && \ - B_DISSET(GET_BKEYDATA((cp)->page, (cp)->dindx)->type))) - -/* If the cursor and index combination references a deleted record. */ -#undef IS_DELETED -#define IS_DELETED(cp, indx) \ - (((cp)->dpgno == PGNO_INVALID && \ - B_DISSET(GET_BKEYDATA((cp)->page, (indx) + O_INDX)->type)) || \ - ((cp)->dpgno != PGNO_INVALID && \ - B_DISSET(GET_BKEYDATA((cp)->page, (indx))->type))) - -/* - * Test to see if two cursors could point to duplicates of the same key, - * whether on-page or off-page. The leaf page numbers must be the same - * in both cases. In the case of off-page duplicates, the key indices - * on the leaf page will be the same. In the case of on-page duplicates, - * the duplicate page number must not be set, and the key index offsets - * must be the same. For the last test, as the saved copy of the cursor - * will not have a valid page pointer, we use the cursor's. 
- */ -#undef POSSIBLE_DUPLICATE -#define POSSIBLE_DUPLICATE(cursor, saved_copy) \ - ((cursor)->pgno == (saved_copy).pgno && \ - ((cursor)->indx == (saved_copy).indx || \ - ((cursor)->dpgno == PGNO_INVALID && \ - (saved_copy).dpgno == PGNO_INVALID && \ - (cursor)->page->inp[(cursor)->indx] == \ - (cursor)->page->inp[(saved_copy).indx]))) - -/* - * __bam_c_reset -- - * Initialize internal cursor structure. - */ -static void -__bam_c_reset(cp) - CURSOR *cp; -{ - cp->sp = cp->csp = cp->stack; - cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]); - cp->page = NULL; - cp->pgno = PGNO_INVALID; - cp->indx = 0; - cp->dpgno = PGNO_INVALID; - cp->dindx = 0; - cp->lock = LOCK_INVALID; - cp->mode = DB_LOCK_NG; - cp->recno = RECNO_OOB; - cp->flags = 0; -} - -/* - * __bam_c_init -- - * Initialize the access private portion of a cursor - * - * PUBLIC: int __bam_c_init __P((DBC *)); - */ -int -__bam_c_init(dbc) - DBC *dbc; -{ - DB *dbp; - CURSOR *cp; - int ret; - - if ((ret = __os_calloc(1, sizeof(CURSOR), &cp)) != 0) - return (ret); - - dbp = dbc->dbp; - cp->dbc = dbc; - - /* - * Logical record numbers are always the same size, and we don't want - * to have to check for space every time we return one. Allocate it - * in advance. - */ - if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) { - if ((ret = __os_malloc(sizeof(db_recno_t), - NULL, &dbc->rkey.data)) != 0) { - __os_free(cp, sizeof(CURSOR)); - return (ret); - } - dbc->rkey.ulen = sizeof(db_recno_t); - } - - /* Initialize methods. */ - dbc->internal = cp; - if (dbp->type == DB_BTREE) { - dbc->c_am_close = __bam_c_close; - dbc->c_am_destroy = __bam_c_destroy; - dbc->c_del = __bam_c_del; - dbc->c_get = __bam_c_get; - dbc->c_put = __bam_c_put; - } else { - dbc->c_am_close = __bam_c_close; - dbc->c_am_destroy = __bam_c_destroy; - dbc->c_del = __ram_c_del; - dbc->c_get = __ram_c_get; - dbc->c_put = __ram_c_put; - } - - /* Initialize dynamic information. 
*/ - __bam_c_reset(cp); - - return (0); -} - -/* - * __bam_c_close -- - * Close down the cursor from a single use. - */ -static int -__bam_c_close(dbc) - DBC *dbc; -{ - CURSOR *cp; - DB *dbp; - int ret; - - dbp = dbc->dbp; - cp = dbc->internal; - ret = 0; - - /* - * If a cursor deleted a btree key, perform the actual deletion. - * (Recno keys are either deleted immediately or never deleted.) - */ - if (dbp->type == DB_BTREE && F_ISSET(cp, C_DELETED)) - ret = __bam_c_physdel(dbc, cp, NULL); - - /* Discard any locks not acquired inside of a transaction. */ - if (cp->lock != LOCK_INVALID) { - (void)__BT_TLPUT(dbc, cp->lock); - cp->lock = LOCK_INVALID; - } - - /* Sanity checks. */ -#ifdef DIAGNOSTIC - if (cp->csp != cp->stack) - __db_err(dbp->dbenv, "btree cursor close: stack not empty"); -#endif - - /* Initialize dynamic information. */ - __bam_c_reset(cp); - - return (ret); -} - -/* - * __bam_c_destroy -- - * Close a single cursor -- internal version. - */ -static int -__bam_c_destroy(dbc) - DBC *dbc; -{ - /* Discard the structures. */ - __os_free(dbc->internal, sizeof(CURSOR)); - - return (0); -} - -/* - * __bam_c_del -- - * Delete using a cursor. - */ -static int -__bam_c_del(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - CURSOR *cp; - DB *dbp; - DB_LOCK lock; - PAGE *h; - db_pgno_t pgno; - db_indx_t indx; - int ret; - - dbp = dbc->dbp; - cp = dbc->internal; - h = NULL; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = __db_cdelchk(dbp, flags, - F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0) - return (ret); - - /* - * If we are running CDB, this had better be either a write - * cursor or an immediate writer. - */ - if (F_ISSET(dbp, DB_AM_CDB)) - if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) - return (EINVAL); - - DEBUG_LWRITE(dbc, dbc->txn, "bam_c_del", NULL, NULL, flags); - - /* If already deleted, return failure. 
*/ - if (F_ISSET(cp, C_DELETED)) - return (DB_KEYEMPTY); - - /* - * We don't physically delete the record until the cursor moves, - * so we have to have a long-lived write lock on the page instead - * of a long-lived read lock. Note, we have to have a read lock - * to even get here, so we simply discard it. - */ - if (F_ISSET(dbp, DB_AM_LOCKING) && cp->mode != DB_LOCK_WRITE) { - if ((ret = __bam_lget(dbc, - 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0) - goto err; - (void)__BT_TLPUT(dbc, cp->lock); - cp->lock = lock; - cp->mode = DB_LOCK_WRITE; - } - - /* - * Acquire the underlying page (which may be different from the above - * page because it may be a duplicate page), and set the on-page and - * in-cursor delete flags. We don't need to lock it as we've already - * write-locked the page leading to it. - */ - if (cp->dpgno == PGNO_INVALID) { - pgno = cp->pgno; - indx = cp->indx; - } else { - pgno = cp->dpgno; - indx = cp->dindx; - } - - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) - goto err; - - /* Log the change. */ - if (DB_LOGGING(dbc) && - (ret = __bam_cdel_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h), - 0, dbp->log_fileid, PGNO(h), &LSN(h), indx)) != 0) { - (void)memp_fput(dbp->mpf, h, 0); - goto err; - } - - /* - * Set the intent-to-delete flag on the page and update all cursors. */ - if (cp->dpgno == PGNO_INVALID) - B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type); - else - B_DSET(GET_BKEYDATA(h, indx)->type); - (void)__bam_ca_delete(dbp, pgno, indx, 1); - - ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); - h = NULL; - - /* - * If the tree has record numbers, we have to adjust the counts. - * - * !!! - * This test is right -- we don't yet support duplicates and record - * numbers in the same tree, so ignore duplicates if DB_BT_RECNUM - * set. 
- */ - if (F_ISSET(dbp, DB_BT_RECNUM)) { - if ((ret = __bam_c_getstack(dbc, cp)) != 0) - goto err; - if ((ret = __bam_adjust(dbc, -1)) != 0) - goto err; - (void)__bam_stkrel(dbc, 0); - } - -err: if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); - return (ret); -} - -/* - * __bam_c_get -- - * Get using a cursor (btree). - */ -static int -__bam_c_get(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - CURSOR *cp, copy, start; - DB *dbp; - PAGE *h; - int exact, ret, tmp_rmw; - - dbp = dbc->dbp; - cp = dbc->internal; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = __db_cgetchk(dbp, - key, data, flags, cp->pgno != PGNO_INVALID)) != 0) - return (ret); - - /* Clear OR'd in additional bits so we can check for flag equality. */ - tmp_rmw = 0; - if (LF_ISSET(DB_RMW)) { - if (!F_ISSET(dbp, DB_AM_CDB)) { - tmp_rmw = 1; - F_SET(dbc, DBC_RMW); - } - LF_CLR(DB_RMW); - } - - DEBUG_LREAD(dbc, dbc->txn, "bam_c_get", - flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); - - /* - * Return a cursor's record number. It has nothing to do with the - * cursor get code except that it's been rammed into the interface. - */ - if (flags == DB_GET_RECNO) { - ret = __bam_c_rget(dbc, data, flags); - if (tmp_rmw) - F_CLR(dbc, DBC_RMW); - return (ret); - } - - /* - * Initialize the cursor for a new retrieval. Clear the cursor's - * page pointer, it was set before this operation, and no longer - * has any meaning. - */ - cp->page = NULL; - copy = *cp; - cp->lock = LOCK_INVALID; - - switch (flags) { - case DB_CURRENT: - /* It's not possible to return a deleted record. */ - if (F_ISSET(cp, C_DELETED)) { - ret = DB_KEYEMPTY; - goto err; - } - - /* Acquire the current page. */ - if ((ret = __bam_lget(dbc, - 0, cp->pgno, DB_LOCK_READ, &cp->lock)) == 0) - ret = memp_fget(dbp->mpf, - cp->dpgno == PGNO_INVALID ? 
&cp->pgno : &cp->dpgno, - 0, &cp->page); - if (ret != 0) - goto err; - break; - case DB_NEXT_DUP: - if (cp->pgno == PGNO_INVALID) { - ret = EINVAL; - goto err; - } - if ((ret = __bam_c_next(dbc, cp, 1)) != 0) - goto err; - - /* Make sure we didn't go past the end of the duplicates. */ - if (!POSSIBLE_DUPLICATE(cp, copy)) { - ret = DB_NOTFOUND; - goto err; - } - break; - case DB_NEXT: - if (cp->pgno != PGNO_INVALID) { - if ((ret = __bam_c_next(dbc, cp, 1)) != 0) - goto err; - break; - } - /* FALLTHROUGH */ - case DB_FIRST: - if ((ret = __bam_c_first(dbc, cp)) != 0) - goto err; - break; - case DB_PREV: - if (cp->pgno != PGNO_INVALID) { - if ((ret = __bam_c_prev(dbc, cp)) != 0) - goto err; - break; - } - /* FALLTHROUGH */ - case DB_LAST: - if ((ret = __bam_c_last(dbc, cp)) != 0) - goto err; - break; - case DB_SET: - if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0) - goto err; - - /* - * We cannot currently be referencing a deleted record, but we - * may be referencing off-page duplicates. - * - * If we're referencing off-page duplicates, move off-page. - * If we moved off-page, move to the next non-deleted record. - * If we moved to the next non-deleted record, check to make - * sure we didn't switch records because our current record - * had no non-deleted data items. - */ - start = *cp; - if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) - goto err; - if (cp->dpgno != PGNO_INVALID && IS_CUR_DELETED(cp)) { - if ((ret = __bam_c_next(dbc, cp, 0)) != 0) - goto err; - if (!POSSIBLE_DUPLICATE(cp, start)) { - ret = DB_NOTFOUND; - goto err; - } - } - break; - case DB_SET_RECNO: - if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0) - goto err; - break; - case DB_GET_BOTH: - if (F_ISSET(dbc, DBC_CONTINUE | DBC_KEYSET)) { - /* Acquire the current page. */ - if ((ret = memp_fget(dbp->mpf, - cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno, - 0, &cp->page)) != 0) - goto err; - - /* If DBC_CONTINUE, move to the next item. 
*/ - if (F_ISSET(dbc, DBC_CONTINUE) && - (ret = __bam_c_next(dbc, cp, 1)) != 0) - goto err; - } else { - if ((ret = - __bam_c_search(dbc, cp, key, flags, &exact)) != 0) - goto err; - - /* - * We may be referencing a duplicates page. Move to - * the first duplicate. - */ - if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) - goto err; - } - - /* Search for a matching entry. */ - if ((ret = __bam_dsearch(dbc, cp, data, NULL)) != 0) - goto err; - - /* Ignore deleted entries. */ - if (IS_CUR_DELETED(cp)) { - ret = DB_NOTFOUND; - goto err; - } - break; - case DB_SET_RANGE: - if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0) - goto err; - - /* - * As we didn't require an exact match, the search function - * may have returned an entry past the end of the page. If - * so, move to the next entry. - */ - if (cp->indx == NUM_ENT(cp->page) && - (ret = __bam_c_next(dbc, cp, 0)) != 0) - goto err; - - /* - * We may be referencing off-page duplicates, if so, move - * off-page. - */ - if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) - goto err; - - /* - * We may be referencing a deleted record, if so, move to - * the next non-deleted record. - */ - if (IS_CUR_DELETED(cp) && (ret = __bam_c_next(dbc, cp, 0)) != 0) - goto err; - break; - } - - /* - * Return the key if the user didn't give us one. If we've moved to - * a duplicate page, we may no longer have a pointer to the main page, - * so we have to go get it. We know that it's already read-locked, - * however, so we don't have to acquire a new lock. - */ - if (flags != DB_SET) { - if (cp->dpgno != PGNO_INVALID) { - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) - goto err; - } else - h = cp->page; - ret = __db_ret(dbp, - h, cp->indx, key, &dbc->rkey.data, &dbc->rkey.ulen); - if (cp->dpgno != PGNO_INVALID) - (void)memp_fput(dbp->mpf, h, 0); - if (ret) - goto err; - } - - /* Return the data. */ - if ((ret = __db_ret(dbp, cp->page, - cp->dpgno == PGNO_INVALID ? 
cp->indx + O_INDX : cp->dindx, - data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) - goto err; - - /* - * If the previous cursor record has been deleted, physically delete - * the entry from the page. We clear the deleted flag before we call - * the underlying delete routine so that, if an error occurs, and we - * restore the cursor, the deleted flag is cleared. This is because, - * if we manage to physically modify the page, and then restore the - * cursor, we might try to repeat the page modification when closing - * the cursor. - */ - if (F_ISSET(&copy, C_DELETED)) { - F_CLR(&copy, C_DELETED); - if ((ret = __bam_c_physdel(dbc, &copy, cp->page)) != 0) - goto err; - } - F_CLR(cp, C_DELETED); - - /* Release the previous lock, if any; the current lock is retained. */ - if (copy.lock != LOCK_INVALID) - (void)__BT_TLPUT(dbc, copy.lock); - - /* Release the current page. */ - if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) - goto err; - - if (0) { -err: if (cp->page != NULL) - (void)memp_fput(dbp->mpf, cp->page, 0); - if (cp->lock != LOCK_INVALID) - (void)__BT_TLPUT(dbc, cp->lock); - *cp = copy; - } - - /* Release temporary lock upgrade. */ - if (tmp_rmw) - F_CLR(dbc, DBC_RMW); - - return (ret); -} - -/* - * __bam_dsearch -- - * Search for a matching data item (or the first data item that's - * equal to or greater than the one we're searching for). - */ -static int -__bam_dsearch(dbc, cp, data, iflagp) - DBC *dbc; - CURSOR *cp; - DBT *data; - u_int32_t *iflagp; -{ - DB *dbp; - CURSOR copy, last; - int cmp, ret; - - dbp = dbc->dbp; - - /* - * If iflagp is non-NULL, we're doing an insert. - * - * If the duplicates are off-page, use the duplicate search routine. 
- */ - if (cp->dpgno != PGNO_INVALID) { - if ((ret = __db_dsearch(dbc, iflagp != NULL, - data, cp->dpgno, &cp->dindx, &cp->page, &cmp)) != 0) - return (ret); - cp->dpgno = cp->page->pgno; - - if (iflagp == NULL) { - if (cmp != 0) - return (DB_NOTFOUND); - return (0); - } - *iflagp = DB_BEFORE; - return (0); - } - - /* Otherwise, do the search ourselves. */ - copy = *cp; - for (;;) { - /* Save the last interesting cursor position. */ - last = *cp; - - /* See if the data item matches the one we're looking for. */ - if ((cmp = __bam_cmp(dbp, data, cp->page, cp->indx + O_INDX, - dbp->dup_compare == NULL ? - __bam_defcmp : dbp->dup_compare)) == 0) { - if (iflagp != NULL) - *iflagp = DB_AFTER; - return (0); - } - - /* - * If duplicate entries are sorted, we're done if we find a - * page entry that sorts greater than the application item. - * If doing an insert, return success, otherwise DB_NOTFOUND. - */ - if (dbp->dup_compare != NULL && cmp < 0) { - if (iflagp == NULL) - return (DB_NOTFOUND); - *iflagp = DB_BEFORE; - return (0); - } - - /* - * Move to the next item. If we reach the end of the page and - * we're doing an insert, set the cursor to the last item and - * set the referenced memory location so callers know to insert - * after the item, instead of before it. If not inserting, we - * return DB_NOTFOUND. - */ - if ((cp->indx += P_INDX) >= NUM_ENT(cp->page)) { - if (iflagp == NULL) - return (DB_NOTFOUND); - goto use_last; - } - - /* - * Make sure we didn't go past the end of the duplicates. The - * error conditions are the same as above. - */ - if (!POSSIBLE_DUPLICATE(cp, copy)) { - if (iflagp == NULL) - return (DB_NOTFOUND); -use_last: *cp = last; - *iflagp = DB_AFTER; - return (0); - } - } - /* NOTREACHED */ -} - -/* - * __bam_c_rget -- - * Return the record number for a cursor. 
- */ -static int -__bam_c_rget(dbc, data, flags) - DBC *dbc; - DBT *data; - u_int32_t flags; -{ - CURSOR *cp; - DB *dbp; - DBT dbt; - db_recno_t recno; - int exact, ret; - - COMPQUIET(flags, 0); - dbp = dbc->dbp; - cp = dbc->internal; - - /* Get the page with the current item on it. */ - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) - return (ret); - - /* Get a copy of the key. */ - memset(&dbt, 0, sizeof(DBT)); - dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL; - if ((ret = __db_ret(dbp, cp->page, cp->indx, &dbt, NULL, NULL)) != 0) - goto err; - - exact = 1; - if ((ret = __bam_search(dbc, &dbt, - F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, - 1, &recno, &exact)) != 0) - goto err; - - ret = __db_retcopy(data, &recno, sizeof(recno), - &dbc->rdata.data, &dbc->rdata.ulen, dbp->db_malloc); - - /* Release the stack. */ - __bam_stkrel(dbc, 0); - -err: (void)memp_fput(dbp->mpf, cp->page, 0); - __os_free(dbt.data, dbt.size); - return (ret); -} - -/* - * __bam_c_put -- - * Put using a cursor. - */ -static int -__bam_c_put(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - CURSOR *cp, copy; - DB *dbp; - DBT dbt; - db_indx_t indx; - db_pgno_t pgno; - u_int32_t iiflags, iiop; - int exact, needkey, ret, stack; - void *arg; - - dbp = dbc->dbp; - cp = dbc->internal; - - DB_PANIC_CHECK(dbp); - - DEBUG_LWRITE(dbc, dbc->txn, "bam_c_put", - flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL, - data, flags); - - if ((ret = __db_cputchk(dbp, key, data, flags, - F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0) - return (ret); - - /* - * If we are running CDB, this had better be either a write - * cursor or an immediate writer. If it's a regular writer, - * that means we have an IWRITE lock and we need to upgrade - * it to a write lock. 
- */ - if (F_ISSET(dbp, DB_AM_CDB)) { - if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) - return (EINVAL); - - if (F_ISSET(dbc, DBC_RMW) && - (ret = lock_get(dbp->dbenv->lk_info, dbc->locker, - DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, - &dbc->mylock)) != 0) - return (EAGAIN); - } - - if (0) { -split: /* - * To split, we need a valid key for the page. Since it's a - * cursor, we have to build one. - * - * Acquire a copy of a key from the page. - */ - if (needkey) { - memset(&dbt, 0, sizeof(DBT)); - if ((ret = __db_ret(dbp, cp->page, indx, - &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0) - goto err; - arg = &dbt; - } else - arg = key; - - /* - * Discard any locks and pinned pages (the locks are discarded - * even if we're running with transactions, as they lock pages - * that we're sorry we ever acquired). If stack is set and the - * cursor entries are valid, they point to the same entries as - * the stack, don't free them twice. - */ - if (stack) { - (void)__bam_stkrel(dbc, 1); - stack = 0; - } else - DISCARD(dbc, cp); - - /* - * Restore the cursor to its original value. This is necessary - * for two reasons. First, we are about to copy it in case of - * error, again. Second, we adjust cursors during the split, - * and we have to ensure this cursor is adjusted appropriately, - * along with all the other cursors. - */ - *cp = copy; - - if ((ret = __bam_split(dbc, arg)) != 0) - goto err; - } - - /* - * Initialize the cursor for a new retrieval. Clear the cursor's - * page pointer, it was set before this operation, and no longer - * has any meaning. - */ - cp->page = NULL; - copy = *cp; - cp->lock = LOCK_INVALID; - - iiflags = needkey = ret = stack = 0; - switch (flags) { - case DB_AFTER: - case DB_BEFORE: - case DB_CURRENT: - needkey = 1; - if (cp->dpgno == PGNO_INVALID) { - pgno = cp->pgno; - indx = cp->indx; - } else { - pgno = cp->dpgno; - indx = cp->dindx; - } - - /* - * !!! 
- * This test is right -- we don't yet support duplicates and - * record numbers in the same tree, so ignore duplicates if - * DB_BT_RECNUM set. - */ - if (F_ISSET(dbp, DB_BT_RECNUM) && - (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) { - /* Acquire a complete stack. */ - if ((ret = __bam_c_getstack(dbc, cp)) != 0) - goto err; - cp->page = cp->csp->page; - - stack = 1; - iiflags = BI_DOINCR; - } else { - /* Acquire the current page. */ - if ((ret = __bam_lget(dbc, - 0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0) - ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page); - if (ret != 0) - goto err; - - iiflags = 0; - } - - /* - * If the user has specified a duplicate comparison function, - * we return an error if DB_CURRENT was specified and the - * replacement data doesn't compare equal to the current data. - * This stops apps from screwing up the duplicate sort order. - */ - if (flags == DB_CURRENT && dbp->dup_compare != NULL) - if (__bam_cmp(dbp, data, - cp->page, indx, dbp->dup_compare) != 0) { - ret = EINVAL; - goto err; - } - - iiop = flags; - break; - case DB_KEYFIRST: - case DB_KEYLAST: - /* - * If we have a duplicate comparison function, we position to - * the first of any on-page duplicates, and use __bam_dsearch - * to search for the right slot. Otherwise, we position to - * the first/last of any on-page duplicates based on the flag - * value. - */ - if ((ret = __bam_c_search(dbc, cp, key, - flags == DB_KEYFIRST || dbp->dup_compare != NULL ? - DB_KEYFIRST : DB_KEYLAST, &exact)) != 0) - goto err; - stack = 1; - - /* - * If an exact match: - * If duplicates aren't supported, replace the current - * item. (When implementing the DB->put function, our - * caller has already checked the DB_NOOVERWRITE flag.) - * - * If there's a duplicate comparison function, find the - * correct slot for this duplicate item. - * - * If there's no duplicate comparison function, set the - * insert flag based on the argument flags. 
- * - * If there's no match, the search function returned the - * smallest slot greater than the key, use it. - */ - if (exact) { - if (F_ISSET(dbp, DB_AM_DUP)) { - /* - * If at off-page duplicate page, move to the - * first or last entry -- if a comparison - * function was specified, start searching at - * the first entry. Otherwise, move based on - * the DB_KEYFIRST/DB_KEYLAST flags. - */ - if ((ret = __bam_dup(dbc, cp, cp->indx, - dbp->dup_compare == NULL && - flags != DB_KEYFIRST)) != 0) - goto err; - - /* - * If there's a comparison function, search for - * the correct slot. Otherwise, set the insert - * flag based on the argment flag. - */ - if (dbp->dup_compare == NULL) - iiop = flags == DB_KEYFIRST ? - DB_BEFORE : DB_AFTER; - else - if ((ret = __bam_dsearch(dbc, - cp, data, &iiop)) != 0) - goto err; - } else - iiop = DB_CURRENT; - iiflags = 0; - } else { - iiop = DB_BEFORE; - iiflags = BI_NEWKEY; - } - - if (cp->dpgno == PGNO_INVALID) { - pgno = cp->pgno; - indx = cp->indx; - } else { - pgno = cp->dpgno; - indx = cp->dindx; - } - break; - } - - ret = __bam_iitem(dbc, &cp->page, &indx, key, data, iiop, iiflags); - - if (ret == DB_NEEDSPLIT) - goto split; - if (ret != 0) - goto err; - - /* - * Reset any cursors referencing this item that might have the item - * marked for deletion. - */ - if (iiop == DB_CURRENT) { - (void)__bam_ca_delete(dbp, pgno, indx, 0); - - /* - * It's also possible that we are the cursor that had the - * item marked for deletion, in which case we want to make - * sure that we don't delete it because we had the delete - * flag set already. - */ - if (cp->pgno == copy.pgno && cp->indx == copy.indx && - cp->dpgno == copy.dpgno && cp->dindx == copy.dindx) - F_CLR(©, C_DELETED); - } - - /* - * Update the cursor to point to the new entry. The new entry was - * stored on the current page, because we split pages until it was - * possible. 
- */ - if (cp->dpgno == PGNO_INVALID) - cp->indx = indx; - else - cp->dindx = indx; - - /* - * If the previous cursor record has been deleted, physically delete - * the entry from the page. We clear the deleted flag before we call - * the underlying delete routine so that, if an error occurs, and we - * restore the cursor, the deleted flag is cleared. This is because, - * if we manage to physically modify the page, and then restore the - * cursor, we might try to repeat the page modification when closing - * the cursor. - */ - if (F_ISSET(©, C_DELETED)) { - F_CLR(©, C_DELETED); - if ((ret = __bam_c_physdel(dbc, ©, cp->page)) != 0) - goto err; - } - F_CLR(cp, C_DELETED); - - /* Release the previous lock, if any; the current lock is retained. */ - if (copy.lock != LOCK_INVALID) - (void)__BT_TLPUT(dbc, copy.lock); - - /* - * Discard any pages pinned in the tree and their locks, except for - * the leaf page, for which we only discard the pin, not the lock. - * - * Note, the leaf page participated in the stack we acquired, and so - * we have to adjust the stack as necessary. If there was only a - * single page on the stack, we don't have to free further stack pages. - */ - if (stack && BT_STK_POP(cp) != NULL) - (void)__bam_stkrel(dbc, 0); - - /* Release the current page. */ - if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) - goto err; - - if (0) { -err: /* Discard any pinned pages. */ - if (stack) - (void)__bam_stkrel(dbc, 0); - else - DISCARD(dbc, cp); - *cp = copy; - } - - if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) - (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, - DB_LOCK_IWRITE, 0); - - return (ret); -} - -/* - * __bam_c_first -- - * Return the first record. - */ -static int -__bam_c_first(dbc, cp) - DBC *dbc; - CURSOR *cp; -{ - DB *dbp; - db_pgno_t pgno; - int ret; - - dbp = dbc->dbp; - - /* Walk down the left-hand side of the tree. 
*/ - for (pgno = PGNO_ROOT;;) { - if ((ret = - __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) - return (ret); - - /* If we find a leaf page, we're done. */ - if (ISLEAF(cp->page)) - break; - - pgno = GET_BINTERNAL(cp->page, 0)->pgno; - DISCARD(dbc, cp); - } - - cp->pgno = cp->page->pgno; - cp->indx = 0; - cp->dpgno = PGNO_INVALID; - - /* Check for duplicates. */ - if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) - return (ret); - - /* If on an empty page or a deleted record, move to the next one. */ - if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp)) - if ((ret = __bam_c_next(dbc, cp, 0)) != 0) - return (ret); - - return (0); -} - -/* - * __bam_c_last -- - * Return the last record. - */ -static int -__bam_c_last(dbc, cp) - DBC *dbc; - CURSOR *cp; -{ - DB *dbp; - db_pgno_t pgno; - int ret; - - dbp = dbc->dbp; - - /* Walk down the right-hand side of the tree. */ - for (pgno = PGNO_ROOT;;) { - if ((ret = - __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) - return (ret); - - /* If we find a leaf page, we're done. */ - if (ISLEAF(cp->page)) - break; - - pgno = - GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno; - DISCARD(dbc, cp); - } - - cp->pgno = cp->page->pgno; - cp->indx = NUM_ENT(cp->page) == 0 ? 0 : NUM_ENT(cp->page) - P_INDX; - cp->dpgno = PGNO_INVALID; - - /* Check for duplicates. */ - if ((ret = __bam_dup(dbc, cp, cp->indx, 1)) != 0) - return (ret); - - /* If on an empty page or a deleted record, move to the next one. */ - if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp)) - if ((ret = __bam_c_prev(dbc, cp)) != 0) - return (ret); - - return (0); -} - -/* - * __bam_c_next -- - * Move to the next record. 
- */ -static int -__bam_c_next(dbc, cp, initial_move) - DBC *dbc; - CURSOR *cp; - int initial_move; -{ - DB *dbp; - db_indx_t adjust, indx; - db_pgno_t pgno; - int ret; - - dbp = dbc->dbp; - - /* - * We're either moving through a page of duplicates or a btree leaf - * page. - */ - if (cp->dpgno == PGNO_INVALID) { - adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX; - pgno = cp->pgno; - indx = cp->indx; - } else { - adjust = O_INDX; - pgno = cp->dpgno; - indx = cp->dindx; - } - if (cp->page == NULL) { - if ((ret = - __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) - return (ret); - } - - /* - * If at the end of the page, move to a subsequent page. - * - * !!! - * Check for >= NUM_ENT. If we're here as the result of a search that - * landed us on NUM_ENT, we'll increment indx before we test. - * - * !!! - * This code handles empty pages and pages with only deleted entries. - */ - if (initial_move) - indx += adjust; - for (;;) { - if (indx >= NUM_ENT(cp->page)) { - /* - * If we're in a btree leaf page, we've reached the end - * of the tree. If we've reached the end of a page of - * duplicates, continue from the btree leaf page where - * we found this page of duplicates. - */ - pgno = cp->page->next_pgno; - if (pgno == PGNO_INVALID) { - /* If in a btree leaf page, it's EOF. */ - if (cp->dpgno == PGNO_INVALID) - return (DB_NOTFOUND); - - /* Continue from the last btree leaf page. */ - cp->dpgno = PGNO_INVALID; - - adjust = P_INDX; - pgno = cp->pgno; - indx = cp->indx + P_INDX; - } else - indx = 0; - - DISCARD(dbc, cp); - if ((ret = __bam_lget(dbc, - 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) - return (ret); - if ((ret = - memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) - return (ret); - continue; - } - - /* Ignore deleted records. 
*/ - if (IS_DELETED(cp, indx)) { - indx += adjust; - continue; - } - - /* - * If we're not in a duplicates page, check to see if we've - * found a page of duplicates, in which case we move to the - * first entry. - */ - if (cp->dpgno == PGNO_INVALID) { - cp->pgno = cp->page->pgno; - cp->indx = indx; - - if ((ret = __bam_dup(dbc, cp, indx, 0)) != 0) - return (ret); - if (cp->dpgno != PGNO_INVALID) { - indx = cp->dindx; - adjust = O_INDX; - continue; - } - } else { - cp->dpgno = cp->page->pgno; - cp->dindx = indx; - } - break; - } - return (0); -} - -/* - * __bam_c_prev -- - * Move to the previous record. - */ -static int -__bam_c_prev(dbc, cp) - DBC *dbc; - CURSOR *cp; -{ - DB *dbp; - db_indx_t indx, adjust; - db_pgno_t pgno; - int ret, set_indx; - - dbp = dbc->dbp; - - /* - * We're either moving through a page of duplicates or a btree leaf - * page. - */ - if (cp->dpgno == PGNO_INVALID) { - adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX; - pgno = cp->pgno; - indx = cp->indx; - } else { - adjust = O_INDX; - pgno = cp->dpgno; - indx = cp->dindx; - } - if (cp->page == NULL) { - if ((ret = - __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) - return (ret); - } - - /* - * If at the beginning of the page, move to any previous one. - * - * !!! - * This code handles empty pages and pages with only deleted entries. - */ - for (;;) { - if (indx == 0) { - /* - * If we're in a btree leaf page, we've reached the - * beginning of the tree. If we've reached the first - * of a page of duplicates, continue from the btree - * leaf page where we found this page of duplicates. - */ - pgno = cp->page->prev_pgno; - if (pgno == PGNO_INVALID) { - /* If in a btree leaf page, it's SOF. */ - if (cp->dpgno == PGNO_INVALID) - return (DB_NOTFOUND); - - /* Continue from the last btree leaf page. 
*/ - cp->dpgno = PGNO_INVALID; - - adjust = P_INDX; - pgno = cp->pgno; - indx = cp->indx; - set_indx = 0; - } else - set_indx = 1; - - DISCARD(dbc, cp); - if ((ret = __bam_lget(dbc, - 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) - return (ret); - if ((ret = - memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) - return (ret); - - if (set_indx) - indx = NUM_ENT(cp->page); - if (indx == 0) - continue; - } - - /* Ignore deleted records. */ - indx -= adjust; - if (IS_DELETED(cp, indx)) - continue; - - /* - * If we're not in a duplicates page, check to see if we've - * found a page of duplicates, in which case we move to the - * last entry. - */ - if (cp->dpgno == PGNO_INVALID) { - cp->pgno = cp->page->pgno; - cp->indx = indx; - - if ((ret = __bam_dup(dbc, cp, indx, 1)) != 0) - return (ret); - if (cp->dpgno != PGNO_INVALID) { - indx = cp->dindx + O_INDX; - adjust = O_INDX; - continue; - } - } else { - cp->dpgno = cp->page->pgno; - cp->dindx = indx; - } - break; - } - return (0); -} - -/* - * __bam_c_search -- - * Move to a specified record. - */ -static int -__bam_c_search(dbc, cp, key, flags, exactp) - DBC *dbc; - CURSOR *cp; - const DBT *key; - u_int32_t flags; - int *exactp; -{ - BTREE *t; - DB *dbp; - DB_LOCK lock; - PAGE *h; - db_recno_t recno; - db_indx_t indx; - u_int32_t sflags; - int cmp, needexact, ret; - - dbp = dbc->dbp; - t = dbp->internal; - - /* Find an entry in the database. */ - switch (flags) { - case DB_SET_RECNO: - if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0) - return (ret); - sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND; - needexact = *exactp = 1; - ret = __bam_rsearch(dbc, &recno, sflags, 1, exactp); - break; - case DB_SET: - case DB_GET_BOTH: - sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND; - needexact = *exactp = 1; - goto search; - case DB_SET_RANGE: - sflags = F_ISSET(dbc, DBC_RMW) ? 
S_FIND_WR : S_FIND; - needexact = *exactp = 0; - goto search; - case DB_KEYFIRST: - sflags = S_KEYFIRST; - goto fast_search; - case DB_KEYLAST: - sflags = S_KEYLAST; -fast_search: needexact = *exactp = 0; - /* - * If the application has a history of inserting into the first - * or last pages of the database, we check those pages first to - * avoid doing a full search. - * - * Record numbers can't be fast-tracked, the entire tree has to - * be locked. - */ - h = NULL; - lock = LOCK_INVALID; - if (F_ISSET(dbp, DB_BT_RECNUM)) - goto search; - - /* Check if the application has a history of sorted input. */ - if (t->bt_lpgno == PGNO_INVALID) - goto search; - - /* - * Lock and retrieve the page on which we did the last insert. - * It's okay if it doesn't exist, or if it's not the page type - * we expected, it just means that the world changed. - */ - if (__bam_lget(dbc, 0, t->bt_lpgno, DB_LOCK_WRITE, &lock)) - goto fast_miss; - if (memp_fget(dbp->mpf, &t->bt_lpgno, 0, &h)) - goto fast_miss; - if (TYPE(h) != P_LBTREE) - goto fast_miss; - if (NUM_ENT(h) == 0) - goto fast_miss; - - /* - * What we do here is test to see if we're at the beginning or - * end of the tree and if the new item sorts before/after the - * first/last page entry. We don't try and catch inserts into - * the middle of the tree (although we could, as long as there - * were two keys on the page and we saved both the index and - * the page number of the last insert). - */ - if (h->next_pgno == PGNO_INVALID) { - indx = NUM_ENT(h) - P_INDX; - if ((cmp = - __bam_cmp(dbp, key, h, indx, t->bt_compare)) < 0) - goto try_begin; - if (cmp > 0) { - indx += P_INDX; - goto fast_hit; - } - - /* - * Found a duplicate. If doing DB_KEYLAST, we're at - * the correct position, otherwise, move to the first - * of the duplicates. 
- */ - if (flags == DB_KEYLAST) - goto fast_hit; - for (; - indx > 0 && h->inp[indx - P_INDX] == h->inp[indx]; - indx -= P_INDX) - ; - goto fast_hit; - } -try_begin: if (h->prev_pgno == PGNO_INVALID) { - indx = 0; - if ((cmp = - __bam_cmp(dbp, key, h, indx, t->bt_compare)) > 0) - goto fast_miss; - if (cmp < 0) - goto fast_hit; - /* - * Found a duplicate. If doing DB_KEYFIRST, we're at - * the correct position, otherwise, move to the last - * of the duplicates. - */ - if (flags == DB_KEYFIRST) - goto fast_hit; - for (; - indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && - h->inp[indx] == h->inp[indx + P_INDX]; - indx += P_INDX) - ; - goto fast_hit; - } - goto fast_miss; - -fast_hit: /* Set the exact match flag, we may have found a duplicate. */ - *exactp = cmp == 0; - - /* Enter the entry in the stack. */ - BT_STK_CLR(cp); - BT_STK_ENTER(cp, h, indx, lock, ret); - break; - -fast_miss: if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); - if (lock != LOCK_INVALID) - (void)__BT_LPUT(dbc, lock); - -search: ret = __bam_search(dbc, key, sflags, 1, NULL, exactp); - break; - default: /* XXX: Impossible. */ - abort(); - /* NOTREACHED */ - } - if (ret != 0) - return (ret); - - /* - * Initialize the cursor to reference it. This has to be done - * before we return (even with DB_NOTFOUND) because we have to - * free the page(s) we locked in __bam_search. - */ - cp->page = cp->csp->page; - cp->pgno = cp->csp->page->pgno; - cp->indx = cp->csp->indx; - cp->lock = cp->csp->lock; - cp->dpgno = PGNO_INVALID; - - /* - * If we inserted a key into the first or last slot of the tree, - * remember where it was so we can do it more quickly next time. - */ - if (flags == DB_KEYFIRST || flags == DB_KEYLAST) - t->bt_lpgno = - ((cp->page->next_pgno == PGNO_INVALID && - cp->indx >= NUM_ENT(cp->page)) || - (cp->page->prev_pgno == PGNO_INVALID && cp->indx == 0)) ? - cp->pgno : PGNO_INVALID; - - /* If we need an exact match and didn't find one, we're done. 
*/ - if (needexact && *exactp == 0) - return (DB_NOTFOUND); - - return (0); -} - -/* - * __bam_dup -- - * Check for an off-page duplicates entry, and if found, move to the - * first or last entry. - * - * PUBLIC: int __bam_dup __P((DBC *, CURSOR *, u_int32_t, int)); - */ -int -__bam_dup(dbc, cp, indx, last_dup) - DBC *dbc; - CURSOR *cp; - u_int32_t indx; - int last_dup; -{ - BOVERFLOW *bo; - DB *dbp; - db_pgno_t pgno; - int ret; - - dbp = dbc->dbp; - - /* - * Check for an overflow entry. If we find one, move to the - * duplicates page, and optionally move to the last record on - * that page. - * - * !!! - * We don't lock duplicates pages, we've already got the correct - * lock on the main page. - */ - bo = GET_BOVERFLOW(cp->page, indx + O_INDX); - if (B_TYPE(bo->type) != B_DUPLICATE) - return (0); - - pgno = bo->pgno; - if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) - return (ret); - cp->page = NULL; - if (last_dup) { - if ((ret = __db_dend(dbc, pgno, &cp->page)) != 0) - return (ret); - indx = NUM_ENT(cp->page) - O_INDX; - } else { - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) - return (ret); - indx = 0; - } - - /* Update the cursor's duplicate information. */ - cp->dpgno = cp->page->pgno; - cp->dindx = indx; - - return (0); -} - -/* - * __bam_c_physdel -- - * Actually do the cursor deletion. - */ -static int -__bam_c_physdel(dbc, cp, h) - DBC *dbc; - CURSOR *cp; - PAGE *h; -{ - enum { DELETE_ITEM, DELETE_PAGE, NOTHING_FURTHER } cmd; - BOVERFLOW bo; - DB *dbp; - DBT dbt; - DB_LOCK lock; - db_indx_t indx; - db_pgno_t pgno, next_pgno, prev_pgno; - int delete_page, local_page, ret; - - dbp = dbc->dbp; - - delete_page = ret = 0; - - /* Figure out what we're deleting. */ - if (cp->dpgno == PGNO_INVALID) { - pgno = cp->pgno; - indx = cp->indx; - } else { - pgno = cp->dpgno; - indx = cp->dindx; - } - - /* - * If the item is referenced by another cursor, set that cursor's - * delete flag and leave it up to it to do the delete. - * - * !!! 
- * This test for > 0 is a tricky. There are two ways that we can - * be called here. Either we are closing the cursor or we've moved - * off the page with the deleted entry. In the first case, we've - * already removed the cursor from the active queue, so we won't see - * it in __bam_ca_delete. In the second case, it will be on a different - * item, so we won't bother with it in __bam_ca_delete. - */ - if (__bam_ca_delete(dbp, pgno, indx, 1) > 0) - return (0); - - /* - * If this is concurrent DB, upgrade the lock if necessary. - */ - if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW) && - (ret = lock_get(dbp->dbenv->lk_info, - dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, - &dbc->mylock)) != 0) - return (EAGAIN); - - /* - * If we don't already have the page locked, get it and delete the - * items. - */ - if ((h == NULL || h->pgno != pgno)) { - if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) - return (ret); - local_page = 1; - } else - local_page = 0; - - /* - * If we're deleting a duplicate entry and there are other duplicate - * entries remaining, call the common code to do the work and fix up - * the parent page as necessary. Otherwise, do a normal btree delete. - * - * There are 5 possible cases: - * - * 1. It's not a duplicate item: do a normal btree delete. - * 2. It's a duplicate item: - * 2a: We delete an item from a page of duplicates, but there are - * more items on the page. - * 2b: We delete the last item from a page of duplicates, deleting - * the last duplicate. - * 2c: We delete the last item from a page of duplicates, but there - * is a previous page of duplicates. - * 2d: We delete the last item from a page of duplicates, but there - * is a following page of duplicates. - * - * In the case of: - * - * 1: There's nothing further to do. - * 2a: There's nothing further to do. 
- * 2b: Do the normal btree delete instead of a duplicate delete, as - * that deletes both the duplicate chain and the parent page's - * entry. - * 2c: There's nothing further to do. - * 2d: Delete the duplicate, and update the parent page's entry. - */ - if (TYPE(h) == P_DUPLICATE) { - pgno = PGNO(h); - prev_pgno = PREV_PGNO(h); - next_pgno = NEXT_PGNO(h); - - if (NUM_ENT(h) == 1 && - prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID) - cmd = DELETE_PAGE; - else { - cmd = DELETE_ITEM; - - /* Delete the duplicate. */ - if ((ret = __db_drem(dbc, &h, indx, __bam_free)) != 0) - goto err; - - /* - * 2a: h != NULL, h->pgno == pgno - * 2b: We don't reach this clause, as the above test - * was true. - * 2c: h == NULL, prev_pgno != PGNO_INVALID - * 2d: h != NULL, next_pgno != PGNO_INVALID - * - * Test for 2a and 2c: if we didn't empty the current - * page or there was a previous page of duplicates, we - * don't need to touch the parent page. - */ - if ((h != NULL && pgno == h->pgno) || - prev_pgno != PGNO_INVALID) - cmd = NOTHING_FURTHER; - } - - /* - * Release any page we're holding and its lock. - * - * !!! - * If there is no subsequent page in the duplicate chain, then - * __db_drem will have put page "h" and set it to NULL. - */ - if (local_page) { - if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_TLPUT(dbc, lock); - local_page = 0; - } - - if (cmd == NOTHING_FURTHER) - goto done; - - /* Acquire the parent page and switch the index to its entry. */ - if ((ret = - __bam_lget(dbc, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) { - (void)__BT_TLPUT(dbc, lock); - goto err; - } - local_page = 1; - indx = cp->indx; - - if (cmd == DELETE_PAGE) - goto btd; - - /* - * Copy, delete, update, add-back the parent page's data entry. - * - * XXX - * This may be a performance/logging problem. 
We should add a - * log message which simply logs/updates a random set of bytes - * on a page, and use it instead of doing a delete/add pair. - */ - indx += O_INDX; - bo = *GET_BOVERFLOW(h, indx); - (void)__db_ditem(dbc, h, indx, BOVERFLOW_SIZE); - bo.pgno = next_pgno; - memset(&dbt, 0, sizeof(dbt)); - dbt.data = &bo; - dbt.size = BOVERFLOW_SIZE; - (void)__db_pitem(dbc, h, indx, BOVERFLOW_SIZE, &dbt, NULL); - (void)memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY); - goto done; - } - -btd: /* - * If the page is going to be emptied, delete it. To delete a leaf - * page we need a copy of a key from the page. We use the 0th page - * index since it's the last key that the page held. - * - * We malloc the page information instead of using the return key/data - * memory because we've already set them -- the reason we've already - * set them is because we're (potentially) about to do a reverse split, - * which would make our saved page information useless. - * - * !!! - * The following operations to delete a page might deadlock. I think - * that's OK. The problem is if we're deleting an item because we're - * closing cursors because we've already deadlocked and want to call - * txn_abort(). If we fail due to deadlock, we leave a locked empty - * page in the tree, which won't be empty long because we're going to - * undo the delete. - */ - if (NUM_ENT(h) == 2 && h->pgno != PGNO_ROOT) { - memset(&dbt, 0, sizeof(DBT)); - dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL; - if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0) - goto err; - delete_page = 1; - } - - /* - * Do a normal btree delete. - * - * !!! - * Delete the key item first, otherwise the duplicate checks in - * __bam_ditem() won't work! - */ - if ((ret = __bam_ditem(dbc, h, indx)) != 0) - goto err; - if ((ret = __bam_ditem(dbc, h, indx)) != 0) - goto err; - - /* Discard any remaining locks/pages. 
*/ - if (local_page) { - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_TLPUT(dbc, lock); - local_page = 0; - } - - /* Delete the page if it was emptied. */ - if (delete_page) - ret = __bam_dpage(dbc, &dbt); - -err: -done: if (delete_page) - __os_free(dbt.data, dbt.size); - - if (local_page) { - /* - * It's possible for h to be NULL, as __db_drem may have - * been relinking pages by the time that it deadlocked. - */ - if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_TLPUT(dbc, lock); - } - - if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) - (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, - DB_LOCK_IWRITE, 0); - - return (ret); -} - -/* - * __bam_c_getstack -- - * Acquire a full stack for a cursor. - */ -static int -__bam_c_getstack(dbc, cp) - DBC *dbc; - CURSOR *cp; -{ - DB *dbp; - DBT dbt; - PAGE *h; - db_pgno_t pgno; - int exact, ret; - - dbp = dbc->dbp; - h = NULL; - memset(&dbt, 0, sizeof(DBT)); - ret = 0; - - /* Get the page with the current item on it. */ - pgno = cp->pgno; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) - return (ret); - - /* Get a copy of a key from the page. */ - dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL; - if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0) - goto err; - - /* Get a write-locked stack for that page. */ - exact = 0; - ret = __bam_search(dbc, &dbt, S_KEYFIRST, 1, NULL, &exact); - - /* We no longer need the key or the page. */ -err: if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); - if (dbt.data != NULL) - __os_free(dbt.data, dbt.size); - return (ret); -} diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c deleted file mode 100644 index d623bd8a6f..0000000000 --- a/db2/btree/bt_delete.c +++ /dev/null @@ -1,589 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. 
- */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_delete.c 10.43 (Sleepycat) 12/7/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -/* - * __bam_delete -- - * Delete the items referenced by a key. - * - * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); - */ -int -__bam_delete(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - DBC *dbc; - DBT data; - u_int32_t f_init, f_next; - int ret, t_ret; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = - __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) - return (ret); - - /* Allocate a cursor. */ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "bam_delete", key, NULL, flags); - - /* - * Walk a cursor through the key/data pairs, deleting as we go. Set - * the DB_DBT_USERMEM flag, as this might be a threaded application - * and the flags checking will catch us. We don't actually want the - * keys or data, so request a partial of length 0. - */ - memset(&data, 0, sizeof(data)); - F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL); - - /* If locking, set read-modify-write flag. */ - f_init = DB_SET; - f_next = DB_NEXT_DUP; - if (dbp->dbenv != NULL && dbp->dbenv->lk_info != NULL) { - f_init |= DB_RMW; - f_next |= DB_RMW; - } - - /* Walk through the set of key/data pairs, deleting as we go. */ - if ((ret = dbc->c_get(dbc, key, &data, f_init)) != 0) - goto err; - for (;;) { - if ((ret = dbc->c_del(dbc, 0)) != 0) - goto err; - if ((ret = dbc->c_get(dbc, key, &data, f_next)) != 0) { - if (ret == DB_NOTFOUND) { - ret = 0; - break; - } - goto err; - } - } - -err: /* Discard the cursor. 
*/ - if ((t_ret = dbc->c_close(dbc)) != 0 && - (ret == 0 || ret == DB_NOTFOUND)) - ret = t_ret; - - return (ret); -} - -/* - * __bam_ditem -- - * Delete one or more entries from a page. - * - * PUBLIC: int __bam_ditem __P((DBC *, PAGE *, u_int32_t)); - */ -int -__bam_ditem(dbc, h, indx) - DBC *dbc; - PAGE *h; - u_int32_t indx; -{ - BINTERNAL *bi; - BKEYDATA *bk; - BOVERFLOW *bo; - DB *dbp; - u_int32_t nbytes; - int ret; - - dbp = dbc->dbp; - - switch (TYPE(h)) { - case P_IBTREE: - bi = GET_BINTERNAL(h, indx); - switch (B_TYPE(bi->type)) { - case B_DUPLICATE: - case B_OVERFLOW: - nbytes = BINTERNAL_SIZE(bi->len); - bo = (BOVERFLOW *)bi->data; - goto offpage; - case B_KEYDATA: - nbytes = BINTERNAL_SIZE(bi->len); - break; - default: - return (__db_pgfmt(dbp, h->pgno)); - } - break; - case P_IRECNO: - nbytes = RINTERNAL_SIZE; - break; - case P_LBTREE: - /* - * If it's a duplicate key, discard the index and don't touch - * the actual page item. - * - * XXX - * This works because no data item can have an index matching - * any other index so even if the data item is in a key "slot", - * it won't match any other index. - */ - if ((indx % 2) == 0) { - /* - * Check for a duplicate after us on the page. NOTE: - * we have to delete the key item before deleting the - * data item, otherwise the "indx + P_INDX" calculation - * won't work! - */ - if (indx + P_INDX < (u_int32_t)NUM_ENT(h) && - h->inp[indx] == h->inp[indx + P_INDX]) - return (__bam_adjindx(dbc, - h, indx, indx + O_INDX, 0)); - /* - * Check for a duplicate before us on the page. It - * doesn't matter if we delete the key item before or - * after the data item for the purposes of this one. 
- */ - if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) - return (__bam_adjindx(dbc, - h, indx, indx - P_INDX, 0)); - } - /* FALLTHROUGH */ - case P_LRECNO: - bk = GET_BKEYDATA(h, indx); - switch (B_TYPE(bk->type)) { - case B_DUPLICATE: - case B_OVERFLOW: - nbytes = BOVERFLOW_SIZE; - bo = GET_BOVERFLOW(h, indx); - -offpage: /* Delete duplicate/offpage chains. */ - if (B_TYPE(bo->type) == B_DUPLICATE) { - if ((ret = - __db_ddup(dbc, bo->pgno, __bam_free)) != 0) - return (ret); - } else - if ((ret = - __db_doff(dbc, bo->pgno, __bam_free)) != 0) - return (ret); - break; - case B_KEYDATA: - nbytes = BKEYDATA_SIZE(bk->len); - break; - default: - return (__db_pgfmt(dbp, h->pgno)); - } - break; - default: - return (__db_pgfmt(dbp, h->pgno)); - } - - /* Delete the item. */ - if ((ret = __db_ditem(dbc, h, indx, nbytes)) != 0) - return (ret); - - /* Mark the page dirty. */ - return (memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)); -} - -/* - * __bam_adjindx -- - * Adjust an index on the page. - * - * PUBLIC: int __bam_adjindx __P((DBC *, PAGE *, u_int32_t, u_int32_t, int)); - */ -int -__bam_adjindx(dbc, h, indx, indx_copy, is_insert) - DBC *dbc; - PAGE *h; - u_int32_t indx, indx_copy; - int is_insert; -{ - DB *dbp; - db_indx_t copy; - int ret; - - dbp = dbc->dbp; - - /* Log the change. */ - if (DB_LOGGING(dbc) && - (ret = __bam_adj_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h), - 0, dbp->log_fileid, PGNO(h), &LSN(h), indx, indx_copy, - (u_int32_t)is_insert)) != 0) - return (ret); - - if (is_insert) { - copy = h->inp[indx_copy]; - if (indx != NUM_ENT(h)) - memmove(&h->inp[indx + O_INDX], &h->inp[indx], - sizeof(db_indx_t) * (NUM_ENT(h) - indx)); - h->inp[indx] = copy; - ++NUM_ENT(h); - } else { - --NUM_ENT(h); - if (indx != NUM_ENT(h)) - memmove(&h->inp[indx], &h->inp[indx + O_INDX], - sizeof(db_indx_t) * (NUM_ENT(h) - indx)); - } - - /* Mark the page dirty. */ - ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY); - - /* Adjust the cursors. 
*/ - __bam_ca_di(dbp, h->pgno, indx, is_insert ? 1 : -1); - return (0); -} - -/* - * __bam_dpage -- - * Delete a page from the tree. - * - * PUBLIC: int __bam_dpage __P((DBC *, const DBT *)); - */ -int -__bam_dpage(dbc, key) - DBC *dbc; - const DBT *key; -{ - CURSOR *cp; - DB *dbp; - DB_LOCK lock; - PAGE *h; - db_pgno_t pgno; - int level; /* !!!: has to hold number of tree levels. */ - int exact, ret; - - dbp = dbc->dbp; - cp = dbc->internal; - ret = 0; - - /* - * The locking protocol is that we acquire locks by walking down the - * tree, to avoid the obvious deadlocks. - * - * Call __bam_search to reacquire the empty leaf page, but this time - * get both the leaf page and it's parent, locked. Walk back up the - * tree, until we have the top pair of pages that we want to delete. - * Once we have the top page that we want to delete locked, lock the - * underlying pages and check to make sure they're still empty. If - * they are, delete them. - */ - for (level = LEAFLEVEL;; ++level) { - /* Acquire a page and its parent, locked. */ - if ((ret = - __bam_search(dbc, key, S_WRPAIR, level, NULL, &exact)) != 0) - return (ret); - - /* - * If we reach the root or the page isn't going to be empty - * when we delete one record, quit. - */ - h = cp->csp[-1].page; - if (h->pgno == PGNO_ROOT || NUM_ENT(h) != 1) - break; - - /* Release the two locked pages. */ - (void)memp_fput(dbp->mpf, cp->csp[-1].page, 0); - (void)__BT_TLPUT(dbc, cp->csp[-1].lock); - (void)memp_fput(dbp->mpf, cp->csp[0].page, 0); - (void)__BT_TLPUT(dbc, cp->csp[0].lock); - } - - /* - * Leave the stack pointer one after the last entry, we may be about - * to push more items on the stack. - */ - ++cp->csp; - - /* - * cp->csp[-2].page is the top page, which we're not going to delete, - * and cp->csp[-1].page is the first page we are going to delete. - * - * Walk down the chain, acquiring the rest of the pages until we've - * retrieved the leaf page. 
If we find any pages that aren't going - * to be emptied by the delete, someone else added something while we - * were walking the tree, and we discontinue the delete. - */ - for (h = cp->csp[-1].page;;) { - if (ISLEAF(h)) { - if (NUM_ENT(h) != 0) - goto release; - break; - } else - if (NUM_ENT(h) != 1) - goto release; - - /* - * Get the next page, write lock it and push it onto the stack. - * We know it's index 0, because it can only have one element. - */ - pgno = TYPE(h) == P_IBTREE ? - GET_BINTERNAL(h, 0)->pgno : GET_RINTERNAL(h, 0)->pgno; - - if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &lock)) != 0) - goto release; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) - goto release; - BT_STK_PUSH(cp, h, 0, lock, ret); - } - - /* Adjust back to reference the last page on the stack. */ - BT_STK_POP(cp); - - /* Delete the pages. */ - return (__bam_dpages(dbc)); - -release: - /* Adjust back to reference the last page on the stack. */ - BT_STK_POP(cp); - - /* Discard any locked pages and return. */ - __bam_stkrel(dbc, 0); - - return (ret); -} - -/* - * __bam_dpages -- - * Delete a set of locked pages. - * - * PUBLIC: int __bam_dpages __P((DBC *)); - */ -int -__bam_dpages(dbc) - DBC *dbc; -{ - CURSOR *cp; - DB *dbp; - DBT a, b; - DB_LOCK c_lock, p_lock; - EPG *epg; - PAGE *child, *parent; - db_indx_t nitems; - db_pgno_t pgno; - db_recno_t rcnt; - int done, ret; - - dbp = dbc->dbp; - cp = dbc->internal; - epg = cp->sp; - - /* - * !!! - * There is an interesting deadlock situation here. We have to relink - * the leaf page chain around the leaf page being deleted. Consider - * a cursor walking through the leaf pages, that has the previous page - * read-locked and is waiting on a lock for the page we're deleting. - * It will deadlock here. This is a problem, because if our process is - * selected to resolve the deadlock, we'll leave an empty leaf page - * that we can never again access by walking down the tree. 
So, before - * we unlink the subtree, we relink the leaf page chain. - */ - if ((ret = __db_relink(dbc, DB_REM_PAGE, cp->csp->page, NULL, 1)) != 0) - goto release; - - /* - * We have the entire stack of deletable pages locked. - * - * Delete the highest page in the tree's reference to the underlying - * stack of pages. Then, release that page, letting the rest of the - * tree get back to business. - */ - if ((ret = __bam_ditem(dbc, epg->page, epg->indx)) != 0) { -release: (void)__bam_stkrel(dbc, 0); - return (ret); - } - - pgno = epg->page->pgno; - nitems = NUM_ENT(epg->page); - - (void)memp_fput(dbp->mpf, epg->page, 0); - (void)__BT_TLPUT(dbc, epg->lock); - - /* - * Free the rest of the stack of pages. - * - * !!! - * Don't bother checking for errors. We've unlinked the subtree from - * the tree, and there's no possibility of recovery outside of doing - * TXN rollback. - */ - while (++epg <= cp->csp) { - /* - * Delete page entries so they will be restored as part of - * recovery. - */ - if (NUM_ENT(epg->page) != 0) - (void)__bam_ditem(dbc, epg->page, epg->indx); - - (void)__bam_free(dbc, epg->page); - (void)__BT_TLPUT(dbc, epg->lock); - } - BT_STK_CLR(cp); - - /* - * Try and collapse the tree a level -- this is only applicable - * if we've deleted the next-to-last element from the root page. - * - * There are two cases when collapsing a tree. - * - * If we've just deleted the last item from the root page, there is no - * further work to be done. The code above has emptied the root page - * and freed all pages below it. - */ - if (pgno != PGNO_ROOT || nitems != 1) - return (0); - - /* - * If we just deleted the next-to-last item from the root page, the - * tree can collapse one or more levels. While there remains only a - * single item on the root page, write lock the last page referenced - * by the root page and copy it over the root page. If we can't get a - * write lock, that's okay, the tree just stays deeper than we'd like. 
- */ - for (done = 0; !done;) { - /* Initialize. */ - parent = child = NULL; - p_lock = c_lock = LOCK_INVALID; - - /* Lock the root. */ - pgno = PGNO_ROOT; - if ((ret = - __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &p_lock)) != 0) - goto stop; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &parent)) != 0) - goto stop; - - if (NUM_ENT(parent) != 1 || - (TYPE(parent) != P_IBTREE && TYPE(parent) != P_IRECNO)) - goto stop; - - pgno = TYPE(parent) == P_IBTREE ? - GET_BINTERNAL(parent, 0)->pgno : - GET_RINTERNAL(parent, 0)->pgno; - - /* Lock the child page. */ - if ((ret = - __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &c_lock)) != 0) - goto stop; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &child)) != 0) - goto stop; - - /* Log the change. */ - if (DB_LOGGING(dbc)) { - memset(&a, 0, sizeof(a)); - a.data = child; - a.size = dbp->pgsize; - memset(&b, 0, sizeof(b)); - b.data = P_ENTRY(parent, 0); - b.size = BINTERNAL_SIZE(((BINTERNAL *)b.data)->len); - __bam_rsplit_log(dbp->dbenv->lg_info, dbc->txn, - &child->lsn, 0, dbp->log_fileid, child->pgno, &a, - RE_NREC(parent), &b, &parent->lsn); - } - - /* - * Make the switch. - * - * One fixup -- if the tree has record numbers and we're not - * converting to a leaf page, we have to preserve the total - * record count. Note that we are about to overwrite everything - * on the parent, including its LSN. This is actually OK, - * because the above log message, which describes this update, - * stores its LSN on the child page. When the child is copied - * to the parent, the correct LSN is going to copied into - * place in the parent. - */ - COMPQUIET(rcnt, 0); - if (TYPE(child) == P_IRECNO || - (TYPE(child) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM))) - rcnt = RE_NREC(parent); - memcpy(parent, child, dbp->pgsize); - parent->pgno = PGNO_ROOT; - if (TYPE(child) == P_IRECNO || - (TYPE(child) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM))) - RE_NREC_SET(parent, rcnt); - - /* Mark the pages dirty. 
*/ - memp_fset(dbp->mpf, parent, DB_MPOOL_DIRTY); - memp_fset(dbp->mpf, child, DB_MPOOL_DIRTY); - - /* Adjust the cursors. */ - __bam_ca_rsplit(dbp, child->pgno, PGNO_ROOT); - - /* - * Free the page copied onto the root page and discard its - * lock. (The call to __bam_free() discards our reference - * to the page.) - */ - (void)__bam_free(dbc, child); - child = NULL; - - if (0) { -stop: done = 1; - } - if (p_lock != LOCK_INVALID) - (void)__BT_TLPUT(dbc, p_lock); - if (parent != NULL) - memp_fput(dbp->mpf, parent, 0); - if (c_lock != LOCK_INVALID) - (void)__BT_TLPUT(dbc, c_lock); - if (child != NULL) - memp_fput(dbp->mpf, child, 0); - } - - return (0); -} diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c deleted file mode 100644 index a89cfccb97..0000000000 --- a/db2/btree/bt_open.c +++ /dev/null @@ -1,310 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_open.c 10.39 (Sleepycat) 11/21/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -/* - * __bam_open -- - * Open a btree. - * - * PUBLIC: int __bam_open __P((DB *, DB_INFO *)); - */ -int -__bam_open(dbp, dbinfo) - DB *dbp; - DB_INFO *dbinfo; -{ - BTREE *t; - int ret; - - /* Allocate and initialize the private btree structure. */ - if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0) - return (ret); - dbp->internal = t; - - /* - * Intention is to make sure all of the user's selections are okay - * here and then use them without checking. 
- */ - if (dbinfo == NULL) { - t->bt_minkey = DEFMINKEYPAGE; - t->bt_compare = __bam_defcmp; - t->bt_prefix = __bam_defpfx; - } else { - /* Minimum number of keys per page. */ - if (dbinfo->bt_minkey == 0) - t->bt_minkey = DEFMINKEYPAGE; - else { - if (dbinfo->bt_minkey < 2) - goto einval; - t->bt_minkey = dbinfo->bt_minkey; - } - - /* Maximum number of keys per page. */ - if (dbinfo->bt_maxkey == 0) - t->bt_maxkey = 0; - else { - if (dbinfo->bt_maxkey < 1) - goto einval; - t->bt_maxkey = dbinfo->bt_maxkey; - } - - /* - * If no comparison, use default comparison. If no comparison - * and no prefix, use default prefix. (We can't default the - * prefix if the user supplies a comparison routine; shortening - * the keys may break their comparison algorithm. We don't - * permit the user to specify a prefix routine if they didn't - * also specify a comparison routine, they can't know enough - * about our comparison routine to get it right.) - */ - if ((t->bt_compare = dbinfo->bt_compare) == NULL) { - if (dbinfo->bt_prefix != NULL) - goto einval; - t->bt_compare = __bam_defcmp; - t->bt_prefix = __bam_defpfx; - } else - t->bt_prefix = dbinfo->bt_prefix; - } - - /* Initialize the remaining fields/methods of the DB. */ - dbp->am_close = __bam_close; - dbp->del = __bam_delete; - dbp->stat = __bam_stat; - - /* Start up the tree. */ - if ((ret = __bam_read_root(dbp)) != 0) - goto err; - - /* Set the overflow page size. */ - __bam_setovflsize(dbp); - - return (0); - -einval: ret = EINVAL; - -err: __os_free(t, sizeof(BTREE)); - return (ret); -} - -/* - * __bam_close -- - * Close a btree. - * - * PUBLIC: int __bam_close __P((DB *)); - */ -int -__bam_close(dbp) - DB *dbp; -{ - __os_free(dbp->internal, sizeof(BTREE)); - dbp->internal = NULL; - - return (0); -} - -/* - * __bam_setovflsize -- - * - * PUBLIC: void __bam_setovflsize __P((DB *)); - */ -void -__bam_setovflsize(dbp) - DB *dbp; -{ - BTREE *t; - - t = dbp->internal; - - /* - * !!! 
- * Correction for recno, which doesn't know anything about minimum - * keys per page. - */ - if (t->bt_minkey == 0) - t->bt_minkey = DEFMINKEYPAGE; - - /* - * The btree data structure requires that at least two key/data pairs - * can fit on a page, but other than that there's no fixed requirement. - * Translate the minimum number of items into the bytes a key/data pair - * can use before being placed on an overflow page. We calculate for - * the worst possible alignment by assuming every item requires the - * maximum alignment for padding. - * - * Recno uses the btree bt_ovflsize value -- it's close enough. - */ - t->bt_ovflsize = (dbp->pgsize - P_OVERHEAD) / (t->bt_minkey * P_INDX) - - (BKEYDATA_PSIZE(0) + ALIGN(1, 4)); -} - -/* - * __bam_read_root -- - * Check (and optionally create) a tree. - * - * PUBLIC: int __bam_read_root __P((DB *)); - */ -int -__bam_read_root(dbp) - DB *dbp; -{ - BTMETA *meta; - BTREE *t; - DBC *dbc; - DB_LOCK metalock, rootlock; - PAGE *root; - db_pgno_t pgno; - int ret, t_ret; - - ret = 0; - t = dbp->internal; - - /* Get a cursor. */ - if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) - return (ret); - - /* Get, and optionally create the metadata page. */ - pgno = PGNO_METADATA; - if ((ret = - __bam_lget(dbc, 0, PGNO_METADATA, DB_LOCK_WRITE, &metalock)) != 0) - goto err; - if ((ret = - memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0) { - (void)__BT_LPUT(dbc, metalock); - goto err; - } - - /* - * If the magic number is correct, we're not creating the tree. - * Correct any fields that may not be right. Note, all of the - * local flags were set by db_open(3). - */ - if (meta->magic != 0) { - t->bt_maxkey = meta->maxkey; - t->bt_minkey = meta->minkey; - - (void)memp_fput(dbp->mpf, (PAGE *)meta, 0); - (void)__BT_LPUT(dbc, metalock); - goto done; - } - - /* Initialize the tree structure metadata information. 
*/ - memset(meta, 0, sizeof(BTMETA)); - ZERO_LSN(meta->lsn); - meta->pgno = PGNO_METADATA; - meta->magic = DB_BTREEMAGIC; - meta->version = DB_BTREEVERSION; - meta->pagesize = dbp->pgsize; - meta->maxkey = t->bt_maxkey; - meta->minkey = t->bt_minkey; - meta->free = PGNO_INVALID; - if (dbp->type == DB_RECNO) - F_SET(meta, BTM_RECNO); - if (F_ISSET(dbp, DB_AM_DUP)) - F_SET(meta, BTM_DUP); - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) - F_SET(meta, BTM_FIXEDLEN); - if (F_ISSET(dbp, DB_BT_RECNUM)) - F_SET(meta, BTM_RECNUM); - if (F_ISSET(dbp, DB_RE_RENUMBER)) - F_SET(meta, BTM_RENUMBER); - memcpy(meta->uid, dbp->fileid, DB_FILE_ID_LEN); - - /* Create and initialize a root page. */ - pgno = PGNO_ROOT; - if ((ret = - __bam_lget(dbc, 0, PGNO_ROOT, DB_LOCK_WRITE, &rootlock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0) { - (void)__BT_LPUT(dbc, rootlock); - goto err; - } - P_INIT(root, dbp->pgsize, PGNO_ROOT, PGNO_INVALID, - PGNO_INVALID, 1, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE); - ZERO_LSN(root->lsn); - - /* Release the metadata and root pages. */ - if ((ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY)) != 0) - goto err; - if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0) - goto err; - - /* - * Flush the metadata and root pages to disk -- since the user can't - * transaction protect open, the pages have to exist during recovery. - * - * XXX - * It's not useful to return not-yet-flushed here -- convert it to - * an error. - */ - if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) - ret = EINVAL; - - /* Release the locks. */ - (void)__BT_LPUT(dbc, metalock); - (void)__BT_LPUT(dbc, rootlock); - -err: -done: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} diff --git a/db2/btree/bt_page.c b/db2/btree/bt_page.c deleted file mode 100644 index 6ccd68a5ab..0000000000 --- a/db2/btree/bt_page.c +++ /dev/null @@ -1,317 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. 
- * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_page.c 10.17 (Sleepycat) 1/3/99"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -/* - * __bam_new -- - * Get a new page, preferably from the freelist. - * - * PUBLIC: int __bam_new __P((DBC *, u_int32_t, PAGE **)); - */ -int -__bam_new(dbc, type, pagepp) - DBC *dbc; - u_int32_t type; - PAGE **pagepp; -{ - BTMETA *meta; - DB *dbp; - DB_LOCK metalock; - PAGE *h; - db_pgno_t pgno; - int ret; - - dbp = dbc->dbp; - meta = NULL; - h = NULL; - metalock = LOCK_INVALID; - - pgno = PGNO_METADATA; - if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &metalock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) - goto err; - - if (meta->free == PGNO_INVALID) { - if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_NEW, &h)) != 0) - goto err; - ZERO_LSN(h->lsn); - h->pgno = pgno; - } else { - pgno = meta->free; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) - goto err; - meta->free = h->next_pgno; - } - - /* Log the change. 
*/ - if (DB_LOGGING(dbc)) { - if ((ret = __bam_pg_alloc_log(dbp->dbenv->lg_info, dbc->txn, - &meta->lsn, 0, dbp->log_fileid, &meta->lsn, &h->lsn, - h->pgno, (u_int32_t)type, meta->free)) != 0) - goto err; - LSN(h) = LSN(meta); - } - - (void)memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY); - (void)__BT_TLPUT(dbc, metalock); - - P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type); - *pagepp = h; - return (0); - -err: if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); - if (meta != NULL) - (void)memp_fput(dbp->mpf, meta, 0); - if (metalock != LOCK_INVALID) - (void)__BT_TLPUT(dbc, metalock); - return (ret); -} - -/* - * __bam_lput -- - * The standard lock put call. - * - * PUBLIC: int __bam_lput __P((DBC *, DB_LOCK)); - */ -int -__bam_lput(dbc, lock) - DBC *dbc; - DB_LOCK lock; -{ - return (__BT_LPUT(dbc, lock)); -} - -/* - * __bam_free -- - * Add a page to the head of the freelist. - * - * PUBLIC: int __bam_free __P((DBC *, PAGE *)); - */ -int -__bam_free(dbc, h) - DBC *dbc; - PAGE *h; -{ - BTMETA *meta; - DB *dbp; - DBT ldbt; - DB_LOCK metalock; - db_pgno_t pgno; - u_int32_t dirty_flag; - int ret, t_ret; - - dbp = dbc->dbp; - - /* - * Retrieve the metadata page and insert the page at the head of - * the free list. If either the lock get or page get routines - * fail, then we need to put the page with which we were called - * back because our caller assumes we take care of it. - */ - dirty_flag = 0; - pgno = PGNO_METADATA; - if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &metalock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) { - (void)__BT_TLPUT(dbc, metalock); - goto err; - } - - /* Log the change. 
*/ - if (DB_LOGGING(dbc)) { - memset(&ldbt, 0, sizeof(ldbt)); - ldbt.data = h; - ldbt.size = P_OVERHEAD; - if ((ret = __bam_pg_free_log(dbp->dbenv->lg_info, - dbc->txn, &meta->lsn, 0, dbp->log_fileid, h->pgno, - &meta->lsn, &ldbt, meta->free)) != 0) { - (void)memp_fput(dbp->mpf, (PAGE *)meta, 0); - (void)__BT_TLPUT(dbc, metalock); - return (ret); - } - LSN(h) = LSN(meta); - } - - /* - * The page should have nothing interesting on it, re-initialize it, - * leaving only the page number and the LSN. - */ -#ifdef DIAGNOSTIC - { db_pgno_t __pgno; DB_LSN __lsn; - __pgno = h->pgno; - __lsn = h->lsn; - memset(h, 0xdb, dbp->pgsize); - h->pgno = __pgno; - h->lsn = __lsn; - } -#endif - P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, meta->free, 0, P_INVALID); - - /* Link the page on the metadata free list. */ - meta->free = h->pgno; - - /* Discard the metadata page. */ - ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY); - if ((t_ret = __BT_TLPUT(dbc, metalock)) != 0) - ret = t_ret; - - /* Discard the caller's page reference. */ - dirty_flag = DB_MPOOL_DIRTY; -err: if ((t_ret = memp_fput(dbp->mpf, h, dirty_flag)) != 0 && ret == 0) - ret = t_ret; - - /* - * XXX - * We have to unlock the caller's page in the caller! - */ - return (ret); -} - -#ifdef DEBUG -/* - * __bam_lt -- - * Print out the list of locks currently held by a cursor. - * - * PUBLIC: int __bam_lt __P((DBC *)); - */ -int -__bam_lt(dbc) - DBC *dbc; -{ - DB *dbp; - DB_LOCKREQ req; - - dbp = dbc->dbp; - if (F_ISSET(dbp, DB_AM_LOCKING)) { - req.op = DB_LOCK_DUMP; - lock_vec(dbp->dbenv->lk_info, dbc->locker, 0, &req, 1, NULL); - } - return (0); -} -#endif - -/* - * __bam_lget -- - * The standard lock get call. 
- * - * PUBLIC: int __bam_lget - * PUBLIC: __P((DBC *, int, db_pgno_t, db_lockmode_t, DB_LOCK *)); - */ -int -__bam_lget(dbc, do_couple, pgno, mode, lockp) - DBC *dbc; - int do_couple; - db_pgno_t pgno; - db_lockmode_t mode; - DB_LOCK *lockp; -{ - DB *dbp; - DB_LOCKREQ couple[2]; - int ret; - - dbp = dbc->dbp; - - if (!F_ISSET(dbp, DB_AM_LOCKING)) { - *lockp = LOCK_INVALID; - return (0); - } - - dbc->lock.pgno = pgno; - - /* - * If the object not currently locked, acquire the lock and return, - * otherwise, lock couple. If we fail and it's not a system error, - * convert to EAGAIN. - */ - if (do_couple) { - couple[0].op = DB_LOCK_GET; - couple[0].obj = &dbc->lock_dbt; - couple[0].mode = mode; - couple[1].op = DB_LOCK_PUT; - couple[1].lock = *lockp; - - if (dbc->txn == NULL) - ret = lock_vec(dbp->dbenv->lk_info, - dbc->locker, 0, couple, 2, NULL); - else - ret = lock_tvec(dbp->dbenv->lk_info, - dbc->txn, 0, couple, 2, NULL); - if (ret != 0) { - /* If we fail, discard the lock we held. */ - __BT_LPUT(dbc, *lockp); - - return (ret < 0 ? EAGAIN : ret); - } - *lockp = couple[0].lock; - } else { - if (dbc->txn == NULL) - ret = lock_get(dbp->dbenv->lk_info, - dbc->locker, 0, &dbc->lock_dbt, mode, lockp); - else - ret = lock_tget(dbp->dbenv->lk_info, - dbc->txn, 0, &dbc->lock_dbt, mode, lockp); - return (ret < 0 ? EAGAIN : ret); - } - return (0); -} diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c deleted file mode 100644 index 0d7a69889a..0000000000 --- a/db2/btree/bt_put.c +++ /dev/null @@ -1,831 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_put.c 10.54 (Sleepycat) 12/6/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -static int __bam_fixed __P((DBC *, DBT *)); -static int __bam_ndup __P((DBC *, PAGE *, u_int32_t)); -static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *)); -static int __bam_partial __P((DBC *, - DBT *, PAGE *, u_int32_t, u_int32_t, u_int32_t)); -static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t)); - -/* - * __bam_iitem -- - * Insert an item into the tree. - * - * PUBLIC: int __bam_iitem __P((DBC *, - * PUBLIC: PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t)); - */ -int -__bam_iitem(dbc, hp, indxp, key, data, op, flags) - DBC *dbc; - PAGE **hp; - db_indx_t *indxp; - DBT *key, *data; - u_int32_t op, flags; -{ - BTREE *t; - BKEYDATA *bk; - DB *dbp; - DBT tdbt; - PAGE *h; - db_indx_t indx, nbytes; - u_int32_t data_size, have_bytes, need_bytes, needed; - int bigkey, bigdata, dupadjust, replace, ret; - - COMPQUIET(bk, NULL); - - dbp = dbc->dbp; - t = dbp->internal; - h = *hp; - indx = *indxp; - dupadjust = replace = 0; - - /* - * If it's a page of duplicates, call the common code to do the work. - * - * !!! - * Here's where the hp and indxp are important. The duplicate code - * may decide to rework/rearrange the pages and indices we're using, - * so the caller must understand that the page stack may change. - */ - if (TYPE(h) == P_DUPLICATE) { - /* Adjust the index for the new item if it's a DB_AFTER op. */ - if (op == DB_AFTER) - ++*indxp; - - /* Remove the current item if it's a DB_CURRENT op.
*/ - if (op == DB_CURRENT) { - bk = GET_BKEYDATA(*hp, *indxp); - switch (B_TYPE(bk->type)) { - case B_KEYDATA: - nbytes = BKEYDATA_SIZE(bk->len); - break; - case B_OVERFLOW: - nbytes = BOVERFLOW_SIZE; - break; - default: - return (__db_pgfmt(dbp, h->pgno)); - } - if ((ret = __db_ditem(dbc, *hp, *indxp, nbytes)) != 0) - return (ret); - } - - /* Put the new/replacement item onto the page. */ - if ((ret = __db_dput(dbc, data, hp, indxp, __bam_new)) != 0) - return (ret); - - goto done; - } - - /* Handle fixed-length records: build the real record. */ - if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->recno->re_len) { - tdbt = *data; - if ((ret = __bam_fixed(dbc, &tdbt)) != 0) - return (ret); - data = &tdbt; - } - - /* - * Figure out how much space the data will take, including if it's a - * partial record. If either of the key or data items won't fit on - * a page, we'll have to store them on overflow pages. - */ - bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize; - data_size = F_ISSET(data, DB_DBT_PARTIAL) ? - __bam_partsize(data, h, indx) : data->size; - bigdata = data_size > t->bt_ovflsize; - - needed = 0; - if (LF_ISSET(BI_NEWKEY)) { - /* If BI_NEWKEY is set we're adding a new key and data pair. */ - if (bigkey) - needed += BOVERFLOW_PSIZE; - else - needed += BKEYDATA_PSIZE(key->size); - if (bigdata) - needed += BOVERFLOW_PSIZE; - else - needed += BKEYDATA_PSIZE(data_size); - } else { - /* - * We're either overwriting the data item of a key/data pair - * or we're adding the data item only, i.e. a new duplicate. - */ - if (op == DB_CURRENT) { - bk = GET_BKEYDATA(h, - indx + (TYPE(h) == P_LBTREE ? 
O_INDX : 0)); - if (B_TYPE(bk->type) == B_KEYDATA) - have_bytes = BKEYDATA_PSIZE(bk->len); - else - have_bytes = BOVERFLOW_PSIZE; - need_bytes = 0; - } else { - have_bytes = 0; - need_bytes = sizeof(db_indx_t); - } - if (bigdata) - need_bytes += BOVERFLOW_PSIZE; - else - need_bytes += BKEYDATA_PSIZE(data_size); - - if (have_bytes < need_bytes) - needed += need_bytes - have_bytes; - } - - /* - * If there's not enough room, or the user has put a ceiling on the - * number of keys permitted in the page, split the page. - * - * XXX - * The t->bt_maxkey test here may be insufficient -- do we have to - * check in the btree split code, so we don't undo it there!?!? - */ - if (P_FREESPACE(h) < needed || - (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) - return (DB_NEEDSPLIT); - - /* Handle partial puts: build the real record. */ - if (F_ISSET(data, DB_DBT_PARTIAL)) { - tdbt = *data; - if ((ret = __bam_partial(dbc, - &tdbt, h, indx, data_size, flags)) != 0) - return (ret); - data = &tdbt; - } - - /* - * The code breaks it up into six cases: - * - * 1. Append a new key/data pair. - * 2. Insert a new key/data pair. - * 3. Append a new data item (a new duplicate). - * 4. Insert a new data item (a new duplicate). - * 5. Overflow item: delete and re-add the data item. - * 6. Replace the data item. - */ - if (LF_ISSET(BI_NEWKEY)) { - switch (op) { - case DB_AFTER: /* 1. Append a new key/data pair. */ - indx += 2; - *indxp += 2; - break; - case DB_BEFORE: /* 2. Insert a new key/data pair. */ - break; - default: - return (EINVAL); - } - - /* Add the key. */ - if (bigkey) { - if ((ret = __bam_ovput(dbc, h, indx, key)) != 0) - return (ret); - } else - if ((ret = __db_pitem(dbc, h, indx, - BKEYDATA_SIZE(key->size), NULL, key)) != 0) - return (ret); - ++indx; - } else { - switch (op) { - case DB_AFTER: /* 3. Append a new data item. */ - if (TYPE(h) == P_LBTREE) { - /* - * Adjust the cursor and copy in the key for - * the duplicate. 
- */ - if ((ret = __bam_adjindx(dbc, - h, indx + P_INDX, indx, 1)) != 0) - return (ret); - - indx += 3; - dupadjust = 1; - - *indxp += 2; - } else { - ++indx; - __bam_ca_di(dbp, h->pgno, indx, 1); - - *indxp += 1; - } - break; - case DB_BEFORE: /* 4. Insert a new data item. */ - if (TYPE(h) == P_LBTREE) { - /* - * Adjust the cursor and copy in the key for - * the duplicate. - */ - if ((ret = - __bam_adjindx(dbc, h, indx, indx, 1)) != 0) - return (ret); - - ++indx; - dupadjust = 1; - } else - __bam_ca_di(dbp, h->pgno, indx, 1); - break; - case DB_CURRENT: - if (TYPE(h) == P_LBTREE) - ++indx; - - /* - * 5. Delete/re-add the data item. - * - * If we're dealing with offpage items, we have to - * delete and then re-add the item. - */ - if (bigdata || B_TYPE(bk->type) != B_KEYDATA) { - if ((ret = __bam_ditem(dbc, h, indx)) != 0) - return (ret); - break; - } - - /* 6. Replace the data item. */ - replace = 1; - break; - default: - return (EINVAL); - } - } - - /* Add the data. */ - if (bigdata) { - if ((ret = __bam_ovput(dbc, h, indx, data)) != 0) - return (ret); - } else { - BKEYDATA __bk; - DBT __hdr; - - if (LF_ISSET(BI_DELETED)) { - B_TSET(__bk.type, B_KEYDATA, 1); - __bk.len = data->size; - __hdr.data = &__bk; - __hdr.size = SSZA(BKEYDATA, data); - ret = __db_pitem(dbc, h, indx, - BKEYDATA_SIZE(data->size), &__hdr, data); - } else if (replace) - ret = __bam_ritem(dbc, h, indx, data); - else - ret = __db_pitem(dbc, h, indx, - BKEYDATA_SIZE(data->size), NULL, data); - if (ret != 0) - return (ret); - } - - if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) - return (ret); - - /* - * If the page is at least 50% full, and we added a duplicate, see if - * that set of duplicates takes up at least 25% of the space. If it - * does, move it off onto its own page. - */ - if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) { - --indx; - if ((ret = __bam_ndup(dbc, h, indx)) != 0) - return (ret); - } - - /* - * If we've changed the record count, update the tree. 
Record counts - * need to be updated in recno databases and in btree databases where - * we are supporting records. In both cases, adjust the count if the - * operation wasn't performed on the current record or when the caller - * overrides and wants the adjustment made regardless. - */ -done: if (LF_ISSET(BI_DOINCR) || - (op != DB_CURRENT && - (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO))) - if ((ret = __bam_adjust(dbc, 1)) != 0) - return (ret); - - /* If we've modified a recno file, set the flag */ - if (t->recno != NULL) - F_SET(t->recno, RECNO_MODIFIED); - - return (ret); -} - -/* - * __bam_partsize -- - * Figure out how much space a partial data item is in total. - */ -static u_int32_t -__bam_partsize(data, h, indx) - DBT *data; - PAGE *h; - u_int32_t indx; -{ - BKEYDATA *bk; - u_int32_t nbytes; - - /* - * Figure out how much total space we'll need. If the record doesn't - * already exist, it's simply the data we're provided. - */ - if (indx >= NUM_ENT(h)) - return (data->doff + data->size); - - /* - * Otherwise, it's the data provided plus any already existing data - * that we're not replacing. - */ - bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); - nbytes = - B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len; - - /* - * There are really two cases here: - * - * Case 1: We are replacing some bytes that do not exist (i.e., they - * are past the end of the record). In this case the number of bytes - * we are replacing is irrelevant and all we care about is how many - * bytes we are going to add from offset. So, the new record length - * is going to be the size of the new bytes (size) plus wherever those - * new bytes begin (doff). - * - * Case 2: All the bytes we are replacing exist. Therefore, the new - * size is the oldsize (nbytes) minus the bytes we are replacing (dlen) - * plus the bytes we are adding (size). 
- */ - if (nbytes < data->doff + data->dlen) /* Case 1 */ - return (data->doff + data->size); - - return (nbytes + data->size - data->dlen); /* Case 2 */ -} - -/* - * OVPUT -- - * Copy an overflow item onto a page. - */ -#undef OVPUT -#define OVPUT(h, indx, bo) do { \ - DBT __hdr; \ - memset(&__hdr, 0, sizeof(__hdr)); \ - __hdr.data = &bo; \ - __hdr.size = BOVERFLOW_SIZE; \ - if ((ret = __db_pitem(dbc, \ - h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \ - return (ret); \ -} while (0) - -/* - * __bam_ovput -- - * Build an overflow item and put it on the page. - */ -static int -__bam_ovput(dbc, h, indx, item) - DBC *dbc; - PAGE *h; - u_int32_t indx; - DBT *item; -{ - BOVERFLOW bo; - int ret; - - UMRW(bo.unused1); - B_TSET(bo.type, B_OVERFLOW, 0); - UMRW(bo.unused2); - if ((ret = __db_poff(dbc, item, &bo.pgno, __bam_new)) != 0) - return (ret); - bo.tlen = item->size; - - OVPUT(h, indx, bo); - - return (0); -} - -/* - * __bam_ritem -- - * Replace an item on a page. - * - * PUBLIC: int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *)); - */ -int -__bam_ritem(dbc, h, indx, data) - DBC *dbc; - PAGE *h; - u_int32_t indx; - DBT *data; -{ - BKEYDATA *bk; - DB *dbp; - DBT orig, repl; - db_indx_t cnt, lo, ln, min, off, prefix, suffix; - int32_t nbytes; - int ret; - u_int8_t *p, *t; - - dbp = dbc->dbp; - - /* - * Replace a single item onto a page. The logic figuring out where - * to insert and whether it fits is handled in the caller. All we do - * here is manage the page shuffling. - */ - bk = GET_BKEYDATA(h, indx); - - /* Log the change. */ - if (DB_LOGGING(dbc)) { - /* - * We might as well check to see if the two data items share - * a common prefix and suffix -- it can save us a lot of log - * message if they're large. - */ - min = data->size < bk->len ? 
data->size : bk->len; - for (prefix = 0, - p = bk->data, t = data->data; - prefix < min && *p == *t; ++prefix, ++p, ++t) - ; - - min -= prefix; - for (suffix = 0, - p = (u_int8_t *)bk->data + bk->len - 1, - t = (u_int8_t *)data->data + data->size - 1; - suffix < min && *p == *t; ++suffix, --p, --t) - ; - - /* We only log the parts of the keys that have changed. */ - orig.data = (u_int8_t *)bk->data + prefix; - orig.size = bk->len - (prefix + suffix); - repl.data = (u_int8_t *)data->data + prefix; - repl.size = data->size - (prefix + suffix); - if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbc->txn, - &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h), - (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type), - &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0) - return (ret); - } - - /* - * Set references to the first in-use byte on the page and the - * first byte of the item being replaced. - */ - p = (u_int8_t *)h + HOFFSET(h); - t = (u_int8_t *)bk; - - /* - * If the entry is growing in size, shift the beginning of the data - * part of the page down. If the entry is shrinking in size, shift - * the beginning of the data part of the page up. Use memmove(3), - * the regions overlap. - */ - lo = BKEYDATA_SIZE(bk->len); - ln = BKEYDATA_SIZE(data->size); - if (lo != ln) { - nbytes = lo - ln; /* Signed difference. */ - if (p == t) /* First index is fast. */ - h->inp[indx] += nbytes; - else { /* Else, shift the page. */ - memmove(p + nbytes, p, t - p); - - /* Adjust the indices' offsets. */ - off = h->inp[indx]; - for (cnt = 0; cnt < NUM_ENT(h); ++cnt) - if (h->inp[cnt] <= off) - h->inp[cnt] += nbytes; - } - - /* Clean up the page and adjust the item's reference. */ - HOFFSET(h) += nbytes; - t += nbytes; - } - - /* Copy the new item onto the page. 
*/ - bk = (BKEYDATA *)t; - B_TSET(bk->type, B_KEYDATA, 0); - bk->len = data->size; - memcpy(bk->data, data->data, data->size); - - return (0); -} - -/* - * __bam_ndup -- - * Check to see if the duplicate set at indx should have its own page. - * If it should, create it. - */ -static int -__bam_ndup(dbc, h, indx) - DBC *dbc; - PAGE *h; - u_int32_t indx; -{ - BKEYDATA *bk; - BOVERFLOW bo; - DB *dbp; - DBT hdr; - PAGE *cp; - db_indx_t cnt, cpindx, first, sz; - int ret; - - dbp = dbc->dbp; - - while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) - indx -= P_INDX; - for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) { - if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx]) - break; - bk = GET_BKEYDATA(h, indx); - sz += B_TYPE(bk->type) == B_KEYDATA ? - BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; - bk = GET_BKEYDATA(h, indx + O_INDX); - sz += B_TYPE(bk->type) == B_KEYDATA ? - BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; - } - - /* - * If this set of duplicates is using more than 25% of the page, move - * them off. The choice of 25% is a WAG, but it has to be small enough - * that we can always split regardless of the presence of duplicates. - */ - if (sz < dbp->pgsize / 4) - return (0); - - /* Get a new page. */ - if ((ret = __bam_new(dbc, P_DUPLICATE, &cp)) != 0) - return (ret); - - /* - * Move this set of duplicates off the page. First points to the first - * key of the first duplicate key/data pair, cnt is the number of pairs - * we're dealing with. - */ - memset(&hdr, 0, sizeof(hdr)); - for (indx = first + O_INDX, cpindx = 0;; ++cpindx) { - /* Copy the entry to the new page. */ - bk = GET_BKEYDATA(h, indx); - hdr.data = bk; - hdr.size = B_TYPE(bk->type) == B_KEYDATA ? - BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE; - if ((ret = - __db_pitem(dbc, cp, cpindx, hdr.size, &hdr, NULL)) != 0) - goto err; - - /* - * Move cursors referencing the old entry to the new entry. 
- * Done after the page put because __db_pitem() adjusts - * cursors on the new page, and before the delete because - * __db_ditem adjusts cursors on the old page. - */ - __bam_ca_dup(dbp, - PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx); - - /* Delete the data item. */ - if ((ret = __db_ditem(dbc, h, indx, hdr.size)) != 0) - goto err; - - /* Delete all but the first reference to the key. */ - if (--cnt == 0) - break; - if ((ret = __bam_adjindx(dbc, h, indx, first, 0)) != 0) - goto err; - } - - /* Put in a new data item that points to the duplicates page. */ - UMRW(bo.unused1); - B_TSET(bo.type, B_DUPLICATE, 0); - UMRW(bo.unused2); - bo.pgno = cp->pgno; - bo.tlen = 0; - - OVPUT(h, indx, bo); - - return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY)); - -err: (void)__bam_free(dbc, cp); - return (ret); -} - -/* - * __bam_fixed -- - * Build the real record for a fixed length put. - */ -static int -__bam_fixed(dbc, dbt) - DBC *dbc; - DBT *dbt; -{ - DB *dbp; - RECNO *rp; - int ret; - - dbp = dbc->dbp; - rp = ((BTREE *)dbp->internal)->recno; - - /* - * If database contains fixed-length records, and the record is long, - * return EINVAL. - */ - if (dbt->size > rp->re_len) - return (EINVAL); - - /* - * The caller checked to see if it was just right, so we know it's - * short. Pad it out. We use the record data return memory, it's - * only a short-term use. - */ - if (dbc->rdata.ulen < rp->re_len) { - if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = NULL; - return (ret); - } - dbc->rdata.ulen = rp->re_len; - } - memcpy(dbc->rdata.data, dbt->data, dbt->size); - memset((u_int8_t *)dbc->rdata.data + dbt->size, - rp->re_pad, rp->re_len - dbt->size); - - /* - * Clean up our flags and other information just in case, and - * change the caller's DBT to reference our created record. 
- */ - dbc->rdata.size = rp->re_len; - dbc->rdata.dlen = 0; - dbc->rdata.doff = 0; - dbc->rdata.flags = 0; - *dbt = dbc->rdata; - - return (0); -} - -/* - * __bam_partial -- - * Build the real record for a partial put. - */ -static int -__bam_partial(dbc, dbt, h, indx, nbytes, flags) - DBC *dbc; - DBT *dbt; - PAGE *h; - u_int32_t indx, nbytes, flags; -{ - BKEYDATA *bk, tbk; - BOVERFLOW *bo; - DB *dbp; - DBT copy; - u_int32_t len, tlen; - u_int8_t *p; - int ret; - - COMPQUIET(bo, NULL); - - dbp = dbc->dbp; - - /* We use the record data return memory, it's only a short-term use. */ - if (dbc->rdata.ulen < nbytes) { - if ((ret = __os_realloc(&dbc->rdata.data, nbytes)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = NULL; - return (ret); - } - dbc->rdata.ulen = nbytes; - } - - /* - * We use nul bytes for any part of the record that isn't specified; - * get it over with. - */ - memset(dbc->rdata.data, 0, nbytes); - - /* - * In the next clauses, we need to do three things: a) set p to point - * to the place at which to copy the user's data, b) set tlen to the - * total length of the record, not including the bytes contributed by - * the user, and c) copy any valid data from an existing record. - */ - if (LF_ISSET(BI_NEWKEY)) { - tlen = dbt->doff; - p = (u_int8_t *)dbc->rdata.data + dbt->doff; - goto ucopy; - } - - /* Find the current record. */ - if (indx < NUM_ENT(h)) { - bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); - bo = (BOVERFLOW *)bk; - } else { - bk = &tbk; - B_TSET(bk->type, B_KEYDATA, 0); - bk->len = 0; - } - if (B_TYPE(bk->type) == B_OVERFLOW) { - /* - * In the case of an overflow record, we shift things around - * in the current record rather than allocate a separate copy. - */ - memset(&copy, 0, sizeof(copy)); - if ((ret = __db_goff(dbp, &copy, bo->tlen, - bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) - return (ret); - - /* Skip any leading data from the original record.
*/ - tlen = dbt->doff; - p = (u_int8_t *)dbc->rdata.data + dbt->doff; - - /* - * Copy in any trailing data from the original record. - * - * If the original record was larger than the original offset - * plus the bytes being deleted, there is trailing data in the - * original record we need to preserve. If we aren't deleting - * the same number of bytes as we're inserting, copy it up or - * down, into place. - * - * Use memmove(), the regions may overlap. - */ - if (bo->tlen > dbt->doff + dbt->dlen) { - len = bo->tlen - (dbt->doff + dbt->dlen); - if (dbt->dlen != dbt->size) - memmove(p + dbt->size, p + dbt->dlen, len); - tlen += len; - } - } else { - /* Copy in any leading data from the original record. */ - memcpy(dbc->rdata.data, - bk->data, dbt->doff > bk->len ? bk->len : dbt->doff); - tlen = dbt->doff; - p = (u_int8_t *)dbc->rdata.data + dbt->doff; - - /* Copy in any trailing data from the original record. */ - len = dbt->doff + dbt->dlen; - if (bk->len > len) { - memcpy(p + dbt->size, bk->data + len, bk->len - len); - tlen += bk->len - len; - } - } - -ucopy: /* - * Copy in the application provided data -- p and tlen must have been - * initialized above. - */ - memcpy(p, dbt->data, dbt->size); - tlen += dbt->size; - - /* Set the DBT to reference our new record. */ - dbc->rdata.size = tlen; - dbc->rdata.dlen = 0; - dbc->rdata.doff = 0; - dbc->rdata.flags = 0; - *dbt = dbc->rdata; - return (0); -} diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c deleted file mode 100644 index de6b3b7d0e..0000000000 --- a/db2/btree/bt_rec.c +++ /dev/null @@ -1,903 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_rec.c 10.28 (Sleepycat) 9/27/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "db_page.h" -#include "shqueue.h" -#include "hash.h" -#include "btree.h" -#include "log.h" -#include "common_ext.h" - -/* - * __bam_pg_alloc_recover -- - * Recovery function for pg_alloc. - * - * PUBLIC: int __bam_pg_alloc_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_pg_alloc_args *argp; - BTMETA *meta; - DB_MPOOLFILE *mpf; - PAGE *pagep; - DB *file_dbp; - DBC *dbc; - db_pgno_t pgno; - int cmp_n, cmp_p, modified, ret; - - REC_PRINT(__bam_pg_alloc_print); - REC_INTRO(__bam_pg_alloc_read); - - /* - * Fix up the allocated page. If we're redoing the operation, we have - * to get the page (creating it if it doesn't exist), and update its - * LSN. If we're undoing the operation, we have to reset the page's - * LSN and put it on the free list. - * - * Fix up the metadata page. If we're redoing the operation, we have - * to get the metadata page and update its LSN and its free pointer. - * If we're undoing the operation and the page was ever created, we put - * it on the freelist. - */ - pgno = PGNO_METADATA; - if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) { - /* The metadata page must always exist. */ - (void)__db_pgerr(file_dbp, pgno); - goto out; - } - if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) { - /* - * We specify creation and check for it later, because this - * operation was supposed to create the page, and even in - * the undo case it's going to get linked onto the freelist - * which we're also fixing up.
- */ - (void)__db_pgerr(file_dbp, argp->pgno); - (void)memp_fput(mpf, meta, 0); - goto out; - } - - /* Fix up the allocated page. */ - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->page_lsn); - if (cmp_p == 0 && redo) { - /* Need to redo update described. */ - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype); - - pagep->lsn = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID); - - pagep->lsn = argp->page_lsn; - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) { - (void)memp_fput(mpf, meta, 0); - goto out; - } - - /* Fix up the metadata page. */ - modified = 0; - cmp_n = log_compare(lsnp, &LSN(meta)); - cmp_p = log_compare(&LSN(meta), &argp->meta_lsn); - if (cmp_p == 0 && redo) { - /* Need to redo update described. */ - meta->lsn = *lsnp; - meta->free = argp->next; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - meta->lsn = argp->meta_lsn; - meta->free = argp->pgno; - modified = 1; - } - if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -} - -/* - * __bam_pg_free_recover -- - * Recovery function for pg_free. - * - * PUBLIC: int __bam_pg_free_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_pg_free_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_pg_free_args *argp; - BTMETA *meta; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, modified, ret; - - REC_PRINT(__bam_pg_free_print); - REC_INTRO(__bam_pg_free_read); - - /* - * Fix up the freed page. 
If we're redoing the operation we get the - * page and explicitly discard its contents, then update its LSN. If - * we're undoing the operation, we get the page and restore its header. - */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { - /* - * We don't automatically create the page. The only way the - * page might not exist is if the alloc never happened, and - * the only way the alloc might never have happened is if we - * are undoing, in which case there's no reason to create the - * page. - */ - if (!redo) - goto done; - (void)__db_pgerr(file_dbp, argp->pgno); - goto out; - } - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &LSN(argp->header.data)); - if (cmp_p == 0 && redo) { - /* Need to redo update described. */ - P_INIT(pagep, file_dbp->pgsize, - pagep->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); - pagep->lsn = *lsnp; - - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - memcpy(pagep, argp->header.data, argp->header.size); - - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - - /* - * Fix up the metadata page. If we're redoing or undoing the operation - * we get the page and update its LSN and free pointer. - */ - pgno = PGNO_METADATA; - if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) { - /* The metadata page must always exist. */ - (void)__db_pgerr(file_dbp, pgno); - goto out; - } - - modified = 0; - cmp_n = log_compare(lsnp, &LSN(meta)); - cmp_p = log_compare(&LSN(meta), &argp->meta_lsn); - if (cmp_p == 0 && redo) { - /* Need to redo update described. */ - meta->free = argp->pgno; - - meta->lsn = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - meta->free = argp->next; - - meta->lsn = argp->meta_lsn; - modified = 1; - } - if ((ret = memp_fput(mpf, meta, modified ? 
DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -} - -/* - * __bam_split_recover -- - * Recovery function for split. - * - * PUBLIC: int __bam_split_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_split_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_split_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp; - db_pgno_t pgno; - int l_update, p_update, r_update, ret, rootsplit, t_ret; - - REC_PRINT(__bam_split_print); - - mpf = NULL; - _lp = lp = np = pp = _rp = rp = NULL; - - REC_INTRO(__bam_split_read); - - /* - * There are two kinds of splits that we have to recover from. The - * first is a root-page split, where the root page is split from a - * leaf page into an internal page and two new leaf pages are created. - * The second is where a page is split into two pages, and a new key - * is inserted into the parent page. - */ - sp = argp->pg.data; - pgno = PGNO(sp); - rootsplit = pgno == PGNO_ROOT; - if (memp_fget(mpf, &argp->left, 0, &lp) != 0) - lp = NULL; - if (memp_fget(mpf, &argp->right, 0, &rp) != 0) - rp = NULL; - - if (redo) { - l_update = r_update = p_update = 0; - /* - * Decide if we need to resplit the page. - * - * If this is a root split, then the root has to exist, it's - * the page we're splitting and it gets modified. If this is - * not a root split, then the left page has to exist, for the - * same reason. 
- */ - if (rootsplit) { - if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) { - (void)__db_pgerr(file_dbp, pgno); - pp = NULL; - goto out; - } - p_update = - log_compare(&LSN(pp), &LSN(argp->pg.data)) == 0; - } else - if (lp == NULL) { - (void)__db_pgerr(file_dbp, argp->left); - goto out; - } - if (lp == NULL || log_compare(&LSN(lp), &argp->llsn) == 0) - l_update = 1; - if (rp == NULL || log_compare(&LSN(rp), &argp->rlsn) == 0) - r_update = 1; - if (!p_update && !l_update && !r_update) - goto done; - - /* Allocate and initialize new left/right child pages. */ - if ((ret = __os_malloc(file_dbp->pgsize, NULL, &_lp)) != 0 || - (ret = __os_malloc(file_dbp->pgsize, NULL, &_rp)) != 0) - goto out; - if (rootsplit) { - P_INIT(_lp, file_dbp->pgsize, argp->left, - PGNO_INVALID, - ISINTERNAL(sp) ? PGNO_INVALID : argp->right, - LEVEL(sp), TYPE(sp)); - P_INIT(_rp, file_dbp->pgsize, argp->right, - ISINTERNAL(sp) ? PGNO_INVALID : argp->left, - PGNO_INVALID, LEVEL(sp), TYPE(sp)); - } else { - P_INIT(_lp, file_dbp->pgsize, PGNO(sp), - ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp), - ISINTERNAL(sp) ? PGNO_INVALID : argp->right, - LEVEL(sp), TYPE(sp)); - P_INIT(_rp, file_dbp->pgsize, argp->right, - ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno, - ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp), - LEVEL(sp), TYPE(sp)); - } - - /* Split the page. */ - if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 || - (ret = __bam_copy(file_dbp, sp, _rp, argp->indx, - NUM_ENT(sp))) != 0) - goto out; - - /* If the left child is wrong, update it. */ - if (lp == NULL && (ret = - memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) { - (void)__db_pgerr(file_dbp, argp->left); - lp = NULL; - goto out; - } - if (l_update) { - memcpy(lp, _lp, file_dbp->pgsize); - lp->lsn = *lsnp; - if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0) - goto out; - lp = NULL; - } - - /* If the right child is wrong, update it. 
*/ - if (rp == NULL && (ret = memp_fget(mpf, - &argp->right, DB_MPOOL_CREATE, &rp)) != 0) { - (void)__db_pgerr(file_dbp, argp->right); - rp = NULL; - goto out; - } - if (r_update) { - memcpy(rp, _rp, file_dbp->pgsize); - rp->lsn = *lsnp; - if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0) - goto out; - rp = NULL; - } - - /* - * If the parent page is wrong, update it. This is of interest - * only if it was a root split, since root splits create parent - * pages. All other splits modify a parent page, but those are - * separately logged and recovered. - */ - if (rootsplit && p_update) { - if (file_dbp->type == DB_BTREE) - P_INIT(pp, file_dbp->pgsize, - PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, - _lp->level + 1, P_IBTREE); - else - P_INIT(pp, file_dbp->pgsize, - PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, - _lp->level + 1, P_IRECNO); - RE_NREC_SET(pp, - file_dbp->type == DB_RECNO || - F_ISSET(file_dbp, DB_BT_RECNUM) ? - __bam_total(_lp) + __bam_total(_rp) : 0); - pp->lsn = *lsnp; - if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0) - goto out; - pp = NULL; - } - - /* - * Finally, redo the next-page link if necessary. This is of - * interest only if it wasn't a root split -- inserting a new - * page in the tree requires that any following page have its - * previous-page pointer updated to our new page. The next - * page must exist because we're redoing the operation. - */ - if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) { - if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) { - (void)__db_pgerr(file_dbp, argp->npgno); - np = NULL; - goto out; - } - if (log_compare(&LSN(np), &argp->nlsn) == 0) { - PREV_PGNO(np) = argp->right; - np->lsn = *lsnp; - if ((ret = - memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0) - goto out; - np = NULL; - } - } - } else { - /* - * If the split page is wrong, replace its contents with the - * logged page contents. 
If the page doesn't exist, it means - * that the create of the page never happened, nor did any of - * the adds onto the page that caused the split, and there's - * really no undo-ing to be done. - */ - if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) { - pp = NULL; - goto lrundo; - } - if (log_compare(lsnp, &LSN(pp)) == 0) { - memcpy(pp, argp->pg.data, argp->pg.size); - if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0) - goto out; - pp = NULL; - } - - /* - * If it's a root split and the left child ever existed, update - * its LSN. (If it's not a root split, we've updated the left - * page already -- it's the same as the split page.) If the - * right child ever existed, root split or not, update its LSN. - * The undo of the page allocation(s) will restore them to the - * free list. - */ -lrundo: if ((rootsplit && lp != NULL) || rp != NULL) { - if (rootsplit && lp != NULL && - log_compare(lsnp, &LSN(lp)) == 0) { - lp->lsn = argp->llsn; - if ((ret = - memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0) - goto out; - lp = NULL; - } - if (rp != NULL && - log_compare(lsnp, &LSN(rp)) == 0) { - rp->lsn = argp->rlsn; - if ((ret = - memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0) - goto out; - rp = NULL; - } - } - - /* - * Finally, undo the next-page link if necessary. This is of - * interest only if it wasn't a root split -- inserting a new - * page in the tree requires that any following page have its - * previous-page pointer updated to our new page. Since it's - * possible that the next-page never existed, we ignore it as - * if there's nothing to undo. - */ - if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) { - if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) { - np = NULL; - goto done; - } - if (log_compare(lsnp, &LSN(np)) == 0) { - PREV_PGNO(np) = argp->left; - np->lsn = argp->nlsn; - if (memp_fput(mpf, np, DB_MPOOL_DIRTY)) - goto out; - np = NULL; - } - } - } - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: /* Free any pages that weren't dirtied. 
*/ - if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0) - ret = t_ret; - if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0) - ret = t_ret; - if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0) - ret = t_ret; - if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0) - ret = t_ret; - - /* Free any allocated space. */ - if (_lp != NULL) - __os_free(_lp, file_dbp->pgsize); - if (_rp != NULL) - __os_free(_rp, file_dbp->pgsize); - - REC_CLOSE; -} - -/* - * __bam_rsplit_recover -- - * Recovery function for a reverse split. - * - * PUBLIC: int __bam_rsplit_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_rsplit_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_rsplit_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, modified, ret; - - REC_PRINT(__bam_rsplit_print); - REC_INTRO(__bam_rsplit_read); - - /* Fix the root page. */ - pgno = PGNO_ROOT; - if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) { - /* The root page must always exist. */ - __db_pgerr(file_dbp, pgno); - goto out; - } - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->rootlsn); - if (cmp_p == 0 && redo) { - /* Need to redo update described. */ - memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size); - pagep->pgno = PGNO_ROOT; - pagep->lsn = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT, - argp->nrec, PGNO_INVALID, pagep->level + 1, - file_dbp->type == DB_BTREE ? P_IBTREE : P_IRECNO); - if ((ret = __db_pitem(dbc, pagep, 0, - argp->rootent.size, &argp->rootent, NULL)) != 0) - goto out; - pagep->lsn = argp->rootlsn; - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? 
DB_MPOOL_DIRTY : 0)) != 0) - goto out; - - /* - * Fix the page copied over the root page. It's possible that the - * page never made it to disk, so if we're undo-ing and the page - * doesn't exist, it's okay and there's nothing further to do. - */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (!redo) - goto done; - (void)__db_pgerr(file_dbp, argp->pgno); - goto out; - } - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &LSN(argp->pgdbt.data)); - if (cmp_p == 0 && redo) { - /* Need to redo update described. */ - pagep->lsn = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size); - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -} - -/* - * __bam_adj_recover -- - * Recovery function for adj. - * - * PUBLIC: int __bam_adj_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_adj_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_adj_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, modified, ret; - - REC_PRINT(__bam_adj_print); - REC_INTRO(__bam_adj_read); - - /* Get the page; if it never existed and we're undoing, we're done. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (!redo) - goto done; - (void)__db_pgerr(file_dbp, argp->pgno); - goto out; - } - - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->lsn); - if (cmp_p == 0 && redo) { - /* Need to redo update described. 
*/ - if ((ret = __bam_adjindx(dbc, - pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0) - goto err; - - LSN(pagep) = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - if ((ret = __bam_adjindx(dbc, - pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0) - goto err; - - LSN(pagep) = argp->lsn; - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - - if (0) { -err: (void)memp_fput(mpf, pagep, 0); - } -out: REC_CLOSE; -} - -/* - * __bam_cadjust_recover -- - * Recovery function for the adjust of a count change in an internal - * page. - * - * PUBLIC: int __bam_cadjust_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_cadjust_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_cadjust_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, modified, ret; - - REC_PRINT(__bam_cadjust_print); - REC_INTRO(__bam_cadjust_read); - - /* Get the page; if it never existed and we're undoing, we're done. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (!redo) - goto done; - (void)__db_pgerr(file_dbp, argp->pgno); - goto out; - } - - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->lsn); - if (cmp_p == 0 && redo) { - /* Need to redo update described. 
*/ - if (file_dbp->type == DB_BTREE && - F_ISSET(file_dbp, DB_BT_RECNUM)) { - GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust; - if (argp->total && PGNO(pagep) == PGNO_ROOT) - RE_NREC_ADJ(pagep, argp->adjust); - } - if (file_dbp->type == DB_RECNO) { - GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust; - if (argp->total && PGNO(pagep) == PGNO_ROOT) - RE_NREC_ADJ(pagep, argp->adjust); - } - - LSN(pagep) = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - if (file_dbp->type == DB_BTREE && - F_ISSET(file_dbp, DB_BT_RECNUM)) { - GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust; - if (argp->total && PGNO(pagep) == PGNO_ROOT) - RE_NREC_ADJ(pagep, argp->adjust); - } - if (file_dbp->type == DB_RECNO) { - GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust; - if (argp->total && PGNO(pagep) == PGNO_ROOT) - RE_NREC_ADJ(pagep, -(argp->adjust)); - } - LSN(pagep) = argp->lsn; - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -} - -/* - * __bam_cdel_recover -- - * Recovery function for the intent-to-delete of a cursor record. - * - * PUBLIC: int __bam_cdel_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_cdel_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_cdel_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, modified, ret; - - REC_PRINT(__bam_cdel_print); - REC_INTRO(__bam_cdel_read); - - /* Get the page; if it never existed and we're undoing, we're done. 
*/ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (!redo) - goto done; - (void)__db_pgerr(file_dbp, argp->pgno); - goto out; - } - - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->lsn); - if (cmp_p == 0 && redo) { - /* Need to redo update described. */ - if (pagep->type == P_DUPLICATE) - B_DSET(GET_BKEYDATA(pagep, argp->indx)->type); - else - B_DSET(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type); - - LSN(pagep) = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* Need to undo update described. */ - if (pagep->type == P_DUPLICATE) - B_DCLR(GET_BKEYDATA(pagep, argp->indx)->type); - else - B_DCLR(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type); - - LSN(pagep) = argp->lsn; - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -} - -/* - * __bam_repl_recover -- - * Recovery function for page item replacement. - * - * PUBLIC: int __bam_repl_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_repl_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __bam_repl_args *argp; - BKEYDATA *bk; - DB *file_dbp; - DBC *dbc; - DBT dbt; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, modified, ret; - u_int8_t *p; - - REC_PRINT(__bam_repl_print); - REC_INTRO(__bam_repl_read); - - /* Get the page; if it never existed and we're undoing, we're done. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (!redo) - goto done; - (void)__db_pgerr(file_dbp, argp->pgno); - goto out; - } - bk = GET_BKEYDATA(pagep, argp->indx); - - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->lsn); - if (cmp_p == 0 && redo) { - /* - * Need to redo update described. - * - * Re-build the replacement item. 
- */ - memset(&dbt, 0, sizeof(dbt)); - dbt.size = argp->prefix + argp->suffix + argp->repl.size; - if ((ret = __os_malloc(dbt.size, NULL, &dbt.data)) != 0) - goto err; - p = dbt.data; - memcpy(p, bk->data, argp->prefix); - p += argp->prefix; - memcpy(p, argp->repl.data, argp->repl.size); - p += argp->repl.size; - memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); - - ret = __bam_ritem(dbc, pagep, argp->indx, &dbt); - __os_free(dbt.data, dbt.size); - if (ret != 0) - goto err; - - LSN(pagep) = *lsnp; - modified = 1; - } else if (cmp_n == 0 && !redo) { - /* - * Need to undo update described. - * - * Re-build the original item. - */ - memset(&dbt, 0, sizeof(dbt)); - dbt.size = argp->prefix + argp->suffix + argp->orig.size; - if ((ret = __os_malloc(dbt.size, NULL, &dbt.data)) != 0) - goto err; - p = dbt.data; - memcpy(p, bk->data, argp->prefix); - p += argp->prefix; - memcpy(p, argp->orig.data, argp->orig.size); - p += argp->orig.size; - memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); - - ret = __bam_ritem(dbc, pagep, argp->indx, &dbt); - __os_free(dbt.data, dbt.size); - if (ret != 0) - goto err; - - /* Reset the deleted flag, if necessary. */ - if (argp->isdeleted) - B_DSET(GET_BKEYDATA(pagep, argp->indx)->type); - - LSN(pagep) = argp->lsn; - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - - if (0) { -err: (void)memp_fput(mpf, pagep, 0); - } -out: REC_CLOSE; -} diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c deleted file mode 100644 index c69877ff7f..0000000000 --- a/db2/btree/bt_recno.c +++ /dev/null @@ -1,1356 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1997, 1998 - * Sleepycat Software. All rights reserved. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_recno.c 10.53 (Sleepycat) 12/11/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" -#include "db_ext.h" -#include "shqueue.h" -#include "db_shash.h" -#include "lock.h" -#include "lock_ext.h" - -static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t)); -static int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); -static int __ram_fmap __P((DBC *, db_recno_t)); -static int __ram_i_delete __P((DBC *)); -static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); -static int __ram_source __P((DB *, RECNO *, const char *)); -static int __ram_sync __P((DB *, u_int32_t)); -static int __ram_update __P((DBC *, db_recno_t, int)); -static int __ram_vmap __P((DBC *, db_recno_t)); -static int __ram_writeback __P((DBC *)); - -/* - * In recno, there are two meanings to the on-page "deleted" flag. If we're - * re-numbering records, it means the record was implicitly created. We skip - * over implicitly created records if doing a cursor "next" or "prev", and - * return DB_KEYEMPTY if they're explicitly requested.. If not re-numbering - * records, it means that the record was implicitly created, or was deleted. - * We skip over implicitly created or deleted records if doing a cursor "next" - * or "prev", and return DB_KEYEMPTY if they're explicitly requested. - * - * If we're re-numbering records, then we have to detect in the cursor that - * a record was deleted, and adjust the cursor as necessary on the next get. - * If we're not re-numbering records, then we can detect that a record has - * been deleted by looking at the actual on-page record, so we completely - * ignore the cursor's delete flag. This is different from the B+tree code. 
- * It also maintains whether the cursor references a deleted record in the - * cursor, and it doesn't always check the on-page value. - */ -#define CD_SET(dbp, cp) { \ - if (F_ISSET(dbp, DB_RE_RENUMBER)) \ - F_SET(cp, C_DELETED); \ -} -#define CD_CLR(dbp, cp) { \ - if (F_ISSET(dbp, DB_RE_RENUMBER)) \ - F_CLR(cp, C_DELETED); \ -} -#define CD_ISSET(dbp, cp) \ - (F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, C_DELETED)) - -/* - * __ram_open -- - * Recno open function. - * - * PUBLIC: int __ram_open __P((DB *, DB_INFO *)); - */ -int -__ram_open(dbp, dbinfo) - DB *dbp; - DB_INFO *dbinfo; -{ - BTREE *t; - DBC *dbc; - RECNO *rp; - int ret, t_ret; - - /* Allocate and initialize the private btree structure. */ - if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0) - return (ret); - dbp->internal = t; - __bam_setovflsize(dbp); - - /* Allocate and initialize the private recno structure. */ - if ((ret = __os_calloc(1, sizeof(*rp), &rp)) != 0) - return (ret); - /* Link in the private recno structure. */ - t->recno = rp; - - /* - * Intention is to make sure all of the user's selections are okay - * here and then use them without checking. - */ - if (dbinfo == NULL) { - rp->re_delim = '\n'; - rp->re_pad = ' '; - rp->re_fd = -1; - F_SET(rp, RECNO_EOF); - } else { - /* - * If the user specified a source tree, open it and map it in. - * - * !!! - * We don't complain if the user specified transactions or - * threads. It's possible to make it work, but you'd better - * know what you're doing! - */ - if (dbinfo->re_source == NULL) { - rp->re_fd = -1; - F_SET(rp, RECNO_EOF); - } else { - if ((ret = - __ram_source(dbp, rp, dbinfo->re_source)) != 0) - goto err; - } - - /* Copy delimiter, length and padding values. */ - rp->re_delim = - F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n'; - rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? 
dbinfo->re_pad : ' '; - - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if ((rp->re_len = dbinfo->re_len) == 0) { - __db_err(dbp->dbenv, - "record length must be greater than 0"); - ret = EINVAL; - goto err; - } - } else - rp->re_len = 0; - } - - /* Initialize the remaining fields/methods of the DB. */ - dbp->am_close = __ram_close; - dbp->del = __ram_delete; - dbp->put = __ram_put; - dbp->stat = __bam_stat; - dbp->sync = __ram_sync; - - /* Start up the tree. */ - if ((ret = __bam_read_root(dbp)) != 0) - goto err; - - /* Set the overflow page size. */ - __bam_setovflsize(dbp); - - /* If we're snapshotting an underlying source file, do it now. */ - if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT)) { - /* Allocate a cursor. */ - if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) - goto err; - - /* Do the snapshot. */ - if ((ret = __ram_update(dbc, - DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND) - ret = 0; - - /* Discard the cursor. */ - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - if (ret != 0) - goto err; - } - - return (0); - -err: /* If we mmap'd a source file, discard it. */ - if (rp->re_smap != NULL) - (void)__db_unmapfile(rp->re_smap, rp->re_msize); - - /* If we opened a source file, discard it. */ - if (rp->re_fd != -1) - (void)__os_close(rp->re_fd); - if (rp->re_source != NULL) - __os_freestr(rp->re_source); - - __os_free(rp, sizeof(*rp)); - - return (ret); -} - -/* - * __ram_delete -- - * Recno db->del function. - */ -static int -__ram_delete(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - CURSOR *cp; - DBC *dbc; - db_recno_t recno; - int ret, t_ret; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = __db_delchk(dbp, - key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) - return (ret); - - /* Acquire a cursor. 
*/ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags); - - /* Check the user's record number and fill in as necessary. */ - if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0) - goto err; - - /* Do the delete. */ - cp = dbc->internal; - cp->recno = recno; - ret = __ram_i_delete(dbc); - - /* Release the cursor. */ -err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __ram_i_delete -- - * Internal version of recno delete, called by __ram_delete and - * __ram_c_del. - */ -static int -__ram_i_delete(dbc) - DBC *dbc; -{ - BKEYDATA bk; - BTREE *t; - CURSOR *cp; - DB *dbp; - DBT hdr, data; - PAGE *h; - db_indx_t indx; - int exact, ret, stack; - - dbp = dbc->dbp; - cp = dbc->internal; - t = dbp->internal; - stack = 0; - - /* - * If this is CDB and this isn't a write cursor, then it's an error. - * If it is a write cursor, but we don't yet hold the write lock, then - * we need to upgrade to the write lock. - */ - if (F_ISSET(dbp, DB_AM_CDB)) { - /* Make sure it's a valid update cursor. */ - if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) - return (EINVAL); - - if (F_ISSET(dbc, DBC_RMW) && - (ret = lock_get(dbp->dbenv->lk_info, dbc->locker, - DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, - &dbc->mylock)) != 0) - return (EAGAIN); - } - - /* Search the tree for the key; delete only deletes exact matches. */ - if ((ret = __bam_rsearch(dbc, &cp->recno, S_DELETE, 1, &exact)) != 0) - goto err; - if (!exact) { - ret = DB_NOTFOUND; - goto err; - } - stack = 1; - - h = cp->csp->page; - indx = cp->csp->indx; - - /* - * If re-numbering records, the on-page deleted flag can only mean - * that this record was implicitly created. Applications aren't - * permitted to delete records they never created, return an error. - * - * If not re-numbering records, the on-page deleted flag means that - * this record was implicitly created, or, was deleted at some time. 
- * The former is an error because applications aren't permitted to - * delete records they never created, the latter is an error because - * if the record was "deleted", we could never have found it. - */ - if (B_DISSET(GET_BKEYDATA(h, indx)->type)) { - ret = DB_KEYEMPTY; - goto err; - } - - if (F_ISSET(dbp, DB_RE_RENUMBER)) { - /* Delete the item, adjust the counts, adjust the cursors. */ - if ((ret = __bam_ditem(dbc, h, indx)) != 0) - goto err; - __bam_adjust(dbc, -1); - __ram_ca(dbp, cp->recno, CA_DELETE); - - /* - * If the page is empty, delete it. The whole tree is locked - * so there are no preparations to make. - */ - if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) { - stack = 0; - ret = __bam_dpages(dbc); - } - } else { - /* Use a delete/put pair to replace the record with a marker. */ - if ((ret = __bam_ditem(dbc, h, indx)) != 0) - goto err; - - B_TSET(bk.type, B_KEYDATA, 1); - bk.len = 0; - memset(&hdr, 0, sizeof(hdr)); - hdr.data = &bk; - hdr.size = SSZA(BKEYDATA, data); - memset(&data, 0, sizeof(data)); - data.data = (char *)""; - data.size = 0; - if ((ret = __db_pitem(dbc, - h, indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0) - goto err; - } - F_SET(t->recno, RECNO_MODIFIED); - -err: if (stack) - __bam_stkrel(dbc, 0); - - /* If we upgraded the CDB lock upon entry; downgrade it now. */ - if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) - (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, - DB_LOCK_IWRITE, 0); - return (ret); -} - -/* - * __ram_put -- - * Recno db->put function. - */ -static int -__ram_put(dbp, txn, key, data, flags) - DB *dbp; - DB_TXN *txn; - DBT *key, *data; - u_int32_t flags; -{ - DBC *dbc; - db_recno_t recno; - int ret, t_ret; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = __db_putchk(dbp, - key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0) - return (ret); - - /* Allocate a cursor. 
*/ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags); - - /* - * If we're appending to the tree, make sure we've read in all of - * the backing source file. Otherwise, check the user's record - * number and fill in as necessary. - */ - ret = flags == DB_APPEND ? - __ram_update(dbc, DB_MAX_RECORDS, 0) : - __ram_getno(dbc, key, &recno, 1); - - /* Add the record. */ - if (ret == 0) - ret = __ram_add(dbc, &recno, data, flags, 0); - - /* Discard the cursor. */ - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - /* Return the record number if we're appending to the tree. */ - if (ret == 0 && flags == DB_APPEND) - *(db_recno_t *)key->data = recno; - - return (ret); -} - -/* - * __ram_sync -- - * Recno db->sync function. - */ -static int -__ram_sync(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - DBC *dbc; - int ret, t_ret; - - /* - * Sync the underlying btree. - * - * !!! - * We don't need to do a panic check or flags check, the "real" - * sync function does all that for us. - */ - if ((ret = __db_sync(dbp, flags)) != 0) - return (ret); - - /* Allocate a cursor. */ - if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, NULL, "ram_sync", NULL, NULL, flags); - - /* Copy back the backing source file. */ - ret = __ram_writeback(dbc); - - /* Discard the cursor. */ - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __ram_close -- - * Recno db->close function. - * - * PUBLIC: int __ram_close __P((DB *)); - */ -int -__ram_close(dbp) - DB *dbp; -{ - RECNO *rp; - - rp = ((BTREE *)dbp->internal)->recno; - - /* Close any underlying mmap region. */ - if (rp->re_smap != NULL) - (void)__db_unmapfile(rp->re_smap, rp->re_msize); - - /* Close any backing source file descriptor. */ - if (rp->re_fd != -1) - (void)__os_close(rp->re_fd); - - /* Free any backing source file name. 
*/ - if (rp->re_source != NULL) - __os_freestr(rp->re_source); - - /* Free allocated memory. */ - __os_free(rp, sizeof(RECNO)); - ((BTREE *)dbp->internal)->recno = NULL; - - /* Close the underlying btree. */ - return (__bam_close(dbp)); -} - -/* - * __ram_c_del -- - * Recno cursor->c_del function. - * - * PUBLIC: int __ram_c_del __P((DBC *, u_int32_t)); - */ -int -__ram_c_del(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - CURSOR *cp; - DB *dbp; - int ret; - - dbp = dbc->dbp; - cp = dbc->internal; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = __db_cdelchk(dbp, flags, - F_ISSET(dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, dbc->txn, "ram_c_del", NULL, NULL, flags); - - /* - * If we are running CDB, this had better be either a write - * cursor or an immediate writer. - */ - if (F_ISSET(dbp, DB_AM_CDB)) - if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) - return (EINVAL); - - /* - * The semantics of cursors during delete are as follows: if record - * numbers are mutable (DB_RE_RENUMBER is set), deleting a record - * causes the cursor to automatically point to the record immediately - * following. In this case it is possible to use a single cursor for - * repeated delete operations, without intervening operations. - * - * If record numbers are not mutable, then records are replaced with - * a marker containing a delete flag. If the record referenced by - * this cursor has already been deleted, we will detect that as part - * of the delete operation, and fail. - */ - return (__ram_i_delete(dbc)); -} - -/* - * __ram_c_get -- - * Recno cursor->c_get function. 
- * - * PUBLIC: int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__ram_c_get(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - CURSOR *cp, copy; - DB *dbp; - PAGE *h; - db_indx_t indx; - int exact, ret, stack, tmp_rmw; - - dbp = dbc->dbp; - cp = dbc->internal; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = __db_cgetchk(dbc->dbp, - key, data, flags, cp->recno != RECNO_OOB)) != 0) - return (ret); - - /* Clear OR'd in additional bits so we can check for flag equality. */ - tmp_rmw = 0; - if (LF_ISSET(DB_RMW)) { - if (!F_ISSET(dbp, DB_AM_CDB)) { - tmp_rmw = 1; - F_SET(dbc, DBC_RMW); - } - LF_CLR(DB_RMW); - } - - DEBUG_LREAD(dbc, dbc->txn, "ram_c_get", - flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); - - /* Initialize the cursor for a new retrieval. */ - copy = *cp; - -retry: /* Update the record number. */ - stack = 0; - switch (flags) { - case DB_CURRENT: - /* - * If record numbers are mutable: if we just deleted a record, - * there is no action necessary, we return the record following - * the deleted item by virtue of renumbering the tree. - */ - break; - case DB_NEXT: - /* - * If record numbers are mutable: if we just deleted a record, - * we have to avoid incrementing the record number so that we - * return the right record by virtue of renumbering the tree. 
- */ - if (CD_ISSET(dbp, cp)) - break; - - if (cp->recno != RECNO_OOB) { - ++cp->recno; - break; - } - /* FALLTHROUGH */ - case DB_FIRST: - flags = DB_NEXT; - cp->recno = 1; - break; - case DB_PREV: - if (cp->recno != RECNO_OOB) { - if (cp->recno == 1) { - ret = DB_NOTFOUND; - goto err; - } - --cp->recno; - break; - } - /* FALLTHROUGH */ - case DB_LAST: - flags = DB_PREV; - if (((ret = __ram_update(dbc, - DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND) - goto err; - if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0) - goto err; - if (cp->recno == 0) { - ret = DB_NOTFOUND; - goto err; - } - break; - case DB_SET: - case DB_SET_RANGE: - if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0) - goto err; - break; - } - - /* Return the key if the user didn't give us one. */ - if (flags != DB_SET && flags != DB_SET_RANGE && - (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno), - &dbc->rkey.data, &dbc->rkey.ulen, dbp->db_malloc)) != 0) - goto err; - - /* Search the tree for the record. */ - if ((ret = __bam_rsearch(dbc, &cp->recno, - F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, 1, &exact)) != 0) - goto err; - stack = 1; - if (!exact) { - ret = DB_NOTFOUND; - goto err; - } - h = cp->csp->page; - indx = cp->csp->indx; - - /* - * If re-numbering records, the on-page deleted flag means this record - * was implicitly created. If not re-numbering records, the on-page - * deleted flag means this record was implicitly created, or, it was - * deleted at some time. Regardless, we skip such records if doing - * cursor next/prev operations, and fail if the application requested - * them explicitly. - */ - if (B_DISSET(GET_BKEYDATA(h, indx)->type)) { - if (flags == DB_NEXT || flags == DB_PREV) { - (void)__bam_stkrel(dbc, 0); - goto retry; - } - ret = DB_KEYEMPTY; - goto err; - } - - /* Return the data item. */ - if ((ret = __db_ret(dbp, - h, indx, data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) - goto err; - - /* The cursor was reset, no further delete adjustment is necessary. 
*/ - CD_CLR(dbp, cp); - -err: if (stack) - (void)__bam_stkrel(dbc, 0); - - /* Release temporary lock upgrade. */ - if (tmp_rmw) - F_CLR(dbc, DBC_RMW); - - if (ret != 0) - *cp = copy; - - return (ret); -} - -/* - * __ram_c_put -- - * Recno cursor->c_put function. - * - * PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__ram_c_put(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - CURSOR *cp, copy; - DB *dbp; - int exact, ret; - void *arg; - - dbp = dbc->dbp; - cp = dbc->internal; - - DB_PANIC_CHECK(dbp); - - if ((ret = __db_cputchk(dbc->dbp, key, data, flags, - F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, dbc->txn, "ram_c_put", NULL, data, flags); - - /* - * If we are running CDB, this had better be either a write - * cursor or an immediate writer. If it's a regular writer, - * that means we have an IWRITE lock and we need to upgrade - * it to a write lock. - */ - if (F_ISSET(dbp, DB_AM_CDB)) { - if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) - return (EINVAL); - - if (F_ISSET(dbc, DBC_RMW) && - (ret = lock_get(dbp->dbenv->lk_info, dbc->locker, - DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, - &dbc->mylock)) != 0) - return (EAGAIN); - } - - /* Initialize the cursor for a new retrieval. */ - copy = *cp; - - /* - * To split, we need a valid key for the page. Since it's a cursor, - * we have to build one. - * - * The split code discards all short-term locks and stack pages. 
- */ - if (0) { -split: arg = &cp->recno; - if ((ret = __bam_split(dbc, arg)) != 0) - goto err; - } - - if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) - goto err; - if (!exact) { - ret = DB_NOTFOUND; - goto err; - } - if ((ret = __bam_iitem(dbc, &cp->csp->page, - &cp->csp->indx, key, data, flags, 0)) == DB_NEEDSPLIT) { - if ((ret = __bam_stkrel(dbc, 0)) != 0) - goto err; - goto split; - } - if ((ret = __bam_stkrel(dbc, 0)) != 0) - goto err; - - switch (flags) { - case DB_AFTER: - /* Adjust the cursors. */ - __ram_ca(dbp, cp->recno, CA_IAFTER); - - /* Set this cursor to reference the new record. */ - cp->recno = copy.recno + 1; - break; - case DB_BEFORE: - /* Adjust the cursors. */ - __ram_ca(dbp, cp->recno, CA_IBEFORE); - - /* Set this cursor to reference the new record. */ - cp->recno = copy.recno; - break; - } - - /* The cursor was reset, no further delete adjustment is necessary. */ - CD_CLR(dbp, cp); - -err: if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) - (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, - DB_LOCK_IWRITE, 0); - - if (ret != 0) - *cp = copy; - - return (ret); -} - -/* - * __ram_ca -- - * Adjust cursors. - * - * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg)); - */ -void -__ram_ca(dbp, recno, op) - DB *dbp; - db_recno_t recno; - ca_recno_arg op; -{ - CURSOR *cp; - DBC *dbc; - - /* - * Adjust the cursors. See the comment in __bam_ca_delete(). - */ - DB_THREAD_LOCK(dbp); - for (dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = dbc->internal; - switch (op) { - case CA_DELETE: - if (recno > cp->recno) - --cp->recno; - if (recno == cp->recno) - CD_SET(dbp, cp); - break; - case CA_IAFTER: - if (recno > cp->recno) - ++cp->recno; - break; - case CA_IBEFORE: - if (recno >= cp->recno) - ++cp->recno; - break; - } - } - DB_THREAD_UNLOCK(dbp); -} - -/* - * __ram_getno -- - * Check the user's record number, and make sure we've seen it. 
- * - * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int)); - */ -int -__ram_getno(dbc, key, rep, can_create) - DBC *dbc; - const DBT *key; - db_recno_t *rep; - int can_create; -{ - DB *dbp; - db_recno_t recno; - - dbp = dbc->dbp; - - /* Check the user's record number. */ - if ((recno = *(db_recno_t *)key->data) == 0) { - __db_err(dbp->dbenv, "illegal record number of 0"); - return (EINVAL); - } - if (rep != NULL) - *rep = recno; - - /* - * Btree can neither create records nor read them in. Recno can - * do both, see if we can find the record. - */ - return (dbp->type == DB_RECNO ? - __ram_update(dbc, recno, can_create) : 0); -} - -/* - * __ram_update -- - * Ensure the tree has records up to and including the specified one. - */ -static int -__ram_update(dbc, recno, can_create) - DBC *dbc; - db_recno_t recno; - int can_create; -{ - BTREE *t; - DB *dbp; - RECNO *rp; - db_recno_t nrecs; - int ret; - - dbp = dbc->dbp; - t = dbp->internal; - rp = t->recno; - - /* - * If we can't create records and we've read the entire backing input - * file, we're done. - */ - if (!can_create && F_ISSET(rp, RECNO_EOF)) - return (0); - - /* - * If we haven't seen this record yet, try to get it from the original - * file. - */ - if ((ret = __bam_nrecs(dbc, &nrecs)) != 0) - return (ret); - if (!F_ISSET(rp, RECNO_EOF) && recno > nrecs) { - if ((ret = rp->re_irec(dbc, recno)) != 0) - return (ret); - if ((ret = __bam_nrecs(dbc, &nrecs)) != 0) - return (ret); - } - - /* - * If we can create records, create empty ones up to the requested - * record. 
- */ - if (!can_create || recno <= nrecs + 1) - return (0); - - dbc->rdata.dlen = 0; - dbc->rdata.doff = 0; - dbc->rdata.flags = 0; - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if (dbc->rdata.ulen < rp->re_len) { - if ((ret = - __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = NULL; - return (ret); - } - dbc->rdata.ulen = rp->re_len; - } - dbc->rdata.size = rp->re_len; - memset(dbc->rdata.data, rp->re_pad, rp->re_len); - } else - dbc->rdata.size = 0; - - while (recno > ++nrecs) - if ((ret = __ram_add(dbc, - &nrecs, &dbc->rdata, 0, BI_DELETED)) != 0) - return (ret); - return (0); -} - -/* - * __ram_source -- - * Load information about the backing file. - */ -static int -__ram_source(dbp, rp, fname) - DB *dbp; - RECNO *rp; - const char *fname; -{ - size_t size; - u_int32_t bytes, mbytes, oflags; - int ret; - - /* - * !!! - * The caller has full responsibility for cleaning up on error -- - * (it has to anyway, in case it fails after this routine succeeds). - */ - if ((ret = __db_appname(dbp->dbenv, - DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0) - return (ret); - - oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0; - if ((ret = - __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) { - __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); - return (ret); - } - - /* - * XXX - * We'd like to test to see if the file is too big to mmap. Since we - * don't know what size or type off_t's or size_t's are, or the largest - * unsigned integral type is, or what random insanity the local C - * compiler will perpetrate, doing the comparison in a portable way is - * flatly impossible. Hope that mmap fails if the file is too large. 
- */ - if ((ret = __os_ioinfo(rp->re_source, - rp->re_fd, &mbytes, &bytes, NULL)) != 0) { - __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); - return (ret); - } - if (mbytes == 0 && bytes == 0) { - F_SET(rp, RECNO_EOF); - return (0); - } - - size = mbytes * MEGABYTE + bytes; - if ((ret = __db_mapfile(rp->re_source, - rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0) - return (ret); - rp->re_cmap = rp->re_smap; - rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size); - rp->re_irec = F_ISSET(dbp, DB_RE_FIXEDLEN) ? __ram_fmap : __ram_vmap; - return (0); -} - -/* - * __ram_writeback -- - * Rewrite the backing file. - */ -static int -__ram_writeback(dbc) - DBC *dbc; -{ - DB *dbp; - DBT key, data; - RECNO *rp; - db_recno_t keyno; - ssize_t nw; - int fd, ret, t_ret; - u_int8_t delim, *pad; - - dbp = dbc->dbp; - rp = ((BTREE *)dbp->internal)->recno; - - /* If the file wasn't modified, we're done. */ - if (!F_ISSET(rp, RECNO_MODIFIED)) - return (0); - - /* If there's no backing source file, we're done. */ - if (rp->re_source == NULL) { - F_CLR(rp, RECNO_MODIFIED); - return (0); - } - - /* - * Read any remaining records into the tree. - * - * !!! - * This is why we can't support transactions when applications specify - * backing (re_source) files. At this point we have to read in the - * rest of the records from the file so that we can write all of the - * records back out again, which could modify a page for which we'd - * have to log changes and which we don't have locked. This could be - * partially fixed by taking a snapshot of the entire file during the - * db_open(), or, since db_open() isn't transaction protected, as part - * of the first DB operation. But, if a checkpoint occurs then, the - * part of the log holding the copy of the file could be discarded, and - * that would make it impossible to recover in the face of disaster. - * This could all probably be fixed, but it would require transaction - * protecting the backing source file, i.e. 
mpool would have to know - * about it, and we don't want to go there. - */ - if ((ret = - __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND) - return (ret); - - /* - * !!! - * Close any underlying mmap region. This is required for Windows NT - * (4.0, Service Pack 2) -- if the file is still mapped, the following - * open will fail. - */ - if (rp->re_smap != NULL) { - (void)__db_unmapfile(rp->re_smap, rp->re_msize); - rp->re_smap = NULL; - } - - /* Get rid of any backing file descriptor, just on GP's. */ - if (rp->re_fd != -1) { - (void)__os_close(rp->re_fd); - rp->re_fd = -1; - } - - /* Open the file, truncating it. */ - if ((ret = __db_open(rp->re_source, - DB_SEQUENTIAL | DB_TRUNCATE, - DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) { - __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); - return (ret); - } - - /* - * We step through the records, writing each one out. Use the record - * number and the dbp->get() function, instead of a cursor, so we find - * and write out "deleted" or non-existent records. - */ - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - key.size = sizeof(db_recno_t); - key.data = &keyno; - - /* - * We'll need the delimiter if we're doing variable-length records, - * and the pad character if we're doing fixed-length records. 
- */ - delim = rp->re_delim; - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if ((ret = __os_malloc(rp->re_len, NULL, &pad)) != 0) - goto err; - memset(pad, rp->re_pad, rp->re_len); - } else - COMPQUIET(pad, NULL); - for (keyno = 1;; ++keyno) { - switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) { - case 0: - if ((ret = - __os_write(fd, data.data, data.size, &nw)) != 0) - goto err; - if (nw != (ssize_t)data.size) { - ret = EIO; - goto err; - } - break; - case DB_KEYEMPTY: - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if ((ret = - __os_write(fd, pad, rp->re_len, &nw)) != 0) - goto err; - if (nw != (ssize_t)rp->re_len) { - ret = EIO; - goto err; - } - } - break; - case DB_NOTFOUND: - ret = 0; - goto done; - } - if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if ((ret = __os_write(fd, &delim, 1, &nw)) != 0) - goto err; - if (nw != 1) { - ret = EIO; - goto err; - } - } - } - -err: -done: /* Close the file descriptor. */ - if ((t_ret = __os_close(fd)) != 0 || ret == 0) - ret = t_ret; - - if (ret == 0) - F_CLR(rp, RECNO_MODIFIED); - return (ret); -} - -/* - * __ram_fmap -- - * Get fixed length records from a file. 
- */ -static int -__ram_fmap(dbc, top) - DBC *dbc; - db_recno_t top; -{ - DB *dbp; - DBT data; - RECNO *rp; - db_recno_t recno; - u_int32_t len; - u_int8_t *sp, *ep, *p; - int ret; - - if ((ret = __bam_nrecs(dbc, &recno)) != 0) - return (ret); - - dbp = dbc->dbp; - rp = ((BTREE *)(dbp->internal))->recno; - - if (dbc->rdata.ulen < rp->re_len) { - if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = NULL; - return (ret); - } - dbc->rdata.ulen = rp->re_len; - } - - memset(&data, 0, sizeof(data)); - data.data = dbc->rdata.data; - data.size = rp->re_len; - - sp = (u_int8_t *)rp->re_cmap; - ep = (u_int8_t *)rp->re_emap; - while (recno < top) { - if (sp >= ep) { - F_SET(rp, RECNO_EOF); - return (DB_NOTFOUND); - } - len = rp->re_len; - for (p = dbc->rdata.data; - sp < ep && len > 0; *p++ = *sp++, --len) - ; - - /* - * Another process may have read this record from the input - * file and stored it into the database already, in which - * case we don't need to repeat that operation. We detect - * this by checking if the last record we've read is greater - * or equal to the number of records in the database. - * - * XXX - * We should just do a seek, since the records are fixed - * length. - */ - if (rp->re_last >= recno) { - if (len != 0) - memset(p, rp->re_pad, len); - - ++recno; - if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0) - return (ret); - } - ++rp->re_last; - } - rp->re_cmap = sp; - return (0); -} - -/* - * __ram_vmap -- - * Get variable length records from a file. 
- */ -static int -__ram_vmap(dbc, top) - DBC *dbc; - db_recno_t top; -{ - DBT data; - RECNO *rp; - db_recno_t recno; - u_int8_t *sp, *ep; - int delim, ret; - - rp = ((BTREE *)(dbc->dbp->internal))->recno; - - if ((ret = __bam_nrecs(dbc, &recno)) != 0) - return (ret); - - memset(&data, 0, sizeof(data)); - - delim = rp->re_delim; - - sp = (u_int8_t *)rp->re_cmap; - ep = (u_int8_t *)rp->re_emap; - while (recno < top) { - if (sp >= ep) { - F_SET(rp, RECNO_EOF); - return (DB_NOTFOUND); - } - for (data.data = sp; sp < ep && *sp != delim; ++sp) - ; - - /* - * Another process may have read this record from the input - * file and stored it into the database already, in which - * case we don't need to repeat that operation. We detect - * this by checking if the last record we've read is greater - * or equal to the number of records in the database. - */ - if (rp->re_last >= recno) { - data.size = sp - (u_int8_t *)data.data; - ++recno; - if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0) - return (ret); - } - ++rp->re_last; - ++sp; - } - rp->re_cmap = sp; - return (0); -} - -/* - * __ram_add -- - * Add records into the tree. - */ -static int -__ram_add(dbc, recnop, data, flags, bi_flags) - DBC *dbc; - db_recno_t *recnop; - DBT *data; - u_int32_t flags, bi_flags; -{ - BKEYDATA *bk; - CURSOR *cp; - DB *dbp; - PAGE *h; - db_indx_t indx; - int exact, isdeleted, ret, stack; - - dbp = dbc->dbp; - cp = dbc->internal; - -retry: /* Find the slot for insertion. */ - if ((ret = __bam_rsearch(dbc, recnop, - S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0) - return (ret); - h = cp->csp->page; - indx = cp->csp->indx; - stack = 1; - - /* - * If re-numbering records, the on-page deleted flag means this record - * was implicitly created. If not re-numbering records, the on-page - * deleted flag means this record was implicitly created, or, it was - * deleted at some time. 
- * - * If DB_NOOVERWRITE is set and the item already exists in the tree, - * return an error unless the item was either marked for deletion or - * only implicitly created. - */ - isdeleted = 0; - if (exact) { - bk = GET_BKEYDATA(h, indx); - if (B_DISSET(bk->type)) - isdeleted = 1; - else - if (flags == DB_NOOVERWRITE) { - ret = DB_KEYEXIST; - goto err; - } - } - - /* - * Select the arguments for __bam_iitem() and do the insert. If the - * key is an exact match, or we're replacing the data item with a - * new data item, replace the current item. If the key isn't an exact - * match, we're inserting a new key/data pair, before the search - * location. - */ - switch (ret = __bam_iitem(dbc, - &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) { - case 0: - /* - * Don't adjust anything. - * - * If we inserted a record, no cursors need adjusting because - * the only new record it's possible to insert is at the very - * end of the tree. The necessary adjustments to the internal - * page counts were made by __bam_iitem(). - * - * If we overwrote a record, no cursors need adjusting because - * future DBcursor->get calls will simply return the underlying - * record (there's no adjustment made for the DB_CURRENT flag - * when a cursor get operation immediately follows a cursor - * delete operation, and the normal adjustment for the DB_NEXT - * flag is still correct). - */ - break; - case DB_NEEDSPLIT: - /* Discard the stack of pages and split the page. */ - (void)__bam_stkrel(dbc, 0); - stack = 0; - - if ((ret = __bam_split(dbc, recnop)) != 0) - goto err; - - goto retry; - /* NOTREACHED */ - default: - goto err; - } - - -err: if (stack) - __bam_stkrel(dbc, 0); - - return (ret); -} diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c deleted file mode 100644 index 8efe4059a8..0000000000 --- a/db2/btree/bt_rsearch.c +++ /dev/null @@ -1,391 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. 
- * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_rsearch.c 10.21 (Sleepycat) 12/2/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -/* - * __bam_rsearch -- - * Search a btree for a record number. - * - * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *)); - */ -int -__bam_rsearch(dbc, recnop, flags, stop, exactp) - DBC *dbc; - db_recno_t *recnop; - u_int32_t flags; - int stop, *exactp; -{ - BINTERNAL *bi; - CURSOR *cp; - DB *dbp; - DB_LOCK lock; - PAGE *h; - RINTERNAL *ri; - db_indx_t indx, top; - db_pgno_t pg; - db_recno_t i, recno, total; - int ret, stack; - - dbp = dbc->dbp; - cp = dbc->internal; - - BT_STK_CLR(cp); - - /* - * There are several ways we search a btree tree. The flags argument - * specifies if we're acquiring read or write locks and if we are - * locking pairs of pages. In addition, if we're adding or deleting - * an item, we have to lock the entire tree, regardless. See btree.h - * for more details. - * - * If write-locking pages, we need to know whether or not to acquire a - * write lock on a page before getting it. This depends on how deep it - * is in tree, which we don't know until we acquire the root page. So, - * if we need to lock the root page we may have to upgrade it later, - * because we won't get the correct lock initially. - * - * Retrieve the root page. 
- */ - pg = PGNO_ROOT; - stack = LF_ISSET(S_STACK); - if ((ret = __bam_lget(dbc, - 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { - (void)__BT_LPUT(dbc, lock); - return (ret); - } - - /* - * Decide if we need to save this page; if we do, write lock it. - * We deliberately don't lock-couple on this call. If the tree - * is tiny, i.e., one page, and two threads are busily updating - * the root page, we're almost guaranteed deadlocks galore, as - * each one gets a read lock and then blocks the other's attempt - * for a write lock. - */ - if (!stack && - ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) || - (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) { - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbc, lock); - if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { - (void)__BT_LPUT(dbc, lock); - return (ret); - } - stack = 1; - } - - /* - * If appending to the tree, set the record number now -- we have the - * root page locked. - * - * Delete only deletes exact matches, read only returns exact matches. - * Note, this is different from __bam_search(), which returns non-exact - * matches for read. - * - * The record may not exist. We can only return the correct location - * for the record immediately after the last record in the tree, so do - * a fast check now. - */ - total = RE_NREC(h); - if (LF_ISSET(S_APPEND)) { - *exactp = 0; - *recnop = recno = total + 1; - } else { - recno = *recnop; - if (recno <= total) - *exactp = 1; - else { - *exactp = 0; - if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) { - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbc, lock); - return (DB_NOTFOUND); - } - } - } - - /* - * !!! - * Record numbers in the tree are 0-based, but the recno is - * 1-based. All of the calculations below have to take this - * into account. 
- */ - for (total = 0;;) { - switch (TYPE(h)) { - case P_LBTREE: - recno -= total; - - /* - * There may be logically deleted records on the page, - * walk the page correcting for them. The record may - * not exist if there are enough deleted records in the - * page. - */ - if (recno <= (db_recno_t)NUM_ENT(h) / P_INDX) - for (i = recno - 1;; --i) { - if (B_DISSET(GET_BKEYDATA(h, - i * P_INDX + O_INDX)->type)) - ++recno; - if (i == 0) - break; - } - if (recno > (db_recno_t)NUM_ENT(h) / P_INDX) { - *exactp = 0; - if (!LF_ISSET(S_PAST_EOF) || recno > - (db_recno_t)(NUM_ENT(h) / P_INDX + 1)) { - ret = DB_NOTFOUND; - goto err; - } - - } - - /* Correct from 1-based to 0-based for a page offset. */ - --recno; - BT_STK_ENTER(cp, h, recno * P_INDX, lock, ret); - return (ret); - case P_IBTREE: - for (indx = 0, top = NUM_ENT(h);;) { - bi = GET_BINTERNAL(h, indx); - if (++indx == top || total + bi->nrecs >= recno) - break; - total += bi->nrecs; - } - pg = bi->pgno; - break; - case P_LRECNO: - recno -= total; - - /* Correct from 1-based to 0-based for a page offset. */ - --recno; - BT_STK_ENTER(cp, h, recno, lock, ret); - return (ret); - case P_IRECNO: - for (indx = 0, top = NUM_ENT(h);;) { - ri = GET_RINTERNAL(h, indx); - if (++indx == top || total + ri->nrecs >= recno) - break; - total += ri->nrecs; - } - pg = ri->pgno; - break; - default: - return (__db_pgfmt(dbp, h->pgno)); - } - --indx; - - if (stack) { - /* Return if this is the lowest page wanted. */ - if (LF_ISSET(S_PARENT) && stop == h->level) { - BT_STK_ENTER(cp, h, indx, lock, ret); - return (ret); - } - BT_STK_PUSH(cp, h, indx, lock, ret); - if (ret != 0) - goto err; - - if ((ret = - __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0) - goto err; - } else { - /* - * Decide if we want to return a pointer to the next - * page in the stack. If we do, write lock it and - * never unlock it. 
- */ - if ((LF_ISSET(S_PARENT) && - (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) || - (h->level - 1) == LEAFLEVEL) - stack = 1; - - (void)memp_fput(dbp->mpf, h, 0); - - if ((ret = - __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ? - DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) - goto err; - } - - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) - goto err; - } - /* NOTREACHED */ - -err: BT_STK_POP(cp); - __bam_stkrel(dbc, 0); - return (ret); -} - -/* - * __bam_adjust -- - * Adjust the tree after adding or deleting a record. - * - * PUBLIC: int __bam_adjust __P((DBC *, int32_t)); - */ -int -__bam_adjust(dbc, adjust) - DBC *dbc; - int32_t adjust; -{ - CURSOR *cp; - DB *dbp; - EPG *epg; - PAGE *h; - int ret; - - dbp = dbc->dbp; - cp = dbc->internal; - - /* Update the record counts for the tree. */ - for (epg = cp->sp; epg <= cp->csp; ++epg) { - h = epg->page; - if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) { - if (DB_LOGGING(dbc) && - (ret = __bam_cadjust_log(dbp->dbenv->lg_info, - dbc->txn, &LSN(h), 0, dbp->log_fileid, - PGNO(h), &LSN(h), (u_int32_t)epg->indx, - adjust, 1)) != 0) - return (ret); - - if (TYPE(h) == P_IBTREE) - GET_BINTERNAL(h, epg->indx)->nrecs += adjust; - else - GET_RINTERNAL(h, epg->indx)->nrecs += adjust; - - if (PGNO(h) == PGNO_ROOT) - RE_NREC_ADJ(h, adjust); - - if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) - return (ret); - } - } - return (0); -} - -/* - * __bam_nrecs -- - * Return the number of records in the tree. 
- * - * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *)); - */ -int -__bam_nrecs(dbc, rep) - DBC *dbc; - db_recno_t *rep; -{ - DB *dbp; - DB_LOCK lock; - PAGE *h; - db_pgno_t pgno; - int ret; - - dbp = dbc->dbp; - - pgno = PGNO_ROOT; - if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) - return (ret); - - *rep = RE_NREC(h); - - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_TLPUT(dbc, lock); - - return (0); -} - -/* - * __bam_total -- - * Return the number of records below a page. - * - * PUBLIC: db_recno_t __bam_total __P((PAGE *)); - */ -db_recno_t -__bam_total(h) - PAGE *h; -{ - db_recno_t nrecs; - db_indx_t indx, top; - - nrecs = 0; - top = NUM_ENT(h); - - switch (TYPE(h)) { - case P_LBTREE: - /* Check for logically deleted records. */ - for (indx = 0; indx < top; indx += P_INDX) - if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type)) - ++nrecs; - break; - case P_IBTREE: - for (indx = 0; indx < top; indx += O_INDX) - nrecs += GET_BINTERNAL(h, indx)->nrecs; - break; - case P_LRECNO: - nrecs = NUM_ENT(h); - break; - case P_IRECNO: - for (indx = 0; indx < top; indx += O_INDX) - nrecs += GET_RINTERNAL(h, indx)->nrecs; - break; - } - - return (nrecs); -} diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c deleted file mode 100644 index 1f439a4261..0000000000 --- a/db2/btree/bt_search.c +++ /dev/null @@ -1,369 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_search.c 10.25 (Sleepycat) 12/16/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -/* - * __bam_search -- - * Search a btree for a key. - * - * PUBLIC: int __bam_search __P((DBC *, - * PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *)); - */ -int -__bam_search(dbc, key, flags, stop, recnop, exactp) - DBC *dbc; - const DBT *key; - u_int32_t flags; - int stop, *exactp; - db_recno_t *recnop; -{ - BTREE *t; - CURSOR *cp; - DB *dbp; - DB_LOCK lock; - PAGE *h; - db_indx_t base, i, indx, lim; - db_pgno_t pg; - db_recno_t recno; - int cmp, jump, ret, stack; - - dbp = dbc->dbp; - cp = dbc->internal; - t = dbp->internal; - recno = 0; - - BT_STK_CLR(cp); - - /* - * There are several ways we search a btree tree. The flags argument - * specifies if we're acquiring read or write locks, if we position - * to the first or last item in a set of duplicates, if we return - * deleted items, and if we are locking pairs of pages. In addition, - * if we're modifying record numbers, we have to lock the entire tree - * regardless. See btree.h for more details. - * - * If write-locking pages, we need to know whether or not to acquire a - * write lock on a page before getting it. This depends on how deep it - * is in tree, which we don't know until we acquire the root page. So, - * if we need to lock the root page we may have to upgrade it later, - * because we won't get the correct lock initially. - * - * Retrieve the root page. - */ - pg = PGNO_ROOT; - stack = F_ISSET(dbp, DB_BT_RECNUM) && LF_ISSET(S_STACK); - if ((ret = __bam_lget(dbc, - 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { - (void)__BT_LPUT(dbc, lock); - return (ret); - } - - /* - * Decide if we need to save this page; if we do, write lock it. 
- * We deliberately don't lock-couple on this call. If the tree - * is tiny, i.e., one page, and two threads are busily updating - * the root page, we're almost guaranteed deadlocks galore, as - * each one gets a read lock and then blocks the other's attempt - * for a write lock. - */ - if (!stack && - ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) || - (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) { - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbc, lock); - if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0) - return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { - (void)__BT_LPUT(dbc, lock); - return (ret); - } - stack = 1; - } - - for (;;) { - /* - * Do a binary search on the current page. If we're searching - * a leaf page, we have to manipulate the indices in groups of - * two. If we're searching an internal page, they're an index - * per page item. If we find an exact match on a leaf page, - * we're done. - */ - jump = TYPE(h) == P_LBTREE ? P_INDX : O_INDX; - for (base = 0, - lim = NUM_ENT(h) / (db_indx_t)jump; lim != 0; lim >>= 1) { - indx = base + ((lim >> 1) * jump); - if ((cmp = - __bam_cmp(dbp, key, h, indx, t->bt_compare)) == 0) { - if (TYPE(h) == P_LBTREE) - goto match; - goto next; - } - if (cmp > 0) { - base = indx + jump; - --lim; - } - } - - /* - * No match found. Base is the smallest index greater than - * key and may be zero or a last + O_INDX index. - * - * If it's a leaf page, return base as the "found" value. - * Delete only deletes exact matches. - */ - if (TYPE(h) == P_LBTREE) { - *exactp = 0; - - if (LF_ISSET(S_EXACT)) - goto notfound; - - /* - * !!! - * Possibly returning a deleted record -- DB_SET_RANGE, - * DB_KEYFIRST and DB_KEYLAST don't require an exact - * match, and we don't want to walk multiple pages here - * to find an undeleted record. This is handled in the - * __bam_c_search() routine. 
- */ - BT_STK_ENTER(cp, h, base, lock, ret); - return (ret); - } - - /* - * If it's not a leaf page, record the internal page (which is - * a parent page for the key). Decrement the base by 1 if it's - * non-zero so that if a split later occurs, the inserted page - * will be to the right of the saved page. - */ - indx = base > 0 ? base - O_INDX : base; - - /* - * If we're trying to calculate the record number, sum up - * all the record numbers on this page up to the indx point. - */ - if (recnop != NULL) - for (i = 0; i < indx; ++i) - recno += GET_BINTERNAL(h, i)->nrecs; - -next: pg = GET_BINTERNAL(h, indx)->pgno; - if (stack) { - /* Return if this is the lowest page wanted. */ - if (LF_ISSET(S_PARENT) && stop == h->level) { - BT_STK_ENTER(cp, h, indx, lock, ret); - return (ret); - } - BT_STK_PUSH(cp, h, indx, lock, ret); - if (ret != 0) - goto err; - - if ((ret = - __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0) - goto err; - } else { - /* - * Decide if we want to return a reference to the next - * page in the return stack. If so, lock it and never - * unlock it. - */ - if ((LF_ISSET(S_PARENT) && - (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) || - (h->level - 1) == LEAFLEVEL) - stack = 1; - - (void)memp_fput(dbp->mpf, h, 0); - - if ((ret = - __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ? - DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) - goto err; - } - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) - goto err; - } - /* NOTREACHED */ - -match: *exactp = 1; - - /* - * If we're trying to calculate the record number, add in the - * offset on this page and correct for the fact that records - * in the tree are 0-based. - */ - if (recnop != NULL) - *recnop = recno + (indx / P_INDX) + 1; - - /* - * If we got here, we know that we have a btree leaf page. - * - * If there are duplicates, go to the first/last one. 
This is - * safe because we know that we're not going to leave the page, - * all duplicate sets that are not on overflow pages exist on a - * single leaf page. - */ - if (LF_ISSET(S_DUPLAST)) - while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && - h->inp[indx] == h->inp[indx + P_INDX]) - indx += P_INDX; - else - while (indx > 0 && - h->inp[indx] == h->inp[indx - P_INDX]) - indx -= P_INDX; - - /* - * Now check if we are allowed to return deleted items; if not - * find the next (or previous) non-deleted item. - */ - if (LF_ISSET(S_DELNO)) { - if (LF_ISSET(S_DUPLAST)) - while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) && - indx > 0 && - h->inp[indx] == h->inp[indx - P_INDX]) - indx -= P_INDX; - else - while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) && - indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && - h->inp[indx] == h->inp[indx + P_INDX]) - indx += P_INDX; - - if (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type)) - goto notfound; - } - - BT_STK_ENTER(cp, h, indx, lock, ret); - return (ret); - -notfound: - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbc, lock); - ret = DB_NOTFOUND; - -err: if (cp->csp > cp->sp) { - BT_STK_POP(cp); - __bam_stkrel(dbc, 0); - } - return (ret); -} - -/* - * __bam_stkrel -- - * Release all pages currently held in the stack. - * - * PUBLIC: int __bam_stkrel __P((DBC *, int)); - */ -int -__bam_stkrel(dbc, nolocks) - DBC *dbc; - int nolocks; -{ - CURSOR *cp; - DB *dbp; - EPG *epg; - - dbp = dbc->dbp; - cp = dbc->internal; - - /* Release inner pages first. */ - for (epg = cp->sp; epg <= cp->csp; ++epg) { - if (epg->page != NULL) - (void)memp_fput(dbp->mpf, epg->page, 0); - if (epg->lock != LOCK_INVALID) { - if (nolocks) - (void)__BT_LPUT(dbc, epg->lock); - else - (void)__BT_TLPUT(dbc, epg->lock); - } - } - - /* Clear the stack, all pages have been released. */ - BT_STK_CLR(cp); - - return (0); -} - -/* - * __bam_stkgrow -- - * Grow the stack. 
- * - * PUBLIC: int __bam_stkgrow __P((CURSOR *)); - */ -int -__bam_stkgrow(cp) - CURSOR *cp; -{ - EPG *p; - size_t entries; - int ret; - - entries = cp->esp - cp->sp; - - if ((ret = __os_calloc(entries * 2, sizeof(EPG), &p)) != 0) - return (ret); - memcpy(p, cp->sp, entries * sizeof(EPG)); - if (cp->sp != cp->stack) - __os_free(cp->sp, entries * sizeof(EPG)); - cp->sp = p; - cp->csp = p + entries; - cp->esp = p + entries * 2; - return (0); -} diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c deleted file mode 100644 index 1d8e926d85..0000000000 --- a/db2/btree/bt_split.c +++ /dev/null @@ -1,966 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_split.c 10.33 (Sleepycat) 10/13/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -static int __bam_broot __P((DBC *, PAGE *, PAGE *, PAGE *)); -static int __bam_page __P((DBC *, EPG *, EPG *)); -static int __bam_pinsert __P((DBC *, EPG *, PAGE *, PAGE *)); -static int __bam_psplit __P((DBC *, EPG *, PAGE *, PAGE *, db_indx_t *)); -static int __bam_root __P((DBC *, EPG *)); -static int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *)); - -/* - * __bam_split -- - * Split a page. - * - * PUBLIC: int __bam_split __P((DBC *, void *)); - */ -int -__bam_split(dbc, arg) - DBC *dbc; - void *arg; -{ - CURSOR *cp; - DB *dbp; - enum { UP, DOWN } dir; - int exact, level, ret; - - dbp = dbc->dbp; - cp = dbc->internal; - - /* - * The locking protocol we use to avoid deadlock to acquire locks by - * walking down the tree, but we do it as lazily as possible, locking - * the root only as a last resort. We expect all stack pages to have - * been discarded before we're called; we discard all short-term locks. 
- * - * When __bam_split is first called, we know that a leaf page was too - * full for an insert. We don't know what leaf page it was, but we - * have the key/recno that caused the problem. We call XX_search to - * reacquire the leaf page, but this time get both the leaf page and - * its parent, locked. We then split the leaf page and see if the new - * internal key will fit into the parent page. If it will, we're done. - * - * If it won't, we discard our current locks and repeat the process, - * only this time acquiring the parent page and its parent, locked. - * This process repeats until we succeed in the split, splitting the - * root page as the final resort. The entire process then repeats, - * as necessary, until we split a leaf page. - * - * XXX - * A traditional method of speeding this up is to maintain a stack of - * the pages traversed in the original search. You can detect if the - * stack is correct by storing the page's LSN when it was searched and - * comparing that LSN with the current one when it's locked during the - * split. This would be an easy change for this code, but I have no - * numbers that indicate it's worthwhile. - */ - for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) { - /* - * Acquire a page and its parent, locked. - */ - if ((ret = (dbp->type == DB_BTREE ? - __bam_search(dbc, arg, S_WRPAIR, level, NULL, &exact) : - __bam_rsearch(dbc, - (db_recno_t *)arg, S_WRPAIR, level, &exact))) != 0) - return (ret); - - /* Split the page. */ - ret = cp->csp[0].page->pgno == PGNO_ROOT ? - __bam_root(dbc, &cp->csp[0]) : - __bam_page(dbc, &cp->csp[-1], &cp->csp[0]); - BT_STK_CLR(cp); - - switch (ret) { - case 0: - /* Once we've split the leaf page, we're done. */ - if (level == LEAFLEVEL) - return (0); - - /* Switch directions. 
*/ - if (dir == UP) - dir = DOWN; - break; - case DB_NEEDSPLIT: - /* - * It's possible to fail to split repeatedly, as other - * threads may be modifying the tree, or the page usage - * is sufficiently bad that we don't get enough space - * the first time. - */ - if (dir == DOWN) - dir = UP; - break; - default: - return (ret); - } - } - /* NOTREACHED */ -} - -/* - * __bam_root -- - * Split the root page of a btree. - */ -static int -__bam_root(dbc, cp) - DBC *dbc; - EPG *cp; -{ - DB *dbp; - PAGE *lp, *rp; - db_indx_t split; - int ret; - - dbp = dbc->dbp; - - /* Yeah, right. */ - if (cp->page->level >= MAXBTREELEVEL) { - ret = ENOSPC; - goto err; - } - - /* Create new left and right pages for the split. */ - lp = rp = NULL; - if ((ret = __bam_new(dbc, TYPE(cp->page), &lp)) != 0 || - (ret = __bam_new(dbc, TYPE(cp->page), &rp)) != 0) - goto err; - P_INIT(lp, dbp->pgsize, lp->pgno, - PGNO_INVALID, ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno, - cp->page->level, TYPE(cp->page)); - P_INIT(rp, dbp->pgsize, rp->pgno, - ISINTERNAL(cp->page) ? PGNO_INVALID : lp->pgno, PGNO_INVALID, - cp->page->level, TYPE(cp->page)); - - /* Split the page. */ - if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0) - goto err; - - /* Log the change. */ - if (DB_LOGGING(dbc)) { - DBT __a; - DB_LSN __lsn; - memset(&__a, 0, sizeof(__a)); - __a.data = cp->page; - __a.size = dbp->pgsize; - ZERO_LSN(__lsn); - if ((ret = __bam_split_log(dbp->dbenv->lg_info, dbc->txn, - &LSN(cp->page), 0, dbp->log_fileid, PGNO(lp), &LSN(lp), - PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &__lsn, - &__a)) != 0) - goto err; - LSN(lp) = LSN(rp) = LSN(cp->page); - } - - /* Clean up the new root page. */ - if ((ret = (dbp->type == DB_RECNO ? - __ram_root(dbc, cp->page, lp, rp) : - __bam_broot(dbc, cp->page, lp, rp))) != 0) - goto err; - - /* Adjust any cursors. Do it last so we don't have to undo it. 
*/ - __bam_ca_split(dbp, cp->page->pgno, lp->pgno, rp->pgno, split, 1); - - /* Success -- write the real pages back to the store. */ - (void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY); - (void)__BT_TLPUT(dbc, cp->lock); - (void)memp_fput(dbp->mpf, lp, DB_MPOOL_DIRTY); - (void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY); - - return (0); - -err: if (lp != NULL) - (void)__bam_free(dbc, lp); - if (rp != NULL) - (void)__bam_free(dbc, rp); - (void)memp_fput(dbp->mpf, cp->page, 0); - (void)__BT_TLPUT(dbc, cp->lock); - return (ret); -} - -/* - * __bam_page -- - * Split the non-root page of a btree. - */ -static int -__bam_page(dbc, pp, cp) - DBC *dbc; - EPG *pp, *cp; -{ - DB *dbp; - DB_LOCK tplock; - PAGE *lp, *rp, *tp; - db_indx_t split; - int ret; - - dbp = dbc->dbp; - lp = rp = tp = NULL; - ret = -1; - - /* Create new right page for the split. */ - if ((ret = __bam_new(dbc, TYPE(cp->page), &rp)) != 0) - goto err; - P_INIT(rp, dbp->pgsize, rp->pgno, - ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->pgno, - ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->next_pgno, - cp->page->level, TYPE(cp->page)); - - /* Create new left page for the split. */ - if ((ret = __os_malloc(dbp->pgsize, NULL, &lp)) != 0) - goto err; - P_INIT(lp, dbp->pgsize, cp->page->pgno, - ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->prev_pgno, - ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno, - cp->page->level, TYPE(cp->page)); - ZERO_LSN(lp->lsn); - - /* - * Split right. - * - * Only the indices are sorted on the page, i.e., the key/data pairs - * aren't, so it's simpler to copy the data from the split page onto - * two new pages instead of copying half the data to the right page - * and compacting the left page in place. Since the left page can't - * change, we swap the original and the allocated left page after the - * split. - */ - if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0) - goto err; - - /* - * Fix up the previous pointer of any leaf page following the split - * page. - * - * !!! 
- * There are interesting deadlock situations here as we write-lock a - * page that's not in our direct ancestry. Consider a cursor walking - * through the leaf pages, that has the previous page read-locked and - * is waiting on a lock for the page we just split. It will deadlock - * here. If this is a problem, we can fail in the split; it's not a - * problem as the split will succeed after the cursor passes through - * the page we're splitting. - */ - if (TYPE(cp->page) == P_LBTREE && rp->next_pgno != PGNO_INVALID) { - if ((ret = __bam_lget(dbc, - 0, rp->next_pgno, DB_LOCK_WRITE, &tplock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &rp->next_pgno, 0, &tp)) != 0) - goto err; - } - - /* Insert the new pages into the parent page. */ - if ((ret = __bam_pinsert(dbc, pp, lp, rp)) != 0) - goto err; - - /* Log the change. */ - if (DB_LOGGING(dbc)) { - DBT __a; - DB_LSN __lsn; - memset(&__a, 0, sizeof(__a)); - __a.data = cp->page; - __a.size = dbp->pgsize; - if (tp == NULL) - ZERO_LSN(__lsn); - if ((ret = __bam_split_log(dbp->dbenv->lg_info, dbc->txn, - &cp->page->lsn, 0, dbp->log_fileid, PGNO(cp->page), - &LSN(cp->page), PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), - tp == NULL ? 0 : PGNO(tp), - tp == NULL ? &__lsn : &LSN(tp), &__a)) != 0) - goto err; - - LSN(lp) = LSN(rp) = LSN(cp->page); - if (tp != NULL) - LSN(tp) = LSN(cp->page); - } - - /* Copy the allocated page into place. */ - memcpy(cp->page, lp, LOFFSET(lp)); - memcpy((u_int8_t *)cp->page + HOFFSET(lp), - (u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp)); - __os_free(lp, dbp->pgsize); - lp = NULL; - - /* Finish the next-page link. */ - if (tp != NULL) - tp->prev_pgno = rp->pgno; - - /* Adjust any cursors. Do so last so we don't have to undo it. */ - __bam_ca_split(dbp, cp->page->pgno, cp->page->pgno, rp->pgno, split, 0); - - /* Success -- write the real pages back to the store. 
*/ - (void)memp_fput(dbp->mpf, pp->page, DB_MPOOL_DIRTY); - (void)__BT_TLPUT(dbc, pp->lock); - (void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY); - (void)__BT_TLPUT(dbc, cp->lock); - (void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY); - if (tp != NULL) { - (void)memp_fput(dbp->mpf, tp, DB_MPOOL_DIRTY); - (void)__BT_TLPUT(dbc, tplock); - } - return (0); - -err: if (lp != NULL) - __os_free(lp, dbp->pgsize); - if (rp != NULL) - (void)__bam_free(dbc, rp); - if (tp != NULL) { - (void)memp_fput(dbp->mpf, tp, 0); - if (ret == DB_NEEDSPLIT) - (void)__BT_LPUT(dbc, tplock); - else - (void)__BT_TLPUT(dbc, tplock); - } - (void)memp_fput(dbp->mpf, pp->page, 0); - if (ret == DB_NEEDSPLIT) - (void)__BT_LPUT(dbc, pp->lock); - else - (void)__BT_TLPUT(dbc, pp->lock); - (void)memp_fput(dbp->mpf, cp->page, 0); - if (ret == DB_NEEDSPLIT) - (void)__BT_LPUT(dbc, cp->lock); - else - (void)__BT_TLPUT(dbc, cp->lock); - return (ret); -} - -/* - * __bam_broot -- - * Fix up the btree root page after it has been split. - */ -static int -__bam_broot(dbc, rootp, lp, rp) - DBC *dbc; - PAGE *rootp, *lp, *rp; -{ - BINTERNAL bi, *child_bi; - BKEYDATA *child_bk; - DB *dbp; - DBT hdr, data; - int ret; - - dbp = dbc->dbp; - - /* - * If the root page was a leaf page, change it into an internal page. - * We copy the key we split on (but not the key's data, in the case of - * a leaf page) to the new root page. - */ - P_INIT(rootp, dbp->pgsize, - PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE); - - memset(&data, 0, sizeof(data)); - memset(&hdr, 0, sizeof(hdr)); - - /* - * The btree comparison code guarantees that the left-most key on any - * level of the tree is never used, so it doesn't need to be filled in. 
- */ - memset(&bi, 0, sizeof(bi)); - bi.len = 0; - B_TSET(bi.type, B_KEYDATA, 0); - bi.pgno = lp->pgno; - if (F_ISSET(dbp, DB_BT_RECNUM)) { - bi.nrecs = __bam_total(lp); - RE_NREC_SET(rootp, bi.nrecs); - } - hdr.data = &bi; - hdr.size = SSZA(BINTERNAL, data); - if ((ret = - __db_pitem(dbc, rootp, 0, BINTERNAL_SIZE(0), &hdr, NULL)) != 0) - return (ret); - - switch (TYPE(rp)) { - case P_IBTREE: - /* Copy the first key of the child page onto the root page. */ - child_bi = GET_BINTERNAL(rp, 0); - - bi.len = child_bi->len; - B_TSET(bi.type, child_bi->type, 0); - bi.pgno = rp->pgno; - if (F_ISSET(dbp, DB_BT_RECNUM)) { - bi.nrecs = __bam_total(rp); - RE_NREC_ADJ(rootp, bi.nrecs); - } - hdr.data = &bi; - hdr.size = SSZA(BINTERNAL, data); - data.data = child_bi->data; - data.size = child_bi->len; - if ((ret = __db_pitem(dbc, rootp, 1, - BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0) - return (ret); - - /* Increment the overflow ref count. */ - if (B_TYPE(child_bi->type) == B_OVERFLOW) - if ((ret = __db_ovref(dbc, - ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0) - return (ret); - break; - case P_LBTREE: - /* Copy the first key of the child page onto the root page. 
*/ - child_bk = GET_BKEYDATA(rp, 0); - switch (B_TYPE(child_bk->type)) { - case B_KEYDATA: - bi.len = child_bk->len; - B_TSET(bi.type, child_bk->type, 0); - bi.pgno = rp->pgno; - if (F_ISSET(dbp, DB_BT_RECNUM)) { - bi.nrecs = __bam_total(rp); - RE_NREC_ADJ(rootp, bi.nrecs); - } - hdr.data = &bi; - hdr.size = SSZA(BINTERNAL, data); - data.data = child_bk->data; - data.size = child_bk->len; - if ((ret = __db_pitem(dbc, rootp, 1, - BINTERNAL_SIZE(child_bk->len), &hdr, &data)) != 0) - return (ret); - break; - case B_DUPLICATE: - case B_OVERFLOW: - bi.len = BOVERFLOW_SIZE; - B_TSET(bi.type, child_bk->type, 0); - bi.pgno = rp->pgno; - if (F_ISSET(dbp, DB_BT_RECNUM)) { - bi.nrecs = __bam_total(rp); - RE_NREC_ADJ(rootp, bi.nrecs); - } - hdr.data = &bi; - hdr.size = SSZA(BINTERNAL, data); - data.data = child_bk; - data.size = BOVERFLOW_SIZE; - if ((ret = __db_pitem(dbc, rootp, 1, - BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0) - return (ret); - - /* Increment the overflow ref count. */ - if (B_TYPE(child_bk->type) == B_OVERFLOW) - if ((ret = __db_ovref(dbc, - ((BOVERFLOW *)child_bk)->pgno, 1)) != 0) - return (ret); - break; - default: - return (__db_pgfmt(dbp, rp->pgno)); - } - break; - default: - return (__db_pgfmt(dbp, rp->pgno)); - } - return (0); -} - -/* - * __ram_root -- - * Fix up the recno root page after it has been split. - */ -static int -__ram_root(dbc, rootp, lp, rp) - DBC *dbc; - PAGE *rootp, *lp, *rp; -{ - DB *dbp; - DBT hdr; - RINTERNAL ri; - int ret; - - dbp = dbc->dbp; - - /* Initialize the page. */ - P_INIT(rootp, dbp->pgsize, - PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IRECNO); - - /* Initialize the header. */ - memset(&hdr, 0, sizeof(hdr)); - hdr.data = &ri; - hdr.size = RINTERNAL_SIZE; - - /* Insert the left and right keys, set the header information. 
*/ - ri.pgno = lp->pgno; - ri.nrecs = __bam_total(lp); - if ((ret = __db_pitem(dbc, rootp, 0, RINTERNAL_SIZE, &hdr, NULL)) != 0) - return (ret); - RE_NREC_SET(rootp, ri.nrecs); - ri.pgno = rp->pgno; - ri.nrecs = __bam_total(rp); - if ((ret = __db_pitem(dbc, rootp, 1, RINTERNAL_SIZE, &hdr, NULL)) != 0) - return (ret); - RE_NREC_ADJ(rootp, ri.nrecs); - return (0); -} - -/* - * __bam_pinsert -- - * Insert a new key into a parent page, completing the split. - */ -static int -__bam_pinsert(dbc, parent, lchild, rchild) - DBC *dbc; - EPG *parent; - PAGE *lchild, *rchild; -{ - BINTERNAL bi, *child_bi; - BKEYDATA *child_bk, *tmp_bk; - BTREE *t; - DB *dbp; - DBT a, b, hdr, data; - PAGE *ppage; - RINTERNAL ri; - db_indx_t off; - db_recno_t nrecs; - u_int32_t n, nbytes, nksize; - int ret; - - dbp = dbc->dbp; - t = dbp->internal; - ppage = parent->page; - - /* If handling record numbers, count records split to the right page. */ - nrecs = dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM) ? - __bam_total(rchild) : 0; - - /* - * Now we insert the new page's first key into the parent page, which - * completes the split. The parent points to a PAGE and a page index - * offset, where the new key goes ONE AFTER the index, because we split - * to the right. - * - * XXX - * Some btree algorithms replace the key for the old page as well as - * the new page. We don't, as there's no reason to believe that the - * first key on the old page is any better than the key we have, and, - * in the case of a key being placed at index 0 causing the split, the - * key is unavailable. - */ - off = parent->indx + O_INDX; - - /* - * Calculate the space needed on the parent page. - * - * Prefix trees: space hack used when inserting into BINTERNAL pages. - * Retain only what's needed to distinguish between the new entry and - * the LAST entry on the page to its left. If the keys compare equal, - * retain the entire key. 
We ignore overflow keys, and the entire key - * must be retained for the next-to-leftmost key on the leftmost page - * of each level, or the search will fail. Applicable ONLY to internal - * pages that have leaf pages as children. Further reduction of the - * key between pairs of internal pages loses too much information. - */ - switch (TYPE(rchild)) { - case P_IBTREE: - child_bi = GET_BINTERNAL(rchild, 0); - nbytes = BINTERNAL_PSIZE(child_bi->len); - - if (P_FREESPACE(ppage) < nbytes) - return (DB_NEEDSPLIT); - - /* Add a new record for the right page. */ - memset(&bi, 0, sizeof(bi)); - bi.len = child_bi->len; - B_TSET(bi.type, child_bi->type, 0); - bi.pgno = rchild->pgno; - bi.nrecs = nrecs; - memset(&hdr, 0, sizeof(hdr)); - hdr.data = &bi; - hdr.size = SSZA(BINTERNAL, data); - memset(&data, 0, sizeof(data)); - data.data = child_bi->data; - data.size = child_bi->len; - if ((ret = __db_pitem(dbc, ppage, off, - BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0) - return (ret); - - /* Increment the overflow ref count. 
*/ - if (B_TYPE(child_bi->type) == B_OVERFLOW) - if ((ret = __db_ovref(dbc, - ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0) - return (ret); - break; - case P_LBTREE: - child_bk = GET_BKEYDATA(rchild, 0); - switch (B_TYPE(child_bk->type)) { - case B_KEYDATA: - nbytes = BINTERNAL_PSIZE(child_bk->len); - nksize = child_bk->len; - if (t->bt_prefix == NULL) - goto noprefix; - if (ppage->prev_pgno == PGNO_INVALID && off <= 1) - goto noprefix; - tmp_bk = GET_BKEYDATA(lchild, NUM_ENT(lchild) - P_INDX); - if (B_TYPE(tmp_bk->type) != B_KEYDATA) - goto noprefix; - memset(&a, 0, sizeof(a)); - a.size = tmp_bk->len; - a.data = tmp_bk->data; - memset(&b, 0, sizeof(b)); - b.size = child_bk->len; - b.data = child_bk->data; - nksize = t->bt_prefix(&a, &b); - if ((n = BINTERNAL_PSIZE(nksize)) < nbytes) - nbytes = n; - else -noprefix: nksize = child_bk->len; - - if (P_FREESPACE(ppage) < nbytes) - return (DB_NEEDSPLIT); - - memset(&bi, 0, sizeof(bi)); - bi.len = nksize; - B_TSET(bi.type, child_bk->type, 0); - bi.pgno = rchild->pgno; - bi.nrecs = nrecs; - memset(&hdr, 0, sizeof(hdr)); - hdr.data = &bi; - hdr.size = SSZA(BINTERNAL, data); - memset(&data, 0, sizeof(data)); - data.data = child_bk->data; - data.size = nksize; - if ((ret = __db_pitem(dbc, ppage, off, - BINTERNAL_SIZE(nksize), &hdr, &data)) != 0) - return (ret); - break; - case B_DUPLICATE: - case B_OVERFLOW: - nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE); - - if (P_FREESPACE(ppage) < nbytes) - return (DB_NEEDSPLIT); - - memset(&bi, 0, sizeof(bi)); - bi.len = BOVERFLOW_SIZE; - B_TSET(bi.type, child_bk->type, 0); - bi.pgno = rchild->pgno; - bi.nrecs = nrecs; - memset(&hdr, 0, sizeof(hdr)); - hdr.data = &bi; - hdr.size = SSZA(BINTERNAL, data); - memset(&data, 0, sizeof(data)); - data.data = child_bk; - data.size = BOVERFLOW_SIZE; - if ((ret = __db_pitem(dbc, ppage, off, - BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0) - return (ret); - - /* Increment the overflow ref count. 
*/ - if (B_TYPE(child_bk->type) == B_OVERFLOW) - if ((ret = __db_ovref(dbc, - ((BOVERFLOW *)child_bk)->pgno, 1)) != 0) - return (ret); - break; - default: - return (__db_pgfmt(dbp, rchild->pgno)); - } - break; - case P_IRECNO: - case P_LRECNO: - nbytes = RINTERNAL_PSIZE; - - if (P_FREESPACE(ppage) < nbytes) - return (DB_NEEDSPLIT); - - /* Add a new record for the right page. */ - memset(&hdr, 0, sizeof(hdr)); - hdr.data = &ri; - hdr.size = RINTERNAL_SIZE; - ri.pgno = rchild->pgno; - ri.nrecs = nrecs; - if ((ret = __db_pitem(dbc, - ppage, off, RINTERNAL_SIZE, &hdr, NULL)) != 0) - return (ret); - break; - default: - return (__db_pgfmt(dbp, rchild->pgno)); - } - - /* Adjust the parent page's left page record count. */ - if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) { - /* Log the change. */ - if (DB_LOGGING(dbc) && - (ret = __bam_cadjust_log(dbp->dbenv->lg_info, - dbc->txn, &LSN(ppage), 0, dbp->log_fileid, - PGNO(ppage), &LSN(ppage), (u_int32_t)parent->indx, - -(int32_t)nrecs, (int32_t)0)) != 0) - return (ret); - - /* Update the left page count. */ - if (dbp->type == DB_RECNO) - GET_RINTERNAL(ppage, parent->indx)->nrecs -= nrecs; - else - GET_BINTERNAL(ppage, parent->indx)->nrecs -= nrecs; - } - - return (0); -} - -/* - * __bam_psplit -- - * Do the real work of splitting the page. - */ -static int -__bam_psplit(dbc, cp, lp, rp, splitret) - DBC *dbc; - EPG *cp; - PAGE *lp, *rp; - db_indx_t *splitret; -{ - DB *dbp; - PAGE *pp; - db_indx_t half, nbytes, off, splitp, top; - int adjust, cnt, isbigkey, ret; - - dbp = dbc->dbp; - pp = cp->page; - adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX; - - /* - * If we're splitting the first (last) page on a level because we're - * inserting (appending) a key to it, it's likely that the data is - * sorted. Moving a single item to the new page is less work and can - * push the fill factor higher than normal. If we're wrong it's not - * a big deal, we'll just do the split the right way next time. 
- */ - off = 0; - if (NEXT_PGNO(pp) == PGNO_INVALID && - ((ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page) - 1) || - (!ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page)))) - off = NUM_ENT(cp->page) - adjust; - else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0) - off = adjust; - - if (off != 0) - goto sort; - - /* - * Split the data to the left and right pages. Try not to split on - * an overflow key. (Overflow keys on internal pages will slow down - * searches.) Refuse to split in the middle of a set of duplicates. - * - * First, find the optimum place to split. - * - * It's possible to try and split past the last record on the page if - * there's a very large record at the end of the page. Make sure this - * doesn't happen by bounding the check at the next-to-last entry on - * the page. - * - * Note, we try and split half the data present on the page. This is - * because another process may have already split the page and left - * it half empty. We don't try and skip the split -- we don't know - * how much space we're going to need on the page, and we may need up - * to half the page for a big item, so there's no easy test to decide - * if we need to split or not. Besides, if two threads are inserting - * data into the same place in the database, we're probably going to - * need more space soon anyway. 
- */ - top = NUM_ENT(pp) - adjust; - half = (dbp->pgsize - HOFFSET(pp)) / 2; - for (nbytes = 0, off = 0; off < top && nbytes < half; ++off) - switch (TYPE(pp)) { - case P_IBTREE: - if (B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA) - nbytes += - BINTERNAL_SIZE(GET_BINTERNAL(pp, off)->len); - else - nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE); - break; - case P_LBTREE: - if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) - nbytes += - BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len); - else - nbytes += BOVERFLOW_SIZE; - - ++off; - if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) - nbytes += - BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len); - else - nbytes += BOVERFLOW_SIZE; - break; - case P_IRECNO: - nbytes += RINTERNAL_SIZE; - break; - case P_LRECNO: - nbytes += BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len); - break; - default: - return (__db_pgfmt(dbp, pp->pgno)); - } -sort: splitp = off; - - /* - * Splitp is either at or just past the optimum split point. If - * it's a big key, try and find something close by that's not. - */ - if (TYPE(pp) == P_IBTREE) - isbigkey = B_TYPE(GET_BINTERNAL(pp, off)->type) != B_KEYDATA; - else if (TYPE(pp) == P_LBTREE) - isbigkey = B_TYPE(GET_BKEYDATA(pp, off)->type) != B_KEYDATA; - else - isbigkey = 0; - if (isbigkey) - for (cnt = 1; cnt <= 3; ++cnt) { - off = splitp + cnt * adjust; - if (off < (db_indx_t)NUM_ENT(pp) && - ((TYPE(pp) == P_IBTREE && - B_TYPE(GET_BINTERNAL(pp,off)->type) == B_KEYDATA) || - B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)) { - splitp = off; - break; - } - if (splitp <= (db_indx_t)(cnt * adjust)) - continue; - off = splitp - cnt * adjust; - if (TYPE(pp) == P_IBTREE ? - B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA : - B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) { - splitp = off; - break; - } - } - - /* - * We can't split in the middle a set of duplicates. 
We know that - * no duplicate set can take up more than about 25% of the page, - * because that's the point where we push it off onto a duplicate - * page set. So, this loop can't be unbounded. - */ - if (F_ISSET(dbp, DB_AM_DUP) && TYPE(pp) == P_LBTREE && - pp->inp[splitp] == pp->inp[splitp - adjust]) - for (cnt = 1;; ++cnt) { - off = splitp + cnt * adjust; - if (off < NUM_ENT(pp) && - pp->inp[splitp] != pp->inp[off]) { - splitp = off; - break; - } - if (splitp <= (db_indx_t)(cnt * adjust)) - continue; - off = splitp - cnt * adjust; - if (pp->inp[splitp] != pp->inp[off]) { - splitp = off + adjust; - break; - } - } - - - /* We're going to split at splitp. */ - if ((ret = __bam_copy(dbp, pp, lp, 0, splitp)) != 0) - return (ret); - if ((ret = __bam_copy(dbp, pp, rp, splitp, NUM_ENT(pp))) != 0) - return (ret); - - *splitret = splitp; - return (0); -} - -/* - * __bam_copy -- - * Copy a set of records from one page to another. - * - * PUBLIC: int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t)); - */ -int -__bam_copy(dbp, pp, cp, nxt, stop) - DB *dbp; - PAGE *pp, *cp; - u_int32_t nxt, stop; -{ - db_indx_t nbytes, off; - - /* - * Copy the rest of the data to the right page. Nxt is the next - * offset placed on the target page. - */ - for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) { - switch (TYPE(pp)) { - case P_IBTREE: - if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA) - nbytes = - BINTERNAL_SIZE(GET_BINTERNAL(pp, nxt)->len); - else - nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE); - break; - case P_LBTREE: - /* - * If we're on a key and it's a duplicate, just copy - * the offset. 
- */ - if (off != 0 && (nxt % P_INDX) == 0 && - pp->inp[nxt] == pp->inp[nxt - P_INDX]) { - cp->inp[off] = cp->inp[off - P_INDX]; - continue; - } - /* FALLTHROUGH */ - case P_LRECNO: - if (B_TYPE(GET_BKEYDATA(pp, nxt)->type) == B_KEYDATA) - nbytes = - BKEYDATA_SIZE(GET_BKEYDATA(pp, nxt)->len); - else - nbytes = BOVERFLOW_SIZE; - break; - case P_IRECNO: - nbytes = RINTERNAL_SIZE; - break; - default: - return (__db_pgfmt(dbp, pp->pgno)); - } - cp->inp[off] = HOFFSET(cp) -= nbytes; - memcpy(P_ENTRY(cp, off), P_ENTRY(pp, nxt), nbytes); - } - return (0); -} diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c deleted file mode 100644 index 855ef40bbd..0000000000 --- a/db2/btree/bt_stat.c +++ /dev/null @@ -1,198 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)bt_stat.c 10.27 (Sleepycat) 11/25/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include - -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "btree.h" - -/* - * __bam_stat -- - * Gather/print the btree statistics - * - * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); - */ -int -__bam_stat(dbp, spp, db_malloc, flags) - DB *dbp; - void *spp; - void *(*db_malloc) __P((size_t)); - u_int32_t flags; -{ - BTMETA *meta; - BTREE *t; - DBC *dbc; - DB_BTREE_STAT *sp; - DB_LOCK lock; - PAGE *h; - db_pgno_t lastpgno, pgno; - int ret, t_ret; - - DB_PANIC_CHECK(dbp); - - /* Check for invalid flags. */ - if ((ret = __db_statchk(dbp, flags)) != 0) - return (ret); - - if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, NULL, "bam_stat", NULL, NULL, flags); - - t = dbp->internal; - - if (spp == NULL) - return (0); - - /* Allocate and clear the structure. 
*/ - if ((ret = __os_malloc(sizeof(*sp), db_malloc, &sp)) != 0) - goto err; - memset(sp, 0, sizeof(*sp)); - - /* If the app just wants the record count, make it fast. */ - if (flags == DB_RECORDCOUNT) { - pgno = PGNO_ROOT; - if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&h)) != 0) - goto err; - - sp->bt_nrecs = RE_NREC(h); - - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbc, lock); - goto done; - } - - /* Get the meta-data page. */ - pgno = PGNO_METADATA; - if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) - goto err; - - /* Translate the metadata flags. */ - if (F_ISSET(meta, BTM_DUP)) - sp->bt_flags |= DB_DUP; - if (F_ISSET(meta, BTM_FIXEDLEN)) - sp->bt_flags |= DB_FIXEDLEN; - if (F_ISSET(meta, BTM_RECNUM)) - sp->bt_flags |= DB_RECNUM; - if (F_ISSET(meta, BTM_RENUMBER)) - sp->bt_flags |= DB_RENUMBER; - - /* Get the remaining metadata fields. */ - sp->bt_minkey = meta->minkey; - sp->bt_maxkey = meta->maxkey; - sp->bt_re_len = meta->re_len; - sp->bt_re_pad = meta->re_pad; - sp->bt_magic = meta->magic; - sp->bt_version = meta->version; - - /* Get the page size from the DB. */ - sp->bt_pagesize = dbp->pgsize; - - /* Walk the free list, counting pages. */ - for (sp->bt_free = 0, pgno = meta->free; pgno != PGNO_INVALID;) { - ++sp->bt_free; - - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) { - (void)memp_fput(dbp->mpf, meta, 0); - (void)__BT_TLPUT(dbc, lock); - goto err; - } - pgno = h->next_pgno; - (void)memp_fput(dbp->mpf, h, 0); - } - - /* Discard the meta-data page. */ - (void)memp_fput(dbp->mpf, meta, 0); - (void)__BT_TLPUT(dbc, lock); - - /* Determine the last page of the database. */ - if ((ret = memp_fget(dbp->mpf, &lastpgno, DB_MPOOL_LAST, &h)) != 0) - goto err; - (void)memp_fput(dbp->mpf, h, 0); - - /* Get the root page. 
*/ - pgno = PGNO_ROOT; - if ((ret = __bam_lget(dbc, 0, PGNO_ROOT, DB_LOCK_READ, &lock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) { - (void)__BT_LPUT(dbc, lock); - goto err; - } - - /* Get the levels from the root page. */ - sp->bt_levels = h->level; - - /* Walk the page list, counting things. */ - for (;;) { - switch (TYPE(h)) { - case P_INVALID: - break; - case P_IBTREE: - case P_IRECNO: - ++sp->bt_int_pg; - sp->bt_int_pgfree += HOFFSET(h) - LOFFSET(h); - break; - case P_LBTREE: - ++sp->bt_leaf_pg; - sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h); - sp->bt_nrecs += NUM_ENT(h) / P_INDX; - break; - case P_LRECNO: - ++sp->bt_leaf_pg; - sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h); - sp->bt_nrecs += NUM_ENT(h); - break; - case P_DUPLICATE: - ++sp->bt_dup_pg; - /* XXX MARGO: sp->bt_dup_pgfree; */ - break; - case P_OVERFLOW: - ++sp->bt_over_pg; - /* XXX MARGO: sp->bt_over_pgfree; */ - break; - default: - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbc, lock); - return (__db_pgfmt(dbp, pgno)); - } - - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbc, lock); - - if (++pgno > lastpgno) - break; - if (__bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) - break; - if (memp_fget(dbp->mpf, &pgno, 0, &h) != 0) { - (void)__BT_LPUT(dbc, lock); - break; - } - } - -done: *(DB_BTREE_STAT **)spp = sp; - ret = 0; - -err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c deleted file mode 100644 index 95ea76e2cd..0000000000 --- a/db2/btree/btree_auto.c +++ /dev/null @@ -1,1508 +0,0 @@ -/* Do not edit: automatically built by dist/db_gen.sh. 
*/ -#include "config.h" - -#ifndef NO_SYSTEM_INCLUDES -#include -#include -#include -#include -#include -#endif - -#include "db_int.h" -#include "db_page.h" -#include "db_dispatch.h" -#include "btree.h" -#include "db_am.h" -/* - * PUBLIC: int __bam_pg_alloc_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, DB_LSN *, DB_LSN *, db_pgno_t, - * PUBLIC: u_int32_t, db_pgno_t)); - */ -int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags, - fileid, meta_lsn, page_lsn, pgno, ptype, next) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - DB_LSN * meta_lsn; - DB_LSN * page_lsn; - db_pgno_t pgno; - u_int32_t ptype; - db_pgno_t next; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_bam_pg_alloc; - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(*meta_lsn) - + sizeof(*page_lsn) - + sizeof(pgno) - + sizeof(ptype) - + sizeof(next); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - if (meta_lsn != NULL) - memcpy(bp, meta_lsn, sizeof(*meta_lsn)); - else - memset(bp, 0, sizeof(*meta_lsn)); - bp += sizeof(*meta_lsn); - if (page_lsn != NULL) - memcpy(bp, page_lsn, sizeof(*page_lsn)); - else - memset(bp, 0, sizeof(*page_lsn)); - bp += sizeof(*page_lsn); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - memcpy(bp, &ptype, sizeof(ptype)); - bp += sizeof(ptype); - memcpy(bp, &next, sizeof(next)); - bp += sizeof(next); -#ifdef DIAGNOSTIC - if 
((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return (ret); -} - -/* - * PUBLIC: int __bam_pg_alloc_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_pg_alloc_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_pg_alloc_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_pg_alloc_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]bam_pg_alloc: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - printf("\tpage_lsn: [%lu][%lu]\n", - (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tptype: %lu\n", (u_long)argp->ptype); - printf("\tnext: %lu\n", (u_long)argp->next); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_pg_alloc_read __P((void *, __bam_pg_alloc_args **)); - */ -int -__bam_pg_alloc_read(recbuf, argpp) - void *recbuf; - __bam_pg_alloc_args **argpp; -{ - __bam_pg_alloc_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_pg_alloc_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += 
sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn)); - bp += sizeof(argp->meta_lsn); - memcpy(&argp->page_lsn, bp, sizeof(argp->page_lsn)); - bp += sizeof(argp->page_lsn); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->ptype, bp, sizeof(argp->ptype)); - bp += sizeof(argp->ptype); - memcpy(&argp->next, bp, sizeof(argp->next)); - bp += sizeof(argp->next); - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_pg_free_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, const DBT *, - * PUBLIC: db_pgno_t)); - */ -int __bam_pg_free_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, meta_lsn, header, next) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - db_pgno_t pgno; - DB_LSN * meta_lsn; - const DBT *header; - db_pgno_t next; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_bam_pg_free; - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(pgno) - + sizeof(*meta_lsn) - + sizeof(u_int32_t) + (header == NULL ? 
0 : header->size) - + sizeof(next); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - if (meta_lsn != NULL) - memcpy(bp, meta_lsn, sizeof(*meta_lsn)); - else - memset(bp, 0, sizeof(*meta_lsn)); - bp += sizeof(*meta_lsn); - if (header == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &header->size, sizeof(header->size)); - bp += sizeof(header->size); - memcpy(bp, header->data, header->size); - bp += header->size; - } - memcpy(bp, &next, sizeof(next)); - bp += sizeof(next); -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return (ret); -} - -/* - * PUBLIC: int __bam_pg_free_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_pg_free_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_pg_free_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_pg_free_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]bam_pg_free: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tpgno: %lu\n", 
(u_long)argp->pgno); - printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - printf("\theader: "); - for (i = 0; i < argp->header.size; i++) { - ch = ((u_int8_t *)argp->header.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\tnext: %lu\n", (u_long)argp->next); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_pg_free_read __P((void *, __bam_pg_free_args **)); - */ -int -__bam_pg_free_read(recbuf, argpp) - void *recbuf; - __bam_pg_free_args **argpp; -{ - __bam_pg_free_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_pg_free_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn)); - bp += sizeof(argp->meta_lsn); - memcpy(&argp->header.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->header.data = bp; - bp += argp->header.size; - memcpy(&argp->next, bp, sizeof(argp->next)); - bp += sizeof(argp->next); - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_split_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, - * PUBLIC: DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *, - * PUBLIC: const DBT *)); - */ -int __bam_split_log(logp, txnid, ret_lsnp, flags, - fileid, left, llsn, right, rlsn, indx, - npgno, nlsn, pg) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - 
db_pgno_t left; - DB_LSN * llsn; - db_pgno_t right; - DB_LSN * rlsn; - u_int32_t indx; - db_pgno_t npgno; - DB_LSN * nlsn; - const DBT *pg; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_bam_split; - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(left) - + sizeof(*llsn) - + sizeof(right) - + sizeof(*rlsn) - + sizeof(indx) - + sizeof(npgno) - + sizeof(*nlsn) - + sizeof(u_int32_t) + (pg == NULL ? 0 : pg->size); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &left, sizeof(left)); - bp += sizeof(left); - if (llsn != NULL) - memcpy(bp, llsn, sizeof(*llsn)); - else - memset(bp, 0, sizeof(*llsn)); - bp += sizeof(*llsn); - memcpy(bp, &right, sizeof(right)); - bp += sizeof(right); - if (rlsn != NULL) - memcpy(bp, rlsn, sizeof(*rlsn)); - else - memset(bp, 0, sizeof(*rlsn)); - bp += sizeof(*rlsn); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); - memcpy(bp, &npgno, sizeof(npgno)); - bp += sizeof(npgno); - if (nlsn != NULL) - memcpy(bp, nlsn, sizeof(*nlsn)); - else - memset(bp, 0, sizeof(*nlsn)); - bp += sizeof(*nlsn); - if (pg == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &pg->size, sizeof(pg->size)); - bp += sizeof(pg->size); - memcpy(bp, pg->data, pg->size); - bp += pg->size; - } -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record 
length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return (ret); -} - -/* - * PUBLIC: int __bam_split_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_split_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_split_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_split_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]bam_split: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tleft: %lu\n", (u_long)argp->left); - printf("\tllsn: [%lu][%lu]\n", - (u_long)argp->llsn.file, (u_long)argp->llsn.offset); - printf("\tright: %lu\n", (u_long)argp->right); - printf("\trlsn: [%lu][%lu]\n", - (u_long)argp->rlsn.file, (u_long)argp->rlsn.offset); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\tnpgno: %lu\n", (u_long)argp->npgno); - printf("\tnlsn: [%lu][%lu]\n", - (u_long)argp->nlsn.file, (u_long)argp->nlsn.offset); - printf("\tpg: "); - for (i = 0; i < argp->pg.size; i++) { - ch = ((u_int8_t *)argp->pg.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_split_read __P((void *, __bam_split_args **)); - */ -int -__bam_split_read(recbuf, argpp) - void *recbuf; - __bam_split_args **argpp; -{ - __bam_split_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_split_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN 
*)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->left, bp, sizeof(argp->left)); - bp += sizeof(argp->left); - memcpy(&argp->llsn, bp, sizeof(argp->llsn)); - bp += sizeof(argp->llsn); - memcpy(&argp->right, bp, sizeof(argp->right)); - bp += sizeof(argp->right); - memcpy(&argp->rlsn, bp, sizeof(argp->rlsn)); - bp += sizeof(argp->rlsn); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - memcpy(&argp->npgno, bp, sizeof(argp->npgno)); - bp += sizeof(argp->npgno); - memcpy(&argp->nlsn, bp, sizeof(argp->nlsn)); - bp += sizeof(argp->nlsn); - memcpy(&argp->pg.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->pg.data = bp; - bp += argp->pg.size; - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_rsplit_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, const DBT *, db_pgno_t, - * PUBLIC: const DBT *, DB_LSN *)); - */ -int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, pgdbt, nrec, rootent, rootlsn) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - db_pgno_t pgno; - const DBT *pgdbt; - db_pgno_t nrec; - const DBT *rootent; - DB_LSN * rootlsn; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_bam_rsplit; - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(pgno) - + sizeof(u_int32_t) + (pgdbt == NULL ? 
0 : pgdbt->size) - + sizeof(nrec) - + sizeof(u_int32_t) + (rootent == NULL ? 0 : rootent->size) - + sizeof(*rootlsn); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - if (pgdbt == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &pgdbt->size, sizeof(pgdbt->size)); - bp += sizeof(pgdbt->size); - memcpy(bp, pgdbt->data, pgdbt->size); - bp += pgdbt->size; - } - memcpy(bp, &nrec, sizeof(nrec)); - bp += sizeof(nrec); - if (rootent == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &rootent->size, sizeof(rootent->size)); - bp += sizeof(rootent->size); - memcpy(bp, rootent->data, rootent->size); - bp += rootent->size; - } - if (rootlsn != NULL) - memcpy(bp, rootlsn, sizeof(*rootlsn)); - else - memset(bp, 0, sizeof(*rootlsn)); - bp += sizeof(*rootlsn); -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return (ret); -} - -/* - * PUBLIC: int __bam_rsplit_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_rsplit_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_rsplit_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_rsplit_read(dbtp->data, &argp)) != 
0) - return (ret); - printf("[%lu][%lu]bam_rsplit: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tpgdbt: "); - for (i = 0; i < argp->pgdbt.size; i++) { - ch = ((u_int8_t *)argp->pgdbt.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\tnrec: %lu\n", (u_long)argp->nrec); - printf("\trootent: "); - for (i = 0; i < argp->rootent.size; i++) { - ch = ((u_int8_t *)argp->rootent.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\trootlsn: [%lu][%lu]\n", - (u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_rsplit_read __P((void *, __bam_rsplit_args **)); - */ -int -__bam_rsplit_read(recbuf, argpp) - void *recbuf; - __bam_rsplit_args **argpp; -{ - __bam_rsplit_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_rsplit_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->pgdbt.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->pgdbt.data = bp; - bp += argp->pgdbt.size; - memcpy(&argp->nrec, bp, sizeof(argp->nrec)); - bp += sizeof(argp->nrec); - memcpy(&argp->rootent.size, bp, 
sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->rootent.data = bp; - bp += argp->rootent.size; - memcpy(&argp->rootlsn, bp, sizeof(argp->rootlsn)); - bp += sizeof(argp->rootlsn); - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_adj_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, u_int32_t)); - */ -int __bam_adj_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, lsn, indx, indx_copy, is_insert) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - db_pgno_t pgno; - DB_LSN * lsn; - u_int32_t indx; - u_int32_t indx_copy; - u_int32_t is_insert; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_bam_adj; - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(pgno) - + sizeof(*lsn) - + sizeof(indx) - + sizeof(indx_copy) - + sizeof(is_insert); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - if (lsn != NULL) - memcpy(bp, lsn, sizeof(*lsn)); - else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); - memcpy(bp, &indx_copy, sizeof(indx_copy)); - bp += sizeof(indx_copy); - memcpy(bp, &is_insert, sizeof(is_insert)); - bp += sizeof(is_insert); -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log 
record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return (ret); -} - -/* - * PUBLIC: int __bam_adj_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_adj_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_adj_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_adj_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]bam_adj: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy); - printf("\tis_insert: %lu\n", (u_long)argp->is_insert); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_adj_read __P((void *, __bam_adj_args **)); - */ -int -__bam_adj_read(recbuf, argpp) - void *recbuf; - __bam_adj_args **argpp; -{ - __bam_adj_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_adj_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->pgno, bp, 
sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->lsn, bp, sizeof(argp->lsn)); - bp += sizeof(argp->lsn); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - memcpy(&argp->indx_copy, bp, sizeof(argp->indx_copy)); - bp += sizeof(argp->indx_copy); - memcpy(&argp->is_insert, bp, sizeof(argp->is_insert)); - bp += sizeof(argp->is_insert); - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_cadjust_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, - * PUBLIC: int32_t, int32_t)); - */ -int __bam_cadjust_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, lsn, indx, adjust, total) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - db_pgno_t pgno; - DB_LSN * lsn; - u_int32_t indx; - int32_t adjust; - int32_t total; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_bam_cadjust; - txn_num = txnid == NULL ? 
0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(pgno) - + sizeof(*lsn) - + sizeof(indx) - + sizeof(adjust) - + sizeof(total); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - if (lsn != NULL) - memcpy(bp, lsn, sizeof(*lsn)); - else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); - memcpy(bp, &adjust, sizeof(adjust)); - bp += sizeof(adjust); - memcpy(bp, &total, sizeof(total)); - bp += sizeof(total); -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return (ret); -} - -/* - * PUBLIC: int __bam_cadjust_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_cadjust_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_cadjust_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_cadjust_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]bam_cadjust: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - 
(u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\tadjust: %ld\n", (long)argp->adjust); - printf("\ttotal: %ld\n", (long)argp->total); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_cadjust_read __P((void *, __bam_cadjust_args **)); - */ -int -__bam_cadjust_read(recbuf, argpp) - void *recbuf; - __bam_cadjust_args **argpp; -{ - __bam_cadjust_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_cadjust_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->lsn, bp, sizeof(argp->lsn)); - bp += sizeof(argp->lsn); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - memcpy(&argp->adjust, bp, sizeof(argp->adjust)); - bp += sizeof(argp->adjust); - memcpy(&argp->total, bp, sizeof(argp->total)); - bp += sizeof(argp->total); - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_cdel_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t)); - */ -int __bam_cdel_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, lsn, indx) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - db_pgno_t pgno; - DB_LSN * lsn; - u_int32_t indx; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; 
- int ret; - u_int8_t *bp; - - rectype = DB_bam_cdel; - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(pgno) - + sizeof(*lsn) - + sizeof(indx); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - if (lsn != NULL) - memcpy(bp, lsn, sizeof(*lsn)); - else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return (ret); -} - -/* - * PUBLIC: int __bam_cdel_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_cdel_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_cdel_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_cdel_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]bam_cdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tpgno: %lu\n", 
(u_long)argp->pgno); - printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_cdel_read __P((void *, __bam_cdel_args **)); - */ -int -__bam_cdel_read(recbuf, argpp) - void *recbuf; - __bam_cdel_args **argpp; -{ - __bam_cdel_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_cdel_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->lsn, bp, sizeof(argp->lsn)); - bp += sizeof(argp->lsn); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_repl_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, const DBT *, const DBT *, u_int32_t, - * PUBLIC: u_int32_t)); - */ -int __bam_repl_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, lsn, indx, isdeleted, orig, - repl, prefix, suffix) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - db_pgno_t pgno; - DB_LSN * lsn; - u_int32_t indx; - u_int32_t isdeleted; - const DBT *orig; - const DBT *repl; - u_int32_t prefix; - u_int32_t suffix; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_bam_repl; - txn_num = txnid == NULL ? 
0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(pgno) - + sizeof(*lsn) - + sizeof(indx) - + sizeof(isdeleted) - + sizeof(u_int32_t) + (orig == NULL ? 0 : orig->size) - + sizeof(u_int32_t) + (repl == NULL ? 0 : repl->size) - + sizeof(prefix) - + sizeof(suffix); - if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - if (lsn != NULL) - memcpy(bp, lsn, sizeof(*lsn)); - else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); - memcpy(bp, &isdeleted, sizeof(isdeleted)); - bp += sizeof(isdeleted); - if (orig == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &orig->size, sizeof(orig->size)); - bp += sizeof(orig->size); - memcpy(bp, orig->data, orig->size); - bp += orig->size; - } - if (repl == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &repl->size, sizeof(repl->size)); - bp += sizeof(repl->size); - memcpy(bp, repl->data, repl->size); - bp += repl->size; - } - memcpy(bp, &prefix, sizeof(prefix)); - bp += sizeof(prefix); - memcpy(bp, &suffix, sizeof(suffix)); - bp += sizeof(suffix); -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, 0); - return 
(ret); -} - -/* - * PUBLIC: int __bam_repl_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__bam_repl_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __bam_repl_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]bam_repl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted); - printf("\torig: "); - for (i = 0; i < argp->orig.size; i++) { - ch = ((u_int8_t *)argp->orig.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\trepl: "); - for (i = 0; i < argp->repl.size; i++) { - ch = ((u_int8_t *)argp->repl.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\tprefix: %lu\n", (u_long)argp->prefix); - printf("\tsuffix: %lu\n", (u_long)argp->suffix); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -/* - * PUBLIC: int __bam_repl_read __P((void *, __bam_repl_args **)); - */ -int -__bam_repl_read(recbuf, argpp) - void *recbuf; - __bam_repl_args **argpp; -{ - __bam_repl_args *argp; - u_int8_t *bp; - int ret; - - ret = __os_malloc(sizeof(__bam_repl_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += 
sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->lsn, bp, sizeof(argp->lsn)); - bp += sizeof(argp->lsn); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - memcpy(&argp->isdeleted, bp, sizeof(argp->isdeleted)); - bp += sizeof(argp->isdeleted); - memcpy(&argp->orig.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->orig.data = bp; - bp += argp->orig.size; - memcpy(&argp->repl.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->repl.data = bp; - bp += argp->repl.size; - memcpy(&argp->prefix, bp, sizeof(argp->prefix)); - bp += sizeof(argp->prefix); - memcpy(&argp->suffix, bp, sizeof(argp->suffix)); - bp += sizeof(argp->suffix); - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __bam_init_print __P((DB_ENV *)); - */ -int -__bam_init_print(dbenv) - DB_ENV *dbenv; -{ - int ret; - - if ((ret = __db_add_recovery(dbenv, - __bam_pg_alloc_print, DB_bam_pg_alloc)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_pg_free_print, DB_bam_pg_free)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_split_print, DB_bam_split)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_rsplit_print, DB_bam_rsplit)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_adj_print, DB_bam_adj)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_cadjust_print, DB_bam_cadjust)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_cdel_print, DB_bam_cdel)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_repl_print, DB_bam_repl)) != 0) - return (ret); - return (0); -} - -/* - * PUBLIC: int __bam_init_recover 
__P((DB_ENV *)); - */ -int -__bam_init_recover(dbenv) - DB_ENV *dbenv; -{ - int ret; - - if ((ret = __db_add_recovery(dbenv, - __bam_pg_alloc_recover, DB_bam_pg_alloc)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_pg_free_recover, DB_bam_pg_free)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_split_recover, DB_bam_split)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_rsplit_recover, DB_bam_rsplit)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_adj_recover, DB_bam_adj)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_cadjust_recover, DB_bam_cadjust)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_cdel_recover, DB_bam_cdel)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __bam_repl_recover, DB_bam_repl)) != 0) - return (ret); - return (0); -} - -- cgit v1.2.3