aboutsummaryrefslogtreecommitdiff
path: root/db2/btree
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2000-01-02 06:01:06 +0000
committer: Ulrich Drepper <drepper@redhat.com> 2000-01-02 06:01:06 +0000
commit: 8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3 (patch)
tree: 099a250d7366aef2ab028fdb24f0d692cd784b4a /db2/btree
parent: 9a6450d578556c11e7c173d2f28362345b8f1258 (diff)
download: glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar
glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar.gz
glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar.bz2
glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.zip
Update.
* Makeconfig (all-subdirs): Remove db and db2. * db/*: Removed. * db2/*: Removed.
Diffstat (limited to 'db2/btree')
-rw-r--r-- db2/btree/bt_compare.c 195
-rw-r--r-- db2/btree/bt_conv.c 94
-rw-r--r-- db2/btree/bt_curadj.c 272
-rw-r--r-- db2/btree/bt_cursor.c 1913
-rw-r--r-- db2/btree/bt_delete.c 589
-rw-r--r-- db2/btree/bt_open.c 310
-rw-r--r-- db2/btree/bt_page.c 317
-rw-r--r-- db2/btree/bt_put.c 831
-rw-r--r-- db2/btree/bt_rec.c 903
-rw-r--r-- db2/btree/bt_recno.c 1356
-rw-r--r-- db2/btree/bt_rsearch.c 391
-rw-r--r-- db2/btree/bt_search.c 369
-rw-r--r-- db2/btree/bt_split.c 966
-rw-r--r-- db2/btree/bt_stat.c 198
-rw-r--r-- db2/btree/btree_auto.c 1508
15 files changed, 0 insertions, 10212 deletions
diff --git a/db2/btree/bt_compare.c b/db2/btree/bt_compare.c
deleted file mode 100644
index c60f920612..0000000000
--- a/db2/btree/bt_compare.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_compare.c 10.14 (Sleepycat) 10/9/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-/*
- * __bam_cmp --
- *	Compare an application-supplied key against the item stored at
- *	slot indx of page h, using the comparison routine func.
- *
- *	The result is < 0, 0 or > 0 as dbt sorts before, equal to or
- *	after the page item.
- *
- * PUBLIC: int __bam_cmp __P((DB *, const DBT *,
- * PUBLIC: PAGE *, u_int32_t, int (*)(const DBT *, const DBT *)));
- */
-int
-__bam_cmp(dbp, dbt, h, indx, func)
-	DB *dbp;
-	const DBT *dbt;
-	PAGE *h;
-	u_int32_t indx;
-	int (*func)__P((const DBT *, const DBT *));
-{
-	BINTERNAL *int_ent;
-	BKEYDATA *leaf_ent;
-	BOVERFLOW *ovfl;
-	DBT item;
-	int cmp;
-
-	/*
-	 * !!!
-	 * "item" is intentionally not zeroed: this is a hot path, and only
-	 * its data/size fields are filled in below.  A comparison routine
-	 * must therefore not look at any other DBT field.
-	 */
-	if (TYPE(h) != P_LBTREE && TYPE(h) != P_DUPLICATE) {
-		/*
-		 * Internal page.  The first key on the left-most internal
-		 * page of a level is treated as smaller than every user key,
-		 * so it never has to be rewritten when a new minimum key is
-		 * inserted into the tree.
-		 */
-		if (indx == 0 && h->prev_pgno == PGNO_INVALID)
-			return (1);
-
-		int_ent = GET_BINTERNAL(h, indx);
-		if (B_TYPE(int_ent->type) != B_OVERFLOW) {
-			item.data = int_ent->data;
-			item.size = int_ent->len;
-			return (func(dbt, &item));
-		}
-		/* The overflow descriptor lives in the entry's data area. */
-		ovfl = (BOVERFLOW *)(int_ent->data);
-	} else {
-		/* Leaf or duplicate page. */
-		leaf_ent = GET_BKEYDATA(h, indx);
-		if (B_TYPE(leaf_ent->type) != B_OVERFLOW) {
-			item.data = leaf_ent->data;
-			item.size = leaf_ent->len;
-			return (func(dbt, &item));
-		}
-		/* The entry itself is the overflow descriptor. */
-		ovfl = (BOVERFLOW *)leaf_ent;
-	}
-
-	/*
-	 * Overflow item: let __db_moff() do the comparison.  The default
-	 * comparison routine is passed as NULL.
-	 *
-	 * XXX
-	 * Errors from __db_moff() are dropped because this interface has no
-	 * way to report them.
-	 * NOTE(review): "cmp" is not initialized here, so the returned value
-	 * is indeterminate if __db_moff() fails before storing a result --
-	 * confirm against __db_moff().
-	 */
-	(void) __db_moff(dbp, dbt, ovfl->pgno, ovfl->tlen,
-	    func == __bam_defcmp ? NULL : func, &cmp);
-	return (cmp);
-}
-
-/*
- * __bam_defcmp --
- *	Default comparison routine: byte-wise comparison of the common
- *	prefix, with the shorter item sorting first when one is a prefix
- *	of the other.
- *
- * PUBLIC: int __bam_defcmp __P((const DBT *, const DBT *));
- */
-int
-__bam_defcmp(a, b)
-	const DBT *a, *b;
-{
-	size_t len;
-	u_int8_t *p1, *p2;
-
-	/*
-	 * Returns:
-	 *	< 0 if a is < b
-	 *	= 0 if a is = b
-	 *	> 0 if a is > b
-	 *
-	 * Only the sign of the result is meaningful.
-	 */
-	len = a->size > b->size ? b->size : a->size;
-	for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2)
-		if (*p1 != *p2)
-			/* The difference of two bytes always fits an int. */
-			return ((int)*p1 - (int)*p2);
-
-	/*
-	 * The common prefix is identical, so order by length.  The sizes
-	 * are compared directly instead of being subtracted: a difference
-	 * computed in a long and then narrowed to the int return type can
-	 * come out with the wrong sign when the sizes are far apart.
-	 */
-	if (a->size == b->size)
-		return (0);
-	return (a->size < b->size ? -1 : 1);
-}
-
-/*
- * __bam_defpfx --
- *	Default prefix routine: return how many leading bytes of b are
- *	needed to tell it apart from a.
- *
- * PUBLIC: size_t __bam_defpfx __P((const DBT *, const DBT *));
- */
-size_t
-__bam_defpfx(a, b)
-	const DBT *a, *b;
-{
-	const u_int8_t *lhs, *rhs;
-	size_t i, shorter;
-
-	lhs = a->data;
-	rhs = b->data;
-	shorter = a->size > b->size ? b->size : a->size;
-
-	/* The first differing byte, counted from one, is the answer. */
-	for (i = 0; i < shorter; ++i)
-		if (lhs[i] != rhs[i])
-			return (i + 1);
-
-	/*
-	 * No difference within the common length.  The caller passes the
-	 * items in sorted order, so a->size <= b->size: if a is strictly
-	 * shorter one extra byte of b is required, otherwise the sizes are
-	 * equal and the full length is returned.
-	 */
-	if (a->size < b->size)
-		return (a->size + 1);
-	return (a->size);
-}
diff --git a/db2/btree/bt_conv.c b/db2/btree/bt_conv.c
deleted file mode 100644
index a3069082ae..0000000000
--- a/db2/btree/bt_conv.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_conv.c 10.7 (Sleepycat) 9/20/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
-
-/*
- * __bam_pgin --
- *	Page-in hook: convert a page from the host-independent on-disk
- *	format into the host-specific layout.  Nothing is done unless the
- *	DB_PGINFO carried in the cookie says byte swapping is needed.
- *
- * PUBLIC: int __bam_pgin __P((db_pgno_t, void *, DBT *));
- */
-int
-__bam_pgin(pg, pp, cookie)
-	db_pgno_t pg;
-	void *pp;
-	DBT *cookie;
-{
-	DB_PGINFO *info;
-
-	info = (DB_PGINFO *)cookie->data;
-	if (info->needswap) {
-		/* The metadata page has its own layout and swap routine. */
-		if (pg == PGNO_METADATA)
-			return (__bam_mswap(pp));
-		return (__db_pgin(pg, info->db_pagesize, pp));
-	}
-	return (0);
-}
-
-/*
- * __bam_pgout --
- *	Page-out hook: convert a page from the host-specific layout into
- *	the host-independent on-disk format.  Nothing is done unless the
- *	DB_PGINFO carried in the cookie says byte swapping is needed.
- *
- * PUBLIC: int __bam_pgout __P((db_pgno_t, void *, DBT *));
- */
-int
-__bam_pgout(pg, pp, cookie)
-	db_pgno_t pg;
-	void *pp;
-	DBT *cookie;
-{
-	DB_PGINFO *info;
-
-	info = (DB_PGINFO *)cookie->data;
-	if (info->needswap) {
-		/* The metadata page has its own layout and swap routine. */
-		if (pg == PGNO_METADATA)
-			return (__bam_mswap(pp));
-		return (__db_pgout(pg, info->db_pagesize, pp));
-	}
-	return (0);
-}
-
-/*
- * __bam_mswap --
- *	Swap the bytes on the btree metadata page.  Always returns 0.
- *
- * PUBLIC: int __bam_mswap __P((PAGE *));
- */
-int
-__bam_mswap(pg)
-	PAGE *pg;
-{
-	u_int8_t *p;
-	int remaining;
-
-	p = (u_int8_t *)pg;
-
-	/*
-	 * Swap the ten leading 32-bit meta-data fields, in page order:
-	 *	lsn.file, lsn.offset, pgno, magic, version,
-	 *	pagesize, maxkey, minkey, free, flags
-	 * SWAP32 is presumably responsible for stepping p to the next
-	 * field -- see db_swap.h.
-	 */
-	for (remaining = 10; remaining > 0; --remaining) {
-		SWAP32(p);
-	}
-
-	return (0);
-}
diff --git a/db2/btree/bt_curadj.c b/db2/btree/bt_curadj.c
deleted file mode 100644
index 9b86fbb6d7..0000000000
--- a/db2/btree/bt_curadj.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_curadj.c 10.69 (Sleepycat) 12/2/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <stdlib.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-#ifdef DEBUG
-/*
- * __bam_cprint --
- *	Debugging aid: write one line per cursor on the DB handle's active
- *	queue to stderr, showing the cursor's page/index position, its
- *	duplicate page/index position, its record number and whether it is
- *	flagged as deleted.  Always returns 0.
- *
- * PUBLIC: int __bam_cprint __P((DB *));
- */
-int
-__bam_cprint(dbp)
-	DB *dbp;
-{
-	CURSOR *cp;
-	DBC *dbc;
-
-	/* The cursor list is walked under the handle's thread lock. */
-	DB_THREAD_LOCK(dbp);
-	for (dbc = TAILQ_FIRST(&dbp->active_queue);
-	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
-		cp = (CURSOR *)dbc->internal;
-		/*
-		 * Print the addresses with %p: casting a pointer to u_int
-		 * drops the upper bits wherever pointers are wider than
-		 * unsigned int.
-		 */
-		fprintf(stderr,
-	    "%p->%p: page: %lu index: %lu dpage %lu dindex: %lu recno: %lu",
-		    (void *)dbc, (void *)cp,
-		    (u_long)cp->pgno, (u_long)cp->indx,
-		    (u_long)cp->dpgno, (u_long)cp->dindx, (u_long)cp->recno);
-		if (F_ISSET(cp, C_DELETED))
-			fprintf(stderr, " (deleted)");
-		fprintf(stderr, "\n");
-	}
-	DB_THREAD_UNLOCK(dbp);
-
-	return (0);
-}
-#endif /* DEBUG */
-
-/*
- * __bam_ca_delete --
- *	Mark (delete != 0) or unmark (delete == 0) every cursor positioned
- *	on item indx of page pgno, either through its leaf position or its
- *	duplicate position.  Used when items are deleted and when already
- *	deleted items are overwritten.  Returns the number of cursors
- *	that matched.
- *
- * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int));
- */
-int
-__bam_ca_delete(dbp, pgno, indx, delete)
-	DB *dbp;
-	db_pgno_t pgno;
-	u_int32_t indx;
-	int delete;
-{
-	CURSOR *cur;
-	DBC *c;
-	int nfound;		/* !!!: Has to contain max number of cursors. */
-
-	/* Recno is responsible for its own adjustments. */
-	if (dbp->type == DB_RECNO)
-		return (0);
-
-	/*
-	 * Only cursors in the current thread of control need reviewing:
-	 * the page is write locked at this point, and any other thread of
-	 * control had better be using a different locker ID, so only our
-	 * own cursors can be on the page.
-	 *
-	 * Several cursors within the thread may hold write locks on the
-	 * same page, but cursors within a thread must be single threaded,
-	 * so the only thing protected here is the cursor linked list.
-	 */
-	nfound = 0;
-	DB_THREAD_LOCK(dbp);
-	for (c = TAILQ_FIRST(&dbp->active_queue);
-	    c != NULL; c = TAILQ_NEXT(c, links)) {
-		cur = (CURSOR *)c->internal;
-
-		/* Skip cursors that reference the item in neither way. */
-		if ((cur->pgno != pgno || cur->indx != indx) &&
-		    (cur->dpgno != pgno || cur->dindx != indx))
-			continue;
-
-		if (delete) {
-			F_SET(cur, C_DELETED);
-		} else {
-			F_CLR(cur, C_DELETED);
-		}
-		++nfound;
-	}
-	DB_THREAD_UNLOCK(dbp);
-
-	return (nfound);
-}
-
-/*
- * __bam_ca_di --
- *	Adjust the cursors during a delete or insert: every cursor index
- *	on page pgno that is at or beyond indx is shifted by adjust.
- *
- * PUBLIC: void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
- */
-void
-__bam_ca_di(dbp, pgno, indx, adjust)
-	DB *dbp;
-	db_pgno_t pgno;
-	u_int32_t indx;
-	int adjust;
-{
-	CURSOR *cur;
-	DBC *c;
-
-	/* Recno is responsible for its own adjustments. */
-	if (dbp->type == DB_RECNO)
-		return;
-
-	/*
-	 * Adjust the cursors.  See the comment in __bam_ca_delete().
-	 */
-	DB_THREAD_LOCK(dbp);
-	c = TAILQ_FIRST(&dbp->active_queue);
-	while (c != NULL) {
-		cur = (CURSOR *)c->internal;
-
-		/* Leaf-page position. */
-		if (cur->pgno == pgno && cur->indx >= indx)
-			cur->indx += adjust;
-		/* Duplicate-page position. */
-		if (cur->dpgno == pgno && cur->dindx >= indx)
-			cur->dindx += adjust;
-
-		c = TAILQ_NEXT(c, links);
-	}
-	DB_THREAD_UNLOCK(dbp);
-}
-
-/*
- * __bam_ca_dup --
- *	Adjust the cursors when moving items from a leaf page to a
- *	duplicates page: cursors on leaf item (fpgno, fi) are re-pointed
- *	to leaf index first and duplicate position (tpgno, ti).
- *
- * PUBLIC: void __bam_ca_dup __P((DB *,
- * PUBLIC: db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
- */
-void
-__bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti)
-	DB *dbp;
-	db_pgno_t fpgno, tpgno;
-	u_int32_t first, fi, ti;
-{
-	CURSOR *cur;
-	DBC *c;
-
-	/* Recno is responsible for its own adjustments. */
-	if (dbp->type == DB_RECNO)
-		return;
-
-	/*
-	 * Adjust the cursors.  See the comment in __bam_ca_delete().
-	 */
-	DB_THREAD_LOCK(dbp);
-	for (c = TAILQ_FIRST(&dbp->active_queue);
-	    c != NULL; c = TAILQ_NEXT(c, links)) {
-		cur = (CURSOR *)c->internal;
-
-		/*
-		 * A cursor that already has a duplicate page set was moved
-		 * earlier; it must be left alone because the same leaf
-		 * location is moved from more than once.
-		 */
-		if (cur->dpgno != PGNO_INVALID)
-			continue;
-		if (cur->pgno != fpgno || cur->indx != fi)
-			continue;
-
-		cur->indx = first;
-		cur->dpgno = tpgno;
-		cur->dindx = ti;
-	}
-	DB_THREAD_UNLOCK(dbp);
-}
-
-/*
- * __bam_ca_rsplit --
- *	Adjust the cursors when doing reverse splits: every cursor whose
- *	leaf page is fpgno is re-pointed to tpgno.
- *
- * PUBLIC: void __bam_ca_rsplit __P((DB *, db_pgno_t, db_pgno_t));
- */
-void
-__bam_ca_rsplit(dbp, fpgno, tpgno)
-	DB *dbp;
-	db_pgno_t fpgno, tpgno;
-{
-	CURSOR *cur;
-	DBC *c;
-
-	/* Recno is responsible for its own adjustments. */
-	if (dbp->type == DB_RECNO)
-		return;
-
-	/*
-	 * Adjust the cursors.  See the comment in __bam_ca_delete().
-	 */
-	DB_THREAD_LOCK(dbp);
-	c = TAILQ_FIRST(&dbp->active_queue);
-	while (c != NULL) {
-		cur = (CURSOR *)c->internal;
-		if (cur->pgno == fpgno)
-			cur->pgno = tpgno;
-		c = TAILQ_NEXT(c, links);
-	}
-	DB_THREAD_UNLOCK(dbp);
-}
-
-/*
- * __bam_ca_split --
- *	Adjust the cursors when splitting page ppgno into lpgno and rpgno
- *	at split_indx.  cleft says whether cursors that stay in the left
- *	half must be re-pointed to lpgno.
- *
- * PUBLIC: void __bam_ca_split __P((DB *,
- * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
- */
-void
-__bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft)
-	DB *dbp;
-	db_pgno_t ppgno, lpgno, rpgno;
-	u_int32_t split_indx;
-	int cleft;
-{
-	CURSOR *cur;
-	DBC *c;
-
-	/* Recno is responsible for its own adjustments. */
-	if (dbp->type == DB_RECNO)
-		return;
-
-	/*
-	 * Adjust the cursors.  See the comment in __bam_ca_delete().
-	 *
-	 * A cursor on the page being split has to keep pointing at the
-	 * same record afterwards.  Pointers into the left half are usually
-	 * left untouched, because the left page's contents are copied back
-	 * over the original page.  A cursor that ends up on the right page
-	 * has its index reduced by the number of records that went left.
-	 */
-	DB_THREAD_LOCK(dbp);
-	for (c = TAILQ_FIRST(&dbp->active_queue);
-	    c != NULL; c = TAILQ_NEXT(c, links)) {
-		cur = (CURSOR *)c->internal;
-
-		/* Leaf-page position. */
-		if (cur->pgno == ppgno) {
-			if (cur->indx >= split_indx) {
-				cur->pgno = rpgno;
-				cur->indx -= split_indx;
-			} else if (cleft)
-				cur->pgno = lpgno;
-		}
-
-		/* Duplicate-page position. */
-		if (cur->dpgno == ppgno) {
-			if (cur->dindx >= split_indx) {
-				cur->dpgno = rpgno;
-				cur->dindx -= split_indx;
-			} else if (cleft)
-				cur->dpgno = lpgno;
-		}
-	}
-	DB_THREAD_UNLOCK(dbp);
-}
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
deleted file mode 100644
index 10bc095c9d..0000000000
--- a/db2/btree/bt_cursor.c
+++ /dev/null
@@ -1,1913 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_cursor.c 10.81 (Sleepycat) 12/16/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "lock_ext.h"
-
-/*
- * Prototypes for the file-local cursor routines.  __P() wraps each
- * parameter list, as it does throughout this file.
- */
-static int __bam_c_close __P((DBC *));
-static int __bam_c_del __P((DBC *, u_int32_t));
-static int __bam_c_destroy __P((DBC *));
-static int __bam_c_first __P((DBC *, CURSOR *));
-static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
-static int __bam_c_getstack __P((DBC *, CURSOR *));
-static int __bam_c_last __P((DBC *, CURSOR *));
-static int __bam_c_next __P((DBC *, CURSOR *, int));
-static int __bam_c_physdel __P((DBC *, CURSOR *, PAGE *));
-static int __bam_c_prev __P((DBC *, CURSOR *));
-static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
-static void __bam_c_reset __P((CURSOR *));
-static int __bam_c_rget __P((DBC *, DBT *, u_int32_t));
-static int __bam_c_search __P((DBC *, CURSOR *, const DBT *, u_int32_t, int *));
-static int __bam_dsearch __P((DBC *, CURSOR *, DBT *, u_int32_t *));
-
-/*
- * Discard the current page/lock held by a cursor.
- *
- * Both the page and the lock are released only if held, and the cursor
- * fields are reset to NULL / LOCK_INVALID afterwards.  The return values
- * of memp_fput() and __BT_TLPUT() are ignored.
- *
- * NOTE: this expands to a bare brace block, not do { } while (0), and it
- * evaluates both arguments more than once.  "DISCARD(dbc, cp);" used as
- * the unbraced body of an if that has an else will not compile.
- */
-#undef DISCARD
-#define DISCARD(dbc, cp) { \
- if ((cp)->page != NULL) { \
- (void)memp_fput((dbc)->dbp->mpf, (cp)->page, 0); \
- (cp)->page = NULL; \
- } \
- if ((cp)->lock != LOCK_INVALID) { \
- (void)__BT_TLPUT((dbc), (cp)->lock); \
- (cp)->lock = LOCK_INVALID; \
- } \
-}
-
-/*
- * If the cursor references a deleted record.
- *
- * With no duplicate page set (dpgno == PGNO_INVALID) the item tested is
- * the one at indx + O_INDX on the cursor's page; otherwise it is the item
- * at dindx.  (cp)->page must be the page the cursor currently references.
- */
-#undef IS_CUR_DELETED
-#define IS_CUR_DELETED(cp) \
- (((cp)->dpgno == PGNO_INVALID && \
- B_DISSET(GET_BKEYDATA((cp)->page, \
- (cp)->indx + O_INDX)->type)) || \
- ((cp)->dpgno != PGNO_INVALID && \
- B_DISSET(GET_BKEYDATA((cp)->page, (cp)->dindx)->type)))
-
-/*
- * If the cursor and index combination references a deleted record.
- *
- * Same test as IS_CUR_DELETED, but the index is supplied by the caller
- * instead of being taken from the cursor.
- */
-#undef IS_DELETED
-#define IS_DELETED(cp, indx) \
- (((cp)->dpgno == PGNO_INVALID && \
- B_DISSET(GET_BKEYDATA((cp)->page, (indx) + O_INDX)->type)) || \
- ((cp)->dpgno != PGNO_INVALID && \
- B_DISSET(GET_BKEYDATA((cp)->page, (indx))->type)))
-
-/*
- * Test to see if two cursors could point to duplicates of the same key,
- * whether on-page or off-page. The leaf page numbers must be the same
- * in both cases. In the case of off-page duplicates, the key indices
- * on the leaf page will be the same. In the case of on-page duplicates,
- * the duplicate page number must not be set, and the key index offsets
- * must be the same. For the last test, as the saved copy of the cursor
- * will not have a valid page pointer, we use the cursor's.
- *
- * "cursor" is a pointer to a CURSOR; "saved_copy" is a CURSOR structure
- * (not a pointer).
- */
-#undef POSSIBLE_DUPLICATE
-#define POSSIBLE_DUPLICATE(cursor, saved_copy) \
- ((cursor)->pgno == (saved_copy).pgno && \
- ((cursor)->indx == (saved_copy).indx || \
- ((cursor)->dpgno == PGNO_INVALID && \
- (saved_copy).dpgno == PGNO_INVALID && \
- (cursor)->page->inp[(cursor)->indx] == \
- (cursor)->page->inp[(saved_copy).indx])))
-
-/*
- * __bam_c_reset --
- *	Put the internal cursor structure back into its "not positioned"
- *	state: empty page stack, no page, no lock, no position, no flags.
- */
-static void
-__bam_c_reset(cp)
-	CURSOR *cp;
-{
-	/* Page stack: empty, with esp one past the last stack slot. */
-	cp->sp = cp->stack;
-	cp->csp = cp->stack;
-	cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
-
-	/* Position on the leaf page and on the duplicate page. */
-	cp->pgno = PGNO_INVALID;
-	cp->indx = 0;
-	cp->dpgno = PGNO_INVALID;
-	cp->dindx = 0;
-	cp->recno = RECNO_OOB;
-
-	/* Held resources and state bits. */
-	cp->page = NULL;
-	cp->lock = LOCK_INVALID;
-	cp->mode = DB_LOCK_NG;
-	cp->flags = 0;
-}
-
-/*
- * __bam_c_init --
- *	Initialize the access private portion of a cursor: allocate the
- *	CURSOR structure, pre-allocate the record-number return buffer if
- *	the tree needs one, and install the access-method functions.
- *	Returns 0 on success or the allocation routine's error.
- *
- * PUBLIC: int __bam_c_init __P((DBC *));
- */
-int
-__bam_c_init(dbc)
-	DBC *dbc;
-{
-	DB *dbp;
-	CURSOR *newcp;
-	int ret;
-
-	ret = __os_calloc(1, sizeof(CURSOR), &newcp);
-	if (ret != 0)
-		return (ret);
-
-	dbp = dbc->dbp;
-	newcp->dbc = dbc;
-
-	/*
-	 * Logical record numbers are always the same size, so the buffer
-	 * used to return one is allocated once here rather than checked
-	 * for space on every return.
-	 */
-	if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
-		ret = __os_malloc(sizeof(db_recno_t), NULL, &dbc->rkey.data);
-		if (ret != 0) {
-			__os_free(newcp, sizeof(CURSOR));
-			return (ret);
-		}
-		dbc->rkey.ulen = sizeof(db_recno_t);
-	}
-
-	/*
-	 * Initialize methods.  Close and destroy are shared by both access
-	 * methods; delete, get and put are btree- or recno-specific.
-	 */
-	dbc->internal = newcp;
-	dbc->c_am_close = __bam_c_close;
-	dbc->c_am_destroy = __bam_c_destroy;
-	if (dbp->type == DB_BTREE) {
-		dbc->c_del = __bam_c_del;
-		dbc->c_get = __bam_c_get;
-		dbc->c_put = __bam_c_put;
-	} else {
-		dbc->c_del = __ram_c_del;
-		dbc->c_get = __ram_c_get;
-		dbc->c_put = __ram_c_put;
-	}
-
-	/* Initialize dynamic information. */
-	__bam_c_reset(newcp);
-
-	return (0);
-}
-
-/*
- * __bam_c_close --
- *	Close down the cursor from a single use: finish any pending btree
- *	delete, drop the cursor's lock and reset the cursor state.
- *	Returns the result of the physical delete, or 0 if none was needed.
- */
-static int
-__bam_c_close(dbc)
-	DBC *dbc;
-{
-	CURSOR *cur;
-	DB *dbp;
-	int status;
-
-	cur = dbc->internal;
-	dbp = dbc->dbp;
-
-	/*
-	 * If a cursor deleted a btree key, perform the actual deletion.
-	 * (Recno keys are either deleted immediately or never deleted.)
-	 */
-	status = 0;
-	if (dbp->type == DB_BTREE && F_ISSET(cur, C_DELETED))
-		status = __bam_c_physdel(dbc, cur, NULL);
-
-	/* Discard any locks not acquired inside of a transaction. */
-	if (cur->lock != LOCK_INVALID) {
-		(void)__BT_TLPUT(dbc, cur->lock);
-		cur->lock = LOCK_INVALID;
-	}
-
-#ifdef DIAGNOSTIC
-	/* Sanity check: the page stack should be empty by now. */
-	if (cur->csp != cur->stack)
-		__db_err(dbp->dbenv, "btree cursor close: stack not empty");
-#endif
-
-	/* Initialize dynamic information. */
-	__bam_c_reset(cur);
-
-	return (status);
-}
-
-/*
- * __bam_c_destroy --
- *	Close a single cursor -- internal version.  Releases the CURSOR
- *	structure hung off dbc->internal; always returns 0.
- */
-static int
-__bam_c_destroy(dbc)
-	DBC *dbc;
-{
-	CURSOR *cur;
-
-	/* Discard the structures. */
-	cur = dbc->internal;
-	__os_free(cur, sizeof(CURSOR));
-
-	return (0);
-}
-
-/*
- * __bam_c_del --
- * Delete using a cursor.
- *
- * The item is only flagged as deleted here, both on the page and in
- * every cursor that references it; it is not physically removed.
- *
- * Returns 0 on success, the error from __db_cdelchk() for bad flags,
- * EINVAL for a CDB handle whose cursor is neither DBC_RMW nor
- * DBC_WRITER, DB_KEYEMPTY if the cursor's item is already flagged
- * deleted, or the error from the failing lock/page/log/adjust call.
- */
-static int
-__bam_c_del(dbc, flags)
- DBC *dbc;
- u_int32_t flags;
-{
- CURSOR *cp;
- DB *dbp;
- DB_LOCK lock;
- PAGE *h;
- db_pgno_t pgno;
- db_indx_t indx;
- int ret;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
- h = NULL;
-
- DB_PANIC_CHECK(dbp);
-
- /* Check for invalid flags. */
- if ((ret = __db_cdelchk(dbp, flags,
- F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
- return (ret);
-
- /*
- * If we are running CDB, this had better be either a write
- * cursor or an immediate writer.
- */
- if (F_ISSET(dbp, DB_AM_CDB))
- if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
- return (EINVAL);
-
- DEBUG_LWRITE(dbc, dbc->txn, "bam_c_del", NULL, NULL, flags);
-
- /* If already deleted, return failure. */
- if (F_ISSET(cp, C_DELETED))
- return (DB_KEYEMPTY);
-
- /*
- * We don't physically delete the record until the cursor moves,
- * so we have to have a long-lived write lock on the page instead
- * of a long-lived read lock. Note, we have to have a read lock
- * to even get here, so we simply discard it.
- */
- if (F_ISSET(dbp, DB_AM_LOCKING) && cp->mode != DB_LOCK_WRITE) {
- if ((ret = __bam_lget(dbc,
- 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
- goto err;
- /* The write lock is acquired before the old lock is released. */
- (void)__BT_TLPUT(dbc, cp->lock);
- cp->lock = lock;
- cp->mode = DB_LOCK_WRITE;
- }
-
- /*
- * Acquire the underlying page (which may be different from the above
- * page because it may be a duplicate page), and set the on-page and
- * in-cursor delete flags. We don't need to lock it as we've already
- * write-locked the page leading to it.
- */
- if (cp->dpgno == PGNO_INVALID) {
- pgno = cp->pgno;
- indx = cp->indx;
- } else {
- pgno = cp->dpgno;
- indx = cp->dindx;
- }
-
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
- goto err;
-
- /* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cdel_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h),
- 0, dbp->log_fileid, PGNO(h), &LSN(h), indx)) != 0) {
- (void)memp_fput(dbp->mpf, h, 0);
- /*
- * NOTE(review): h is not set to NULL after the put above, so the
- * err label puts the same page a second time -- confirm whether
- * that is intended.
- */
- goto err;
- }
-
- /*
- * Set the intent-to-delete flag on the page and update all cursors.
- * On a leaf page the data item is at indx + O_INDX; on a duplicate
- * page it is at indx itself.
- */
- if (cp->dpgno == PGNO_INVALID)
- B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type);
- else
- B_DSET(GET_BKEYDATA(h, indx)->type);
- (void)__bam_ca_delete(dbp, pgno, indx, 1);
-
- /*
- * NOTE(review): ret from this put is returned as-is when DB_BT_RECNUM
- * is not set, but is overwritten (and so lost) by the calls below when
- * it is set.
- */
- ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
- h = NULL;
-
- /*
- * If the tree has record numbers, we have to adjust the counts.
- *
- * !!!
- * This test is right -- we don't yet support duplicates and record
- * numbers in the same tree, so ignore duplicates if DB_BT_RECNUM
- * set.
- */
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
- if ((ret = __bam_c_getstack(dbc, cp)) != 0)
- goto err;
- /*
- * NOTE(review): if __bam_adjust() fails, __bam_stkrel() is not
- * called on this path -- confirm the stack is released elsewhere.
- */
- if ((ret = __bam_adjust(dbc, -1)) != 0)
- goto err;
- (void)__bam_stkrel(dbc, 0);
- }
-
-err: if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
- return (ret);
-}
-
-/*
- * __bam_c_get --
- * Get using a cursor (btree).
- *
- * Positions the cursor according to flags, then returns the key (except
- * for DB_SET) and the data item through key/data. The cursor state on
- * entry is saved in "copy"; on any error the cursor is restored from it,
- * so a failed get leaves the cursor where it was.
- *
- * Returns 0 on success, DB_KEYEMPTY for DB_CURRENT on a deleted item,
- * EINVAL for DB_NEXT_DUP on an unpositioned cursor, DB_NOTFOUND when no
- * matching item exists, or the error from a failing helper.
- */
-static int
-__bam_c_get(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- CURSOR *cp, copy, start;
- DB *dbp;
- PAGE *h;
- int exact, ret, tmp_rmw;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
-
- DB_PANIC_CHECK(dbp);
-
- /* Check for invalid flags. */
- if ((ret = __db_cgetchk(dbp,
- key, data, flags, cp->pgno != PGNO_INVALID)) != 0)
- return (ret);
-
- /*
- * Clear OR'd in additional bits so we can check for flag equality.
- * Outside of CDB, DB_RMW sets DBC_RMW on the cursor for the duration
- * of this call; tmp_rmw records that it must be cleared on the way
- * out.
- */
- tmp_rmw = 0;
- if (LF_ISSET(DB_RMW)) {
- if (!F_ISSET(dbp, DB_AM_CDB)) {
- tmp_rmw = 1;
- F_SET(dbc, DBC_RMW);
- }
- LF_CLR(DB_RMW);
- }
-
- DEBUG_LREAD(dbc, dbc->txn, "bam_c_get",
- flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
-
- /*
- * Return a cursor's record number. It has nothing to do with the
- * cursor get code except that it's been rammed into the interface.
- */
- if (flags == DB_GET_RECNO) {
- ret = __bam_c_rget(dbc, data, flags);
- if (tmp_rmw)
- F_CLR(dbc, DBC_RMW);
- return (ret);
- }
-
- /*
- * Initialize the cursor for a new retrieval. Clear the cursor's
- * page pointer, it was set before this operation, and no longer
- * has any meaning.
- *
- * "copy" keeps the previous lock; cp->lock is invalidated so that
- * the err path only releases a lock acquired during this call.
- */
- cp->page = NULL;
- copy = *cp;
- cp->lock = LOCK_INVALID;
-
- switch (flags) {
- case DB_CURRENT:
- /* It's not possible to return a deleted record. */
- if (F_ISSET(cp, C_DELETED)) {
- ret = DB_KEYEMPTY;
- goto err;
- }
-
- /* Acquire the current page. */
- if ((ret = __bam_lget(dbc,
- 0, cp->pgno, DB_LOCK_READ, &cp->lock)) == 0)
- ret = memp_fget(dbp->mpf,
- cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno,
- 0, &cp->page);
- if (ret != 0)
- goto err;
- break;
- case DB_NEXT_DUP:
- if (cp->pgno == PGNO_INVALID) {
- ret = EINVAL;
- goto err;
- }
- if ((ret = __bam_c_next(dbc, cp, 1)) != 0)
- goto err;
-
- /* Make sure we didn't go past the end of the duplicates. */
- if (!POSSIBLE_DUPLICATE(cp, copy)) {
- ret = DB_NOTFOUND;
- goto err;
- }
- break;
- case DB_NEXT:
- if (cp->pgno != PGNO_INVALID) {
- if ((ret = __bam_c_next(dbc, cp, 1)) != 0)
- goto err;
- break;
- }
- /* FALLTHROUGH */
- case DB_FIRST:
- if ((ret = __bam_c_first(dbc, cp)) != 0)
- goto err;
- break;
- case DB_PREV:
- if (cp->pgno != PGNO_INVALID) {
- if ((ret = __bam_c_prev(dbc, cp)) != 0)
- goto err;
- break;
- }
- /* FALLTHROUGH */
- case DB_LAST:
- if ((ret = __bam_c_last(dbc, cp)) != 0)
- goto err;
- break;
- case DB_SET:
- if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
- goto err;
-
- /*
- * We cannot currently be referencing a deleted record, but we
- * may be referencing off-page duplicates.
- *
- * If we're referencing off-page duplicates, move off-page.
- * If we moved off-page, move to the next non-deleted record.
- * If we moved to the next non-deleted record, check to make
- * sure we didn't switch records because our current record
- * had no non-deleted data items.
- */
- start = *cp;
- if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
- goto err;
- if (cp->dpgno != PGNO_INVALID && IS_CUR_DELETED(cp)) {
- if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
- goto err;
- if (!POSSIBLE_DUPLICATE(cp, start)) {
- ret = DB_NOTFOUND;
- goto err;
- }
- }
- break;
- case DB_SET_RECNO:
- if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
- goto err;
- break;
- case DB_GET_BOTH:
- if (F_ISSET(dbc, DBC_CONTINUE | DBC_KEYSET)) {
- /* Acquire the current page. */
- if ((ret = memp_fget(dbp->mpf,
- cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno,
- 0, &cp->page)) != 0)
- goto err;
-
- /* If DBC_CONTINUE, move to the next item. */
- if (F_ISSET(dbc, DBC_CONTINUE) &&
- (ret = __bam_c_next(dbc, cp, 1)) != 0)
- goto err;
- } else {
- if ((ret =
- __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
- goto err;
-
- /*
- * We may be referencing a duplicates page. Move to
- * the first duplicate.
- */
- if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
- goto err;
- }
-
- /* Search for a matching entry. */
- if ((ret = __bam_dsearch(dbc, cp, data, NULL)) != 0)
- goto err;
-
- /* Ignore deleted entries. */
- if (IS_CUR_DELETED(cp)) {
- ret = DB_NOTFOUND;
- goto err;
- }
- break;
- case DB_SET_RANGE:
- if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
- goto err;
-
- /*
- * As we didn't require an exact match, the search function
- * may have returned an entry past the end of the page. If
- * so, move to the next entry.
- */
- if (cp->indx == NUM_ENT(cp->page) &&
- (ret = __bam_c_next(dbc, cp, 0)) != 0)
- goto err;
-
- /*
- * We may be referencing off-page duplicates, if so, move
- * off-page.
- */
- if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
- goto err;
-
- /*
- * We may be referencing a deleted record, if so, move to
- * the next non-deleted record.
- */
- if (IS_CUR_DELETED(cp) && (ret = __bam_c_next(dbc, cp, 0)) != 0)
- goto err;
- break;
- }
-
- /*
- * Return the key if the user didn't give us one. If we've moved to
- * a duplicate page, we may no longer have a pointer to the main page,
- * so we have to go get it. We know that it's already read-locked,
- * however, so we don't have to acquire a new lock.
- */
- if (flags != DB_SET) {
- if (cp->dpgno != PGNO_INVALID) {
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0)
- goto err;
- } else
- h = cp->page;
- ret = __db_ret(dbp,
- h, cp->indx, key, &dbc->rkey.data, &dbc->rkey.ulen);
- /* Only put h back if it was fetched separately above. */
- if (cp->dpgno != PGNO_INVALID)
- (void)memp_fput(dbp->mpf, h, 0);
- if (ret)
- goto err;
- }
-
- /*
- * Return the data. On a leaf page the data item follows its key at
- * indx + O_INDX; on a duplicate page it is at dindx.
- */
- if ((ret = __db_ret(dbp, cp->page,
- cp->dpgno == PGNO_INVALID ? cp->indx + O_INDX : cp->dindx,
- data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
- goto err;
-
- /*
- * If the previous cursor record has been deleted, physically delete
- * the entry from the page. We clear the deleted flag before we call
- * the underlying delete routine so that, if an error occurs, and we
- * restore the cursor, the deleted flag is cleared. This is because,
- * if we manage to physically modify the page, and then restore the
- * cursor, we might try to repeat the page modification when closing
- * the cursor.
- */
- if (F_ISSET(&copy, C_DELETED)) {
- F_CLR(&copy, C_DELETED);
- if ((ret = __bam_c_physdel(dbc, &copy, cp->page)) != 0)
- goto err;
- }
- F_CLR(cp, C_DELETED);
-
- /* Release the previous lock, if any; the current lock is retained. */
- if (copy.lock != LOCK_INVALID)
- (void)__BT_TLPUT(dbc, copy.lock);
-
- /*
- * Release the current page.
- *
- * NOTE(review): cp->page is not cleared before this put, so if it
- * fails the err path below puts the same page a second time; and the
- * previous lock has already been released when the cursor is restored
- * from "copy" -- confirm both are acceptable.
- */
- if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
- goto err;
-
- /*
- * The err label sits inside an if (0) block so that the success path
- * skips the cleanup while both paths share the code that follows it.
- */
- if (0) {
-err: if (cp->page != NULL)
- (void)memp_fput(dbp->mpf, cp->page, 0);
- if (cp->lock != LOCK_INVALID)
- (void)__BT_TLPUT(dbc, cp->lock);
- *cp = copy;
- }
-
- /* Release temporary lock upgrade. */
- if (tmp_rmw)
- F_CLR(dbc, DBC_RMW);
-
- return (ret);
-}
-
-/*
- * __bam_dsearch --
- * Search for a matching data item (or the first data item that's
- * equal to or greater than the one we're searching for).
- *
- * iflagp selects the mode: NULL means a lookup, which returns 0 on an
- * exact match and DB_NOTFOUND otherwise; non-NULL means an insert, which
- * returns 0 with the cursor left on the reference item and *iflagp set
- * to DB_BEFORE or DB_AFTER to say on which side of it to insert.
- * Errors from __db_dsearch() are returned unchanged.
- */
-static int
-__bam_dsearch(dbc, cp, data, iflagp)
- DBC *dbc;
- CURSOR *cp;
- DBT *data;
- u_int32_t *iflagp;
-{
- DB *dbp;
- CURSOR copy, last;
- int cmp, ret;
-
- dbp = dbc->dbp;
-
- /*
- * If iflagp is non-NULL, we're doing an insert.
- *
- * If the duplicates are off-page, use the duplicate search routine.
- */
- if (cp->dpgno != PGNO_INVALID) {
- if ((ret = __db_dsearch(dbc, iflagp != NULL,
- data, cp->dpgno, &cp->dindx, &cp->page, &cmp)) != 0)
- return (ret);
- /* Record the page __db_dsearch() left in cp->page. */
- cp->dpgno = cp->page->pgno;
-
- if (iflagp == NULL) {
- if (cmp != 0)
- return (DB_NOTFOUND);
- return (0);
- }
- *iflagp = DB_BEFORE;
- return (0);
- }
-
- /*
- * Otherwise, do the search ourselves. "copy" is the starting
- * position, used to detect when the scan leaves the duplicate set.
- */
- copy = *cp;
- for (;;) {
- /* Save the last interesting cursor position. */
- last = *cp;
-
- /* See if the data item matches the one we're looking for. */
- if ((cmp = __bam_cmp(dbp, data, cp->page, cp->indx + O_INDX,
- dbp->dup_compare == NULL ?
- __bam_defcmp : dbp->dup_compare)) == 0) {
- if (iflagp != NULL)
- *iflagp = DB_AFTER;
- return (0);
- }
-
- /*
- * If duplicate entries are sorted, we're done if we find a
- * page entry that sorts greater than the application item.
- * If doing an insert, return success, otherwise DB_NOTFOUND.
- */
- if (dbp->dup_compare != NULL && cmp < 0) {
- if (iflagp == NULL)
- return (DB_NOTFOUND);
- *iflagp = DB_BEFORE;
- return (0);
- }
-
- /*
- * Move to the next item. If we reach the end of the page and
- * we're doing an insert, set the cursor to the last item and
- * set the referenced memory location so callers know to insert
- * after the item, instead of before it. If not inserting, we
- * return DB_NOTFOUND.
- */
- if ((cp->indx += P_INDX) >= NUM_ENT(cp->page)) {
- if (iflagp == NULL)
- return (DB_NOTFOUND);
- goto use_last;
- }
-
- /*
- * Make sure we didn't go past the end of the duplicates. The
- * error conditions are the same as above.
- */
- if (!POSSIBLE_DUPLICATE(cp, copy)) {
- if (iflagp == NULL)
- return (DB_NOTFOUND);
- /* Restore the last position that was still in the duplicate set. */
-use_last: *cp = last;
- *iflagp = DB_AFTER;
- return (0);
- }
- }
- /* NOTREACHED */
-}
-
-/*
- * __bam_c_rget --
- * Return the record number for a cursor.
- *
- * Fetches the page at cp->pgno, copies the key at cp->indx into a local
- * DBT (flagged DB_DBT_MALLOC | DB_DBT_INTERNAL), re-searches the tree
- * for that key with __bam_search asking for the record number, and
- * copies the resulting recno into the caller's data DBT.
- *
- * dbc: cursor handle; dbc->internal is the CURSOR that is read.
- * data: receives the record number via __db_retcopy.
- * flags: unused (silenced with COMPQUIET).
- *
- * Returns 0, or the first error from memp_fget, __db_ret, __bam_search
- * or __db_retcopy. Success and failure both fall through the err label,
- * which puts cp->page back (the pointer itself is not cleared here) and
- * frees the key copy.
- */
-static int
-__bam_c_rget(dbc, data, flags)
- DBC *dbc;
- DBT *data;
- u_int32_t flags;
-{
- CURSOR *cp;
- DB *dbp;
- DBT dbt;
- db_recno_t recno;
- int exact, ret;
-
- COMPQUIET(flags, 0);
- dbp = dbc->dbp;
- cp = dbc->internal;
-
- /* Get the page with the current item on it. */
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
- return (ret);
-
- /*
- * Get a copy of the key. The DBT is zeroed first, so dbt.data is
- * NULL and dbt.size is 0 if __db_ret fails before allocating.
- */
- memset(&dbt, 0, sizeof(DBT));
- dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
- if ((ret = __db_ret(dbp, cp->page, cp->indx, &dbt, NULL, NULL)) != 0)
- goto err;
-
- exact = 1;
- if ((ret = __bam_search(dbc, &dbt,
- F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND,
- 1, &recno, &exact)) != 0)
- goto err;
-
- ret = __db_retcopy(data, &recno, sizeof(recno),
- &dbc->rdata.data, &dbc->rdata.ulen, dbp->db_malloc);
-
- /*
- * Release the stack. This is done whether or not the copy above
- * succeeded; ret still carries the __db_retcopy result.
- */
- __bam_stkrel(dbc, 0);
-
-err: (void)memp_fput(dbp->mpf, cp->page, 0);
- __os_free(dbt.data, dbt.size);
- return (ret);
-}
-
-/*
- * __bam_c_put --
- * Put using a cursor.
- *
- * dbc: cursor handle; dbc->internal is the CURSOR that is updated.
- * key: key DBT; used for positioning only with DB_KEYFIRST/DB_KEYLAST.
- * data: data DBT to store.
- * flags: one of
- * DB_AFTER, DB_BEFORE, DB_CURRENT -- insert next to, or replace,
- * the item the cursor currently references;
- * DB_KEYFIRST, DB_KEYLAST -- position with __bam_c_search on key
- * and insert (or replace) there.
- *
- * The cursor is snapshotted into a local copy before the operation and
- * restored from it on any error. When __bam_iitem reports DB_NEEDSPLIT
- * the code jumps to the split label, splits the page, and then retries
- * the whole positioning/insert sequence.
- *
- * Returns 0 on success; EINVAL or EAGAIN from the CDB checks; EINVAL
- * when a DB_CURRENT replacement does not compare equal under the
- * application's duplicate comparison routine; otherwise the error from
- * whichever called routine failed.
- */
-static int
-__bam_c_put(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- CURSOR *cp, copy;
- DB *dbp;
- DBT dbt;
- db_indx_t indx;
- db_pgno_t pgno;
- u_int32_t iiflags, iiop;
- int exact, needkey, ret, stack;
- void *arg;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
-
- DB_PANIC_CHECK(dbp);
-
- DEBUG_LWRITE(dbc, dbc->txn, "bam_c_put",
- flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
- data, flags);
-
- if ((ret = __db_cputchk(dbp, key, data, flags,
- F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
- return (ret);
-
- /*
- * If we are running CDB, this had better be either a write
- * cursor or an immediate writer. If it's a regular writer,
- * that means we have an IWRITE lock and we need to upgrade
- * it to a write lock.
- *
- * A failed upgrade is reported as EAGAIN regardless of the value
- * lock_get returned.
- */
- if (F_ISSET(dbp, DB_AM_CDB)) {
- if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
- return (EINVAL);
-
- if (F_ISSET(dbc, DBC_RMW) &&
- (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
- DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
- &dbc->mylock)) != 0)
- return (EAGAIN);
- }
-
- if (0) {
-split: /*
- * This block is skipped on the first pass (it is guarded by
- * "if (0)") and is entered only by the "goto split" issued
- * when __bam_iitem returns DB_NEEDSPLIT.
- *
- * To split, we need a valid key for the page. Since it's a
- * cursor, we have to build one.
- *
- * Acquire a copy of a key from the page.
- */
- if (needkey) {
- memset(&dbt, 0, sizeof(DBT));
- if ((ret = __db_ret(dbp, cp->page, indx,
- &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
- goto err;
- arg = &dbt;
- } else
- arg = key;
-
- /*
- * Discard any locks and pinned pages (the locks are discarded
- * even if we're running with transactions, as they lock pages
- * that we're sorry we ever acquired). If stack is set and the
- * cursor entries are valid, they point to the same entries as
- * the stack, don't free them twice.
- */
- if (stack) {
- (void)__bam_stkrel(dbc, 1);
- stack = 0;
- } else
- DISCARD(dbc, cp);
-
- /*
- * Restore the cursor to its original value. This is necessary
- * for two reasons. First, we are about to copy it in case of
- * error, again. Second, we adjust cursors during the split,
- * and we have to ensure this cursor is adjusted appropriately,
- * along with all the other cursors.
- */
- *cp = copy;
-
- if ((ret = __bam_split(dbc, arg)) != 0)
- goto err;
- }
-
- /*
- * Initialize the cursor for a new retrieval. Clear the cursor's
- * page pointer, it was set before this operation, and no longer
- * has any meaning.
- *
- * The snapshot in copy is taken after the page pointer is cleared
- * but before the lock is reset, so copy.lock still names the lock
- * held on entry; it is released near the end on success.
- */
- cp->page = NULL;
- copy = *cp;
- cp->lock = LOCK_INVALID;
-
- iiflags = needkey = ret = stack = 0;
- switch (flags) {
- case DB_AFTER:
- case DB_BEFORE:
- case DB_CURRENT:
- needkey = 1;
- if (cp->dpgno == PGNO_INVALID) {
- pgno = cp->pgno;
- indx = cp->indx;
- } else {
- pgno = cp->dpgno;
- indx = cp->dindx;
- }
-
- /*
- * !!!
- * This test is right -- we don't yet support duplicates and
- * record numbers in the same tree, so ignore duplicates if
- * DB_BT_RECNUM set.
- */
- if (F_ISSET(dbp, DB_BT_RECNUM) &&
- (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) {
- /* Acquire a complete stack. */
- if ((ret = __bam_c_getstack(dbc, cp)) != 0)
- goto err;
- cp->page = cp->csp->page;
-
- stack = 1;
- iiflags = BI_DOINCR;
- } else {
- /*
- * Acquire the current page. The lock is taken on
- * cp->pgno while the page fetched is pgno, which is
- * the duplicate page when the cursor is on one.
- */
- if ((ret = __bam_lget(dbc,
- 0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0)
- ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page);
- if (ret != 0)
- goto err;
-
- iiflags = 0;
- }
-
- /*
- * If the user has specified a duplicate comparison function,
- * we return an error if DB_CURRENT was specified and the
- * replacement data doesn't compare equal to the current data.
- * This stops apps from screwing up the duplicate sort order.
- */
- if (flags == DB_CURRENT && dbp->dup_compare != NULL)
- if (__bam_cmp(dbp, data,
- cp->page, indx, dbp->dup_compare) != 0) {
- ret = EINVAL;
- goto err;
- }
-
- iiop = flags;
- break;
- case DB_KEYFIRST:
- case DB_KEYLAST:
- /*
- * If we have a duplicate comparison function, we position to
- * the first of any on-page duplicates, and use __bam_dsearch
- * to search for the right slot. Otherwise, we position to
- * the first/last of any on-page duplicates based on the flag
- * value.
- */
- if ((ret = __bam_c_search(dbc, cp, key,
- flags == DB_KEYFIRST || dbp->dup_compare != NULL ?
- DB_KEYFIRST : DB_KEYLAST, &exact)) != 0)
- goto err;
- stack = 1;
-
- /*
- * If an exact match:
- * If duplicates aren't supported, replace the current
- * item. (When implementing the DB->put function, our
- * caller has already checked the DB_NOOVERWRITE flag.)
- *
- * If there's a duplicate comparison function, find the
- * correct slot for this duplicate item.
- *
- * If there's no duplicate comparison function, set the
- * insert flag based on the argument flags.
- *
- * If there's no match, the search function returned the
- * smallest slot greater than the key, use it.
- */
- if (exact) {
- if (F_ISSET(dbp, DB_AM_DUP)) {
- /*
- * If at off-page duplicate page, move to the
- * first or last entry -- if a comparison
- * function was specified, start searching at
- * the first entry. Otherwise, move based on
- * the DB_KEYFIRST/DB_KEYLAST flags.
- */
- if ((ret = __bam_dup(dbc, cp, cp->indx,
- dbp->dup_compare == NULL &&
- flags != DB_KEYFIRST)) != 0)
- goto err;
-
- /*
- * If there's a comparison function, search for
- * the correct slot. Otherwise, set the insert
- * flag based on the argument flag.
- */
- if (dbp->dup_compare == NULL)
- iiop = flags == DB_KEYFIRST ?
- DB_BEFORE : DB_AFTER;
- else
- if ((ret = __bam_dsearch(dbc,
- cp, data, &iiop)) != 0)
- goto err;
- } else
- iiop = DB_CURRENT;
- iiflags = 0;
- } else {
- iiop = DB_BEFORE;
- iiflags = BI_NEWKEY;
- }
-
- if (cp->dpgno == PGNO_INVALID) {
- pgno = cp->pgno;
- indx = cp->indx;
- } else {
- pgno = cp->dpgno;
- indx = cp->dindx;
- }
- break;
- }
-
- ret = __bam_iitem(dbc, &cp->page, &indx, key, data, iiop, iiflags);
-
- if (ret == DB_NEEDSPLIT)
- goto split;
- if (ret != 0)
- goto err;
-
- /*
- * Reset any cursors referencing this item that might have the item
- * marked for deletion.
- */
- if (iiop == DB_CURRENT) {
- (void)__bam_ca_delete(dbp, pgno, indx, 0);
-
- /*
- * It's also possible that we are the cursor that had the
- * item marked for deletion, in which case we want to make
- * sure that we don't delete it because we had the delete
- * flag set already.
- */
- if (cp->pgno == copy.pgno && cp->indx == copy.indx &&
- cp->dpgno == copy.dpgno && cp->dindx == copy.dindx)
- F_CLR(&copy, C_DELETED);
- }
-
- /*
- * Update the cursor to point to the new entry. The new entry was
- * stored on the current page, because we split pages until it was
- * possible.
- */
- if (cp->dpgno == PGNO_INVALID)
- cp->indx = indx;
- else
- cp->dindx = indx;
-
- /*
- * If the previous cursor record has been deleted, physically delete
- * the entry from the page. We clear the deleted flag before we call
- * the underlying delete routine so that, if an error occurs, and we
- * restore the cursor, the deleted flag is cleared. This is because,
- * if we manage to physically modify the page, and then restore the
- * cursor, we might try to repeat the page modification when closing
- * the cursor.
- */
- if (F_ISSET(&copy, C_DELETED)) {
- F_CLR(&copy, C_DELETED);
- if ((ret = __bam_c_physdel(dbc, &copy, cp->page)) != 0)
- goto err;
- }
- F_CLR(cp, C_DELETED);
-
- /* Release the previous lock, if any; the current lock is retained. */
- if (copy.lock != LOCK_INVALID)
- (void)__BT_TLPUT(dbc, copy.lock);
-
- /*
- * Discard any pages pinned in the tree and their locks, except for
- * the leaf page, for which we only discard the pin, not the lock.
- *
- * Note, the leaf page participated in the stack we acquired, and so
- * we have to adjust the stack as necessary. If there was only a
- * single page on the stack, we don't have to free further stack pages.
- */
- if (stack && BT_STK_POP(cp) != NULL)
- (void)__bam_stkrel(dbc, 0);
-
- /* Release the current page. */
- if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
- goto err;
-
- if (0) {
-err: /*
- * Reached only by "goto err". Discard any pinned pages, then
- * put the cursor back to the snapshot taken before the
- * operation started.
- */
- if (stack)
- (void)__bam_stkrel(dbc, 0);
- else
- DISCARD(dbc, cp);
- *cp = copy;
- }
-
- if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
- (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
- DB_LOCK_IWRITE, 0); /* undo the CDB upgrade taken on entry */
-
- return (ret);
-}
-
-/*
- * __bam_c_first --
- * Return the first record.
- *
- * Descends from PGNO_ROOT through entry 0 of each internal page,
- * read-locking and fetching every page on the way and discarding it
- * again before moving down, until a leaf page is found. The cursor is
- * set to index 0 of that leaf. __bam_dup then switches the cursor onto
- * an off-page duplicate set if one starts there, and __bam_c_next is
- * used to step forward when the page is empty or the record is deleted.
- *
- * dbc: cursor handle.
- * cp: cursor to position; cp->page and cp->lock hold the leaf on return.
- *
- * Returns 0, or the first error from __bam_lget, memp_fget, __bam_dup
- * or __bam_c_next. The error returns do not release whatever page or
- * lock is already recorded in cp; presumably the caller's error path
- * does that -- verify against the callers.
- */
-static int
-__bam_c_first(dbc, cp)
- DBC *dbc;
- CURSOR *cp;
-{
- DB *dbp;
- db_pgno_t pgno;
- int ret;
-
- dbp = dbc->dbp;
-
- /* Walk down the left-hand side of the tree. */
- for (pgno = PGNO_ROOT;;) {
- if ((ret =
- __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
- return (ret);
-
- /* If we find a leaf page, we're done. */
- if (ISLEAF(cp->page))
- break;
-
- pgno = GET_BINTERNAL(cp->page, 0)->pgno;
- DISCARD(dbc, cp);
- }
-
- cp->pgno = cp->page->pgno;
- cp->indx = 0;
- cp->dpgno = PGNO_INVALID;
-
- /* Check for duplicates. */
- if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
- return (ret);
-
- /* If on an empty page or a deleted record, move to the next one. */
- if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp))
- if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
- return (ret);
-
- return (0);
-}
-
-/*
- * __bam_c_last --
- * Return the last record.
- *
- * Descends from PGNO_ROOT through the last entry of each internal page,
- * read-locking and fetching every page on the way and discarding it
- * again before moving down, until a leaf page is found. The cursor is
- * set to the last key slot of that leaf (NUM_ENT - P_INDX, or 0 when
- * the page is empty). __bam_dup then switches the cursor onto the last
- * entry of an off-page duplicate set if there is one, and __bam_c_prev
- * is used to step backward when the page is empty or the record is
- * deleted.
- *
- * dbc: cursor handle.
- * cp: cursor to position; cp->page and cp->lock hold the leaf on return.
- *
- * Returns 0, or the first error from __bam_lget, memp_fget, __bam_dup
- * or __bam_c_prev. The error returns do not release whatever page or
- * lock is already recorded in cp; presumably the caller's error path
- * does that -- verify against the callers.
- */
-static int
-__bam_c_last(dbc, cp)
- DBC *dbc;
- CURSOR *cp;
-{
- DB *dbp;
- db_pgno_t pgno;
- int ret;
-
- dbp = dbc->dbp;
-
- /* Walk down the right-hand side of the tree. */
- for (pgno = PGNO_ROOT;;) {
- if ((ret =
- __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
- return (ret);
-
- /* If we find a leaf page, we're done. */
- if (ISLEAF(cp->page))
- break;
-
- pgno =
- GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno;
- DISCARD(dbc, cp);
- }
-
- cp->pgno = cp->page->pgno;
- cp->indx = NUM_ENT(cp->page) == 0 ? 0 : NUM_ENT(cp->page) - P_INDX;
- cp->dpgno = PGNO_INVALID;
-
- /* Check for duplicates. */
- if ((ret = __bam_dup(dbc, cp, cp->indx, 1)) != 0)
- return (ret);
-
- /* If on an empty page or a deleted record, move to the previous one. */
- if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp))
- if ((ret = __bam_c_prev(dbc, cp)) != 0)
- return (ret);
-
- return (0);
-}
-
-/*
- * __bam_c_next --
- * Move to the next record.
- *
- * dbc: cursor handle.
- * cp: cursor to advance. If cp->page is NULL the current page is
- * read-locked and fetched first.
- * initial_move: when non-zero the index is advanced by one step before
- * anything is tested; when zero the current slot is itself the
- * first candidate.
- *
- * The step size ("adjust") is P_INDX on a DB_BTREE leaf and O_INDX on
- * other leaves and on duplicate pages. Deleted entries are skipped.
- * Running off the end of a page moves to next_pgno; running off the end
- * of a duplicate chain resumes on the leaf at cp->indx + P_INDX.
- *
- * Returns 0 with the cursor on the new record, DB_NOTFOUND at the end
- * of the tree, or an error from __bam_lget, memp_fget or __bam_dup.
- */
-static int
-__bam_c_next(dbc, cp, initial_move)
- DBC *dbc;
- CURSOR *cp;
- int initial_move;
-{
- DB *dbp;
- db_indx_t adjust, indx;
- db_pgno_t pgno;
- int ret;
-
- dbp = dbc->dbp;
-
- /*
- * We're either moving through a page of duplicates or a btree leaf
- * page.
- */
- if (cp->dpgno == PGNO_INVALID) {
- adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX;
- pgno = cp->pgno;
- indx = cp->indx;
- } else {
- adjust = O_INDX;
- pgno = cp->dpgno;
- indx = cp->dindx;
- }
- if (cp->page == NULL) {
- if ((ret =
- __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
- return (ret);
- }
-
- /*
- * If at the end of the page, move to a subsequent page.
- *
- * !!!
- * Check for >= NUM_ENT. If we're here as the result of a search that
- * landed us on NUM_ENT, we'll increment indx before we test.
- *
- * !!!
- * This code handles empty pages and pages with only deleted entries.
- */
- if (initial_move)
- indx += adjust;
- for (;;) {
- if (indx >= NUM_ENT(cp->page)) {
- /*
- * If we're in a btree leaf page, we've reached the end
- * of the tree. If we've reached the end of a page of
- * duplicates, continue from the btree leaf page where
- * we found this page of duplicates.
- */
- pgno = cp->page->next_pgno;
- if (pgno == PGNO_INVALID) {
- /* If in a btree leaf page, it's EOF. */
- if (cp->dpgno == PGNO_INVALID)
- return (DB_NOTFOUND);
-
- /* Continue from the last btree leaf page. */
- cp->dpgno = PGNO_INVALID;
-
- adjust = P_INDX;
- pgno = cp->pgno;
- indx = cp->indx + P_INDX;
- } else
- indx = 0;
-
- DISCARD(dbc, cp);
- if ((ret = __bam_lget(dbc,
- 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
- return (ret);
- if ((ret =
- memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
- return (ret);
- continue;
- }
-
- /* Ignore deleted records. */
- if (IS_DELETED(cp, indx)) {
- indx += adjust;
- continue;
- }
-
- /*
- * If we're not in a duplicates page, check to see if we've
- * found a page of duplicates, in which case we move to the
- * first entry. The loop is re-run from that entry so that
- * the end-of-page and deleted-record checks apply to it too.
- */
- if (cp->dpgno == PGNO_INVALID) {
- cp->pgno = cp->page->pgno;
- cp->indx = indx;
-
- if ((ret = __bam_dup(dbc, cp, indx, 0)) != 0)
- return (ret);
- if (cp->dpgno != PGNO_INVALID) {
- indx = cp->dindx;
- adjust = O_INDX;
- continue;
- }
- } else {
- cp->dpgno = cp->page->pgno;
- cp->dindx = indx;
- }
- break;
- }
- return (0);
-}
-
-/*
- * __bam_c_prev --
- * Move to the previous record.
- *
- * dbc: cursor handle.
- * cp: cursor to move back. If cp->page is NULL the current page is
- * read-locked and fetched first.
- *
- * The step size ("adjust") is P_INDX on a DB_BTREE leaf and O_INDX on
- * other leaves and on duplicate pages. Deleted entries are skipped.
- * Reaching index 0 moves to prev_pgno and restarts from that page's
- * NUM_ENT; reaching the start of a duplicate chain resumes on the leaf
- * at cp->indx.
- *
- * Returns 0 with the cursor on the new record, DB_NOTFOUND at the start
- * of the tree, or an error from __bam_lget, memp_fget or __bam_dup.
- */
-static int
-__bam_c_prev(dbc, cp)
- DBC *dbc;
- CURSOR *cp;
-{
- DB *dbp;
- db_indx_t indx, adjust;
- db_pgno_t pgno;
- int ret, set_indx;
-
- dbp = dbc->dbp;
-
- /*
- * We're either moving through a page of duplicates or a btree leaf
- * page.
- */
- if (cp->dpgno == PGNO_INVALID) {
- adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX;
- pgno = cp->pgno;
- indx = cp->indx;
- } else {
- adjust = O_INDX;
- pgno = cp->dpgno;
- indx = cp->dindx;
- }
- if (cp->page == NULL) {
- if ((ret =
- __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
- return (ret);
- }
-
- /*
- * If at the beginning of the page, move to any previous one.
- *
- * !!!
- * This code handles empty pages and pages with only deleted entries.
- */
- for (;;) {
- if (indx == 0) {
- /*
- * If we're in a btree leaf page, we've reached the
- * beginning of the tree. If we've reached the first
- * of a page of duplicates, continue from the btree
- * leaf page where we found this page of duplicates.
- *
- * set_indx records which case applies: 1 means a new
- * page was entered and indx must restart from its
- * NUM_ENT, 0 means we are resuming on the leaf at the
- * saved cp->indx.
- */
- pgno = cp->page->prev_pgno;
- if (pgno == PGNO_INVALID) {
- /* If in a btree leaf page, it's SOF. */
- if (cp->dpgno == PGNO_INVALID)
- return (DB_NOTFOUND);
-
- /* Continue from the last btree leaf page. */
- cp->dpgno = PGNO_INVALID;
-
- adjust = P_INDX;
- pgno = cp->pgno;
- indx = cp->indx;
- set_indx = 0;
- } else
- set_indx = 1;
-
- DISCARD(dbc, cp);
- if ((ret = __bam_lget(dbc,
- 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
- return (ret);
- if ((ret =
- memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
- return (ret);
-
- if (set_indx)
- indx = NUM_ENT(cp->page);
- if (indx == 0)
- continue;
- }
-
- /* Ignore deleted records. */
- indx -= adjust;
- if (IS_DELETED(cp, indx))
- continue;
-
- /*
- * If we're not in a duplicates page, check to see if we've
- * found a page of duplicates, in which case we move to the
- * last entry. indx is set one step past cp->dindx because
- * the next pass of the loop decrements it before testing.
- */
- if (cp->dpgno == PGNO_INVALID) {
- cp->pgno = cp->page->pgno;
- cp->indx = indx;
-
- if ((ret = __bam_dup(dbc, cp, indx, 1)) != 0)
- return (ret);
- if (cp->dpgno != PGNO_INVALID) {
- indx = cp->dindx + O_INDX;
- adjust = O_INDX;
- continue;
- }
- } else {
- cp->dpgno = cp->page->pgno;
- cp->dindx = indx;
- }
- break;
- }
- return (0);
-}
-
-/*
- * __bam_c_search --
- * Move to a specified record.
- *
- * dbc: cursor handle.
- * cp: cursor to position; on success its page, pgno, indx and lock
- * are taken from the top of the search stack (cp->csp) and
- * dpgno is reset to PGNO_INVALID.
- * key: key to look for (a record number for DB_SET_RECNO).
- * flags: one of
- * DB_SET_RECNO -- convert key with __ram_getno and search by
- * record number with __bam_rsearch; exact match required;
- * DB_SET, DB_GET_BOTH -- key search, exact match required;
- * DB_SET_RANGE -- key search, exact match not required;
- * DB_KEYFIRST, DB_KEYLAST -- insert positioning using S_KEYFIRST
- * or S_KEYLAST, first trying the page of the previous insert
- * (t->bt_lpgno) before falling back to a full search.
- * Any other value calls abort().
- * exactp: set to whether an exact match was found.
- *
- * Returns 0 on success, DB_NOTFOUND when an exact match was required
- * and not found, or the error from the underlying search. Note that a
- * non-zero search result is returned before the cursor fields are
- * filled in.
- */
-static int
-__bam_c_search(dbc, cp, key, flags, exactp)
- DBC *dbc;
- CURSOR *cp;
- const DBT *key;
- u_int32_t flags;
- int *exactp;
-{
- BTREE *t;
- DB *dbp;
- DB_LOCK lock;
- PAGE *h;
- db_recno_t recno;
- db_indx_t indx;
- u_int32_t sflags;
- int cmp, needexact, ret;
-
- dbp = dbc->dbp;
- t = dbp->internal;
-
- /* Find an entry in the database. */
- switch (flags) {
- case DB_SET_RECNO:
- if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
- return (ret);
- sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND;
- needexact = *exactp = 1;
- ret = __bam_rsearch(dbc, &recno, sflags, 1, exactp);
- break;
- case DB_SET:
- case DB_GET_BOTH:
- sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND;
- needexact = *exactp = 1;
- goto search;
- case DB_SET_RANGE:
- sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND;
- needexact = *exactp = 0;
- goto search;
- case DB_KEYFIRST:
- sflags = S_KEYFIRST;
- goto fast_search;
- case DB_KEYLAST:
- sflags = S_KEYLAST;
-fast_search: needexact = *exactp = 0;
- /*
- * If the application has a history of inserting into the first
- * or last pages of the database, we check those pages first to
- * avoid doing a full search.
- *
- * Record numbers can't be fast-tracked, the entire tree has to
- * be locked.
- */
- h = NULL;
- lock = LOCK_INVALID;
- if (F_ISSET(dbp, DB_BT_RECNUM))
- goto search;
-
- /* Check if the application has a history of sorted input. */
- if (t->bt_lpgno == PGNO_INVALID)
- goto search;
-
- /*
- * Lock and retrieve the page on which we did the last insert.
- * It's okay if it doesn't exist, or if it's not the page type
- * we expected, it just means that the world changed.
- *
- * Every failure below goes to fast_miss, which releases
- * whatever was acquired and falls into the full search.
- */
- if (__bam_lget(dbc, 0, t->bt_lpgno, DB_LOCK_WRITE, &lock))
- goto fast_miss;
- if (memp_fget(dbp->mpf, &t->bt_lpgno, 0, &h))
- goto fast_miss;
- if (TYPE(h) != P_LBTREE)
- goto fast_miss;
- if (NUM_ENT(h) == 0)
- goto fast_miss;
-
- /*
- * What we do here is test to see if we're at the beginning or
- * end of the tree and if the new item sorts before/after the
- * first/last page entry. We don't try and catch inserts into
- * the middle of the tree (although we could, as long as there
- * were two keys on the page and we saved both the index and
- * the page number of the last insert).
- */
- if (h->next_pgno == PGNO_INVALID) {
- indx = NUM_ENT(h) - P_INDX;
- if ((cmp =
- __bam_cmp(dbp, key, h, indx, t->bt_compare)) < 0)
- goto try_begin;
- if (cmp > 0) {
- indx += P_INDX;
- goto fast_hit;
- }
-
- /*
- * Found a duplicate. If doing DB_KEYLAST, we're at
- * the correct position, otherwise, move to the first
- * of the duplicates. The scan compares neighbouring
- * h->inp[] entries for equality.
- */
- if (flags == DB_KEYLAST)
- goto fast_hit;
- for (;
- indx > 0 && h->inp[indx - P_INDX] == h->inp[indx];
- indx -= P_INDX)
- ;
- goto fast_hit;
- }
-try_begin: if (h->prev_pgno == PGNO_INVALID) {
- indx = 0;
- if ((cmp =
- __bam_cmp(dbp, key, h, indx, t->bt_compare)) > 0)
- goto fast_miss;
- if (cmp < 0)
- goto fast_hit;
- /*
- * Found a duplicate. If doing DB_KEYFIRST, we're at
- * the correct position, otherwise, move to the last
- * of the duplicates.
- */
- if (flags == DB_KEYFIRST)
- goto fast_hit;
- for (;
- indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
- h->inp[indx] == h->inp[indx + P_INDX];
- indx += P_INDX)
- ;
- goto fast_hit;
- }
- goto fast_miss;
-
-fast_hit: /* Set the exact match flag, we may have found a duplicate. */
- *exactp = cmp == 0;
-
- /*
- * Enter the entry in the stack. ret is passed to BT_STK_ENTER
- * and is tested after the switch, so the macro is presumably
- * what assigns it on this path -- confirm against its
- * definition.
- */
- BT_STK_CLR(cp);
- BT_STK_ENTER(cp, h, indx, lock, ret);
- break;
-
-fast_miss: if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
- if (lock != LOCK_INVALID)
- (void)__BT_LPUT(dbc, lock);
-
-search: ret = __bam_search(dbc, key, sflags, 1, NULL, exactp);
- break;
- default: /* XXX: Impossible. */
- abort();
- /* NOTREACHED */
- }
- if (ret != 0)
- return (ret);
-
- /*
- * Initialize the cursor to reference it. This has to be done
- * before we return (even with DB_NOTFOUND) because we have to
- * free the page(s) we locked in __bam_search.
- */
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
- cp->lock = cp->csp->lock;
- cp->dpgno = PGNO_INVALID;
-
- /*
- * If we inserted a key into the first or last slot of the tree,
- * remember where it was so we can do it more quickly next time.
- * Otherwise the remembered page is reset to PGNO_INVALID.
- */
- if (flags == DB_KEYFIRST || flags == DB_KEYLAST)
- t->bt_lpgno =
- ((cp->page->next_pgno == PGNO_INVALID &&
- cp->indx >= NUM_ENT(cp->page)) ||
- (cp->page->prev_pgno == PGNO_INVALID && cp->indx == 0)) ?
- cp->pgno : PGNO_INVALID;
-
- /* If we need an exact match and didn't find one, we're done. */
- if (needexact && *exactp == 0)
- return (DB_NOTFOUND);
-
- return (0);
-}
-
-/*
- * __bam_dup --
- * Check for an off-page duplicates entry, and if found, move to the
- * first or last entry.
- *
- * dbc: cursor handle.
- * cp: cursor whose cp->page is the pinned page to inspect.
- * indx: index of the key on cp->page; the item examined is the one at
- * indx + O_INDX.
- * last_dup: non-zero to move to the last duplicate (found with
- * __db_dend), zero to move to the first entry of the duplicate
- * page.
- *
- * If the examined item is not of type B_DUPLICATE nothing is changed
- * and 0 is returned. Otherwise the current page is put back, the
- * duplicate page is fetched into cp->page, and cp->dpgno/cp->dindx are
- * set to the new position.
- *
- * Returns 0 or the error from memp_fput, __db_dend or memp_fget. When
- * the fetch fails, cp->page has already been set to NULL.
- *
- * PUBLIC: int __bam_dup __P((DBC *, CURSOR *, u_int32_t, int));
- */
-int
-__bam_dup(dbc, cp, indx, last_dup)
- DBC *dbc;
- CURSOR *cp;
- u_int32_t indx;
- int last_dup;
-{
- BOVERFLOW *bo;
- DB *dbp;
- db_pgno_t pgno;
- int ret;
-
- dbp = dbc->dbp;
-
- /*
- * Check for an overflow entry. If we find one, move to the
- * duplicates page, and optionally move to the last record on
- * that page.
- *
- * !!!
- * We don't lock duplicates pages, we've already got the correct
- * lock on the main page.
- */
- bo = GET_BOVERFLOW(cp->page, indx + O_INDX);
- if (B_TYPE(bo->type) != B_DUPLICATE)
- return (0);
-
- pgno = bo->pgno; /* read before the page holding bo is put back */
- if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
- return (ret);
- cp->page = NULL;
- if (last_dup) {
- if ((ret = __db_dend(dbc, pgno, &cp->page)) != 0)
- return (ret);
- indx = NUM_ENT(cp->page) - O_INDX;
- } else {
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
- return (ret);
- indx = 0;
- }
-
- /* Update the cursor's duplicate information. */
- cp->dpgno = cp->page->pgno;
- cp->dindx = indx;
-
- return (0);
-}
-
-/*
- * __bam_c_physdel --
- * Actually do the cursor deletion.
- *
- * dbc: cursor handle.
- * cp: cursor position naming the item to remove: cp->dpgno/cp->dindx
- * when the cursor is on an off-page duplicate, otherwise
- * cp->pgno/cp->indx.
- * h: a page the caller already holds, or NULL. It is used only when
- * its page number matches the page being modified; otherwise the
- * page is write-locked and fetched locally and released again
- * before returning.
- *
- * Returns 0 on success (including the case where another cursor still
- * references the item and the delete is left to that cursor), EAGAIN if
- * the CDB lock upgrade fails, or the error from a called routine.
- *
- * Once the CDB lock upgrade has succeeded, every exit goes through the
- * err/done labels so that locally acquired locks/pages are released and
- * the CDB lock is downgraded again.
- */
-static int
-__bam_c_physdel(dbc, cp, h)
- DBC *dbc;
- CURSOR *cp;
- PAGE *h;
-{
- enum { DELETE_ITEM, DELETE_PAGE, NOTHING_FURTHER } cmd;
- BOVERFLOW bo;
- DB *dbp;
- DBT dbt;
- DB_LOCK lock;
- db_indx_t indx;
- db_pgno_t pgno, next_pgno, prev_pgno;
- int delete_page, local_page, ret;
-
- dbp = dbc->dbp;
-
- delete_page = ret = 0;
-
- /* Figure out what we're deleting. */
- if (cp->dpgno == PGNO_INVALID) {
- pgno = cp->pgno;
- indx = cp->indx;
- } else {
- pgno = cp->dpgno;
- indx = cp->dindx;
- }
-
- /*
- * If the item is referenced by another cursor, set that cursor's
- * delete flag and leave it up to it to do the delete.
- *
- * !!!
- * This test for > 0 is tricky. There are two ways that we can
- * be called here. Either we are closing the cursor or we've moved
- * off the page with the deleted entry. In the first case, we've
- * already removed the cursor from the active queue, so we won't see
- * it in __bam_ca_delete. In the second case, it will be on a different
- * item, so we won't bother with it in __bam_ca_delete.
- */
- if (__bam_ca_delete(dbp, pgno, indx, 1) > 0)
- return (0);
-
- /*
- * If this is concurrent DB, upgrade the lock if necessary.
- * Nothing has been acquired yet, so a failed upgrade returns directly.
- */
- if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW) &&
- (ret = lock_get(dbp->dbenv->lk_info,
- dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
- &dbc->mylock)) != 0)
- return (EAGAIN);
-
- /*
- * If we don't already have the page locked, get it and delete the
- * items.
- *
- * Failures here leave through err rather than returning directly:
- * a lock taken just before a failed page fetch is released first,
- * and the CDB downgrade at the end still runs. local_page stays 0
- * on those paths so the caller's page is never touched.
- */
- local_page = 0;
- if ((h == NULL || h->pgno != pgno)) {
- if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
- goto err;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
- (void)__BT_TLPUT(dbc, lock);
- goto err;
- }
- local_page = 1;
- }
-
- /*
- * If we're deleting a duplicate entry and there are other duplicate
- * entries remaining, call the common code to do the work and fix up
- * the parent page as necessary. Otherwise, do a normal btree delete.
- *
- * There are 5 possible cases:
- *
- * 1. It's not a duplicate item: do a normal btree delete.
- * 2. It's a duplicate item:
- * 2a: We delete an item from a page of duplicates, but there are
- * more items on the page.
- * 2b: We delete the last item from a page of duplicates, deleting
- * the last duplicate.
- * 2c: We delete the last item from a page of duplicates, but there
- * is a previous page of duplicates.
- * 2d: We delete the last item from a page of duplicates, but there
- * is a following page of duplicates.
- *
- * In the case of:
- *
- * 1: There's nothing further to do.
- * 2a: There's nothing further to do.
- * 2b: Do the normal btree delete instead of a duplicate delete, as
- * that deletes both the duplicate chain and the parent page's
- * entry.
- * 2c: There's nothing further to do.
- * 2d: Delete the duplicate, and update the parent page's entry.
- */
- if (TYPE(h) == P_DUPLICATE) {
- pgno = PGNO(h);
- prev_pgno = PREV_PGNO(h);
- next_pgno = NEXT_PGNO(h);
-
- if (NUM_ENT(h) == 1 &&
- prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID)
- cmd = DELETE_PAGE;
- else {
- cmd = DELETE_ITEM;
-
- /* Delete the duplicate. */
- if ((ret = __db_drem(dbc, &h, indx, __bam_free)) != 0)
- goto err;
-
- /*
- * 2a: h != NULL, h->pgno == pgno
- * 2b: We don't reach this clause, as the above test
- * was true.
- * 2c: h == NULL, prev_pgno != PGNO_INVALID
- * 2d: h != NULL, next_pgno != PGNO_INVALID
- *
- * Test for 2a and 2c: if we didn't empty the current
- * page or there was a previous page of duplicates, we
- * don't need to touch the parent page.
- */
- if ((h != NULL && pgno == h->pgno) ||
- prev_pgno != PGNO_INVALID)
- cmd = NOTHING_FURTHER;
- }
-
- /*
- * Release any page we're holding and its lock.
- *
- * !!!
- * If there is no subsequent page in the duplicate chain, then
- * __db_drem will have put page "h" and set it to NULL.
- */
- if (local_page) {
- if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_TLPUT(dbc, lock);
- local_page = 0;
- }
-
- if (cmd == NOTHING_FURTHER)
- goto done;
-
- /* Acquire the parent page and switch the index to its entry. */
- if ((ret =
- __bam_lget(dbc, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
- goto err;
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) {
- (void)__BT_TLPUT(dbc, lock);
- goto err;
- }
- local_page = 1;
- indx = cp->indx;
-
- if (cmd == DELETE_PAGE)
- goto btd;
-
- /*
- * Copy, delete, update, add-back the parent page's data entry.
- *
- * XXX
- * This may be a performance/logging problem. We should add a
- * log message which simply logs/updates a random set of bytes
- * on a page, and use it instead of doing a delete/add pair.
- *
- * NOTE(review): the results of __db_ditem and __db_pitem are
- * deliberately discarded here; that behavior is left as is.
- */
- indx += O_INDX;
- bo = *GET_BOVERFLOW(h, indx);
- (void)__db_ditem(dbc, h, indx, BOVERFLOW_SIZE);
- bo.pgno = next_pgno;
- memset(&dbt, 0, sizeof(dbt));
- dbt.data = &bo;
- dbt.size = BOVERFLOW_SIZE;
- (void)__db_pitem(dbc, h, indx, BOVERFLOW_SIZE, &dbt, NULL);
- (void)memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
- goto done;
- }
-
-btd: /*
- * If the page is going to be emptied, delete it. To delete a leaf
- * page we need a copy of a key from the page. We use the 0th page
- * index since it's the last key that the page held.
- *
- * We malloc the page information instead of using the return key/data
- * memory because we've already set them -- the reason we've already
- * set them is because we're (potentially) about to do a reverse split,
- * which would make our saved page information useless.
- *
- * !!!
- * The following operations to delete a page might deadlock. I think
- * that's OK. The problem is if we're deleting an item because we're
- * closing cursors because we've already deadlocked and want to call
- * txn_abort(). If we fail due to deadlock, we leave a locked empty
- * page in the tree, which won't be empty long because we're going to
- * undo the delete.
- */
- if (NUM_ENT(h) == 2 && h->pgno != PGNO_ROOT) {
- memset(&dbt, 0, sizeof(DBT));
- dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
- if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
- goto err;
- delete_page = 1;
- }
-
- /*
- * Do a normal btree delete.
- *
- * !!!
- * Delete the key item first, otherwise the duplicate checks in
- * __bam_ditem() won't work!
- *
- * The same index is used for both calls: the first call removes the
- * key, the second removes the data item.
- */
- if ((ret = __bam_ditem(dbc, h, indx)) != 0)
- goto err;
- if ((ret = __bam_ditem(dbc, h, indx)) != 0)
- goto err;
-
- /* Discard any remaining locks/pages. */
- if (local_page) {
- (void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_TLPUT(dbc, lock);
- local_page = 0;
- }
-
- /* Delete the page if it was emptied. */
- if (delete_page)
- ret = __bam_dpage(dbc, &dbt);
-
-err:
-done: if (delete_page)
- __os_free(dbt.data, dbt.size);
-
- if (local_page) {
- /*
- * It's possible for h to be NULL, as __db_drem may have
- * been relinking pages by the time that it deadlocked.
- */
- if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_TLPUT(dbc, lock);
- }
-
- if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
- (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
- DB_LOCK_IWRITE, 0);
-
- return (ret);
-}
-
-/*
- * __bam_c_getstack --
- * Acquire a full stack for a cursor.
- *
- * Fetches the page at cp->pgno, copies the key at index 0 of that page
- * into a local DBT (flagged DB_DBT_MALLOC | DB_DBT_INTERNAL), and runs
- * __bam_search with S_KEYFIRST on that key so that the search leaves a
- * page stack behind for the caller. The fetched page and the key copy
- * are released before returning, on success and on failure alike.
- *
- * dbc: cursor handle that receives the stack.
- * cp: cursor supplying the page number; it is not modified here.
- *
- * Returns 0 or the error from memp_fget, __db_ret or __bam_search.
- */
-static int
-__bam_c_getstack(dbc, cp)
- DBC *dbc;
- CURSOR *cp;
-{
- DB *dbp;
- DBT dbt;
- PAGE *h;
- db_pgno_t pgno;
- int exact, ret;
-
- dbp = dbc->dbp;
- h = NULL;
- memset(&dbt, 0, sizeof(DBT));
- ret = 0;
-
- /* Get the page with the current item on it. */
- pgno = cp->pgno;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
- return (ret);
-
- /* Get a copy of a key from the page. */
- dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
- if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
- goto err;
-
- /* Get a write-locked stack for that page. */
- exact = 0;
- ret = __bam_search(dbc, &dbt, S_KEYFIRST, 1, NULL, &exact);
-
- /*
- * We no longer need the key or the page. The successful path falls
- * through to here as well; dbt.data is still NULL if __db_ret failed
- * before allocating, hence the guard.
- */
-err: if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
- if (dbt.data != NULL)
- __os_free(dbt.data, dbt.size);
- return (ret);
-}
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
deleted file mode 100644
index d623bd8a6f..0000000000
--- a/db2/btree/bt_delete.c
+++ /dev/null
@@ -1,589 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_delete.c 10.43 (Sleepycat) 12/7/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-/*
- * __bam_delete --
- *	Delete every key/data pair stored under a key.
- *
- *	A write cursor is positioned on the key with DB_SET and then stepped
- *	with DB_NEXT_DUP, deleting the current item at each stop.  Running out
- *	of duplicates (DB_NOTFOUND from the DB_NEXT_DUP step) is the normal
- *	way for the walk to end and is reported as success.
- *
- *	Returns 0 on success, otherwise the first error seen; an error from
- *	closing the cursor is reported only if nothing else failed (or the
- *	pending result was DB_NOTFOUND).
- *
- * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
- */
-int
-__bam_delete(dbp, txn, key, flags)
-	DB *dbp;
-	DB_TXN *txn;
-	DBT *key;
-	u_int32_t flags;
-{
-	DBC *dbc;
-	DBT discard;
-	u_int32_t first_op, next_op;
-	int close_ret, ret;
-
-	DB_PANIC_CHECK(dbp);
-
-	/* Reject invalid flag combinations. */
-	ret = __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY));
-	if (ret != 0)
-		return (ret);
-
-	/* Get a write cursor to do the work with. */
-	ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK);
-	if (ret != 0)
-		return (ret);
-
-	DEBUG_LWRITE(dbc, txn, "bam_delete", key, NULL, flags);
-
-	/*
-	 * The data itself is of no interest: ask for a zero-length partial.
-	 * DB_DBT_USERMEM is set because this might be a threaded application,
-	 * in which case the flag checks would otherwise reject the DBT.
-	 */
-	memset(&discard, 0, sizeof(discard));
-	F_SET(&discard, DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
-	/* When a lock region is configured, read with intent to modify. */
-	first_op = DB_SET;
-	next_op = DB_NEXT_DUP;
-	if (dbp->dbenv != NULL && dbp->dbenv->lk_info != NULL) {
-		first_op |= DB_RMW;
-		next_op |= DB_RMW;
-	}
-
-	/*
-	 * Position on the first pair, then delete and advance until either
-	 * an error occurs or there are no more duplicates.
-	 */
-	ret = dbc->c_get(dbc, key, &discard, first_op);
-	while (ret == 0) {
-		if ((ret = dbc->c_del(dbc, 0)) != 0)
-			break;
-		ret = dbc->c_get(dbc, key, &discard, next_op);
-		if (ret == DB_NOTFOUND) {
-			ret = 0;
-			break;
-		}
-	}
-
-	/* Discard the cursor. */
-	close_ret = dbc->c_close(dbc);
-	if (close_ret != 0 && (ret == 0 || ret == DB_NOTFOUND))
-		ret = close_ret;
-
-	return (ret);
-}
-
-/*
- * __bam_ditem --
- * Delete one or more entries from a page.
- *
- * The work done depends on the page type (TYPE(h)) and, for btree
- * pages, on the type of the item at index indx:
- * - off-page items (B_DUPLICATE / B_OVERFLOW) first have their
- * duplicate or overflow chain discarded via __db_ddup()/__db_doff();
- * - on a P_LBTREE page, a key slot (even index) whose inp[] entry is
- * shared with a neighbouring key slot only has its index removed via
- * __bam_adjindx(); the page item itself is left in place;
- * - otherwise nbytes is set to the item's size and the item is removed
- * with __db_ditem(), after which the page is marked dirty.
- *
- * Returns 0 on success, the result of __db_pgfmt() for an unexpected
- * page or item type, or the error returned by a helper.
- *
- * PUBLIC: int __bam_ditem __P((DBC *, PAGE *, u_int32_t));
- */
-int
-__bam_ditem(dbc, h, indx)
- DBC *dbc;
- PAGE *h;
- u_int32_t indx;
-{
- BINTERNAL *bi;
- BKEYDATA *bk;
- BOVERFLOW *bo;
- DB *dbp;
- u_int32_t nbytes;
- int ret;
-
- dbp = dbc->dbp;
-
- switch (TYPE(h)) {
- case P_IBTREE:
- bi = GET_BINTERNAL(h, indx);
- switch (B_TYPE(bi->type)) {
- case B_DUPLICATE:
- case B_OVERFLOW:
- /*
- * The internal item's data is itself a BOVERFLOW; share the
- * chain-discarding code in the leaf-page case below.
- */
- nbytes = BINTERNAL_SIZE(bi->len);
- bo = (BOVERFLOW *)bi->data;
- goto offpage;
- case B_KEYDATA:
- nbytes = BINTERNAL_SIZE(bi->len);
- break;
- default:
- return (__db_pgfmt(dbp, h->pgno));
- }
- break;
- case P_IRECNO:
- /* Recno internal items all have the same size. */
- nbytes = RINTERNAL_SIZE;
- break;
- case P_LBTREE:
- /*
- * If it's a duplicate key, discard the index and don't touch
- * the actual page item.
- *
- * XXX
- * This works because no data item can have an index matching
- * any other index so even if the data item is in a key "slot",
- * it won't match any other index.
- */
- if ((indx % 2) == 0) {
- /*
- * Check for a duplicate after us on the page. NOTE:
- * we have to delete the key item before deleting the
- * data item, otherwise the "indx + P_INDX" calculation
- * won't work!
- */
- if (indx + P_INDX < (u_int32_t)NUM_ENT(h) &&
- h->inp[indx] == h->inp[indx + P_INDX])
- return (__bam_adjindx(dbc,
- h, indx, indx + O_INDX, 0));
- /*
- * Check for a duplicate before us on the page. It
- * doesn't matter if we delete the key item before or
- * after the data item for the purposes of this one.
- */
- if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
- return (__bam_adjindx(dbc,
- h, indx, indx - P_INDX, 0));
- }
- /* FALLTHROUGH */
- case P_LRECNO:
- bk = GET_BKEYDATA(h, indx);
- switch (B_TYPE(bk->type)) {
- case B_DUPLICATE:
- case B_OVERFLOW:
- nbytes = BOVERFLOW_SIZE;
- bo = GET_BOVERFLOW(h, indx);
-
- /*
- * Also reached by goto from the P_IBTREE case above, with nbytes
- * and bo already set for the internal item.
- */
-offpage: /* Delete duplicate/offpage chains. */
- if (B_TYPE(bo->type) == B_DUPLICATE) {
- if ((ret =
- __db_ddup(dbc, bo->pgno, __bam_free)) != 0)
- return (ret);
- } else
- if ((ret =
- __db_doff(dbc, bo->pgno, __bam_free)) != 0)
- return (ret);
- break;
- case B_KEYDATA:
- nbytes = BKEYDATA_SIZE(bk->len);
- break;
- default:
- return (__db_pgfmt(dbp, h->pgno));
- }
- break;
- default:
- return (__db_pgfmt(dbp, h->pgno));
- }
-
- /* Delete the item. */
- if ((ret = __db_ditem(dbc, h, indx, nbytes)) != 0)
- return (ret);
-
- /* Mark the page dirty. */
- return (memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY));
-}
-
-/*
- * __bam_adjindx --
- *	Adjust an index on the page.
- *
- *	When is_insert is non-zero, a copy of h->inp[indx_copy] is inserted
- *	at position indx, shifting the following index entries up by one.
- *	Otherwise the index entry at position indx is removed and the
- *	following entries are shifted down.  Only the inp[] array and the
- *	entry count change; no page item is touched.
- *
- *	The change is logged first (when logging is enabled), the page is
- *	marked dirty, and cursors on the page are adjusted.
- *
- *	Returns 0 on success, or the error from __bam_adj_log() or
- *	memp_fset().  A memp_fset() failure is now reported to the caller
- *	rather than silently dropped.
- *
- * PUBLIC: int __bam_adjindx __P((DBC *, PAGE *, u_int32_t, u_int32_t, int));
- */
-int
-__bam_adjindx(dbc, h, indx, indx_copy, is_insert)
-	DBC *dbc;
-	PAGE *h;
-	u_int32_t indx, indx_copy;
-	int is_insert;
-{
-	DB *dbp;
-	db_indx_t copy;
-	int ret;
-
-	dbp = dbc->dbp;
-
-	/* Log the change. */
-	if (DB_LOGGING(dbc) &&
-	    (ret = __bam_adj_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h),
-	    0, dbp->log_fileid, PGNO(h), &LSN(h), indx, indx_copy,
-	    (u_int32_t)is_insert)) != 0)
-		return (ret);
-
-	if (is_insert) {
-		/* Save the value first: the memmove may overwrite its slot. */
-		copy = h->inp[indx_copy];
-		if (indx != NUM_ENT(h))
-			memmove(&h->inp[indx + O_INDX], &h->inp[indx],
-			    sizeof(db_indx_t) * (NUM_ENT(h) - indx));
-		h->inp[indx] = copy;
-		++NUM_ENT(h);
-	} else {
-		--NUM_ENT(h);
-		if (indx != NUM_ENT(h))
-			memmove(&h->inp[indx], &h->inp[indx + O_INDX],
-			    sizeof(db_indx_t) * (NUM_ENT(h) - indx));
-	}
-
-	/* Mark the page dirty. */
-	ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
-
-	/*
-	 * Adjust the cursors.  This is done even if marking the page dirty
-	 * failed, because the in-memory page has already been changed.
-	 */
-	__bam_ca_di(dbp, h->pgno, indx, is_insert ? 1 : -1);
-
-	return (ret);
-}
-
-/*
- * __bam_dpage --
- * Delete a page from the tree.
- *
- * key is used to search back down to the (emptied) leaf page. The
- * function walks up the tree one level at a time to find the highest
- * page that would become empty, then walks back down from it, write
- * locking each page and pushing it on the cursor's stack, and finally
- * hands the stack to __bam_dpages().
- *
- * If any page on the way down is found not to be in the expected state
- * (a leaf with entries, or an internal page with other than one entry),
- * the stack is released and the function returns without deleting
- * anything; ret is 0 on that path, so the caller sees success.
- *
- * Returns 0, or an error from __bam_search(), __bam_lget(),
- * memp_fget() or __bam_dpages().
- *
- * PUBLIC: int __bam_dpage __P((DBC *, const DBT *));
- */
-int
-__bam_dpage(dbc, key)
- DBC *dbc;
- const DBT *key;
-{
- CURSOR *cp;
- DB *dbp;
- DB_LOCK lock;
- PAGE *h;
- db_pgno_t pgno;
- int level; /* !!!: has to hold number of tree levels. */
- int exact, ret;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
- ret = 0;
-
- /*
- * The locking protocol is that we acquire locks by walking down the
- * tree, to avoid the obvious deadlocks.
- *
- * Call __bam_search to reacquire the empty leaf page, but this time
- * get both the leaf page and its parent, locked. Walk back up the
- * tree, until we have the top pair of pages that we want to delete.
- * Once we have the top page that we want to delete locked, lock the
- * underlying pages and check to make sure they're still empty. If
- * they are, delete them.
- */
- for (level = LEAFLEVEL;; ++level) {
- /* Acquire a page and its parent, locked. */
- if ((ret =
- __bam_search(dbc, key, S_WRPAIR, level, NULL, &exact)) != 0)
- return (ret);
-
- /*
- * If we reach the root or the page isn't going to be empty
- * when we delete one record, quit.
- */
- h = cp->csp[-1].page;
- if (h->pgno == PGNO_ROOT || NUM_ENT(h) != 1)
- break;
-
- /* Release the two locked pages. */
- (void)memp_fput(dbp->mpf, cp->csp[-1].page, 0);
- (void)__BT_TLPUT(dbc, cp->csp[-1].lock);
- (void)memp_fput(dbp->mpf, cp->csp[0].page, 0);
- (void)__BT_TLPUT(dbc, cp->csp[0].lock);
- }
-
- /*
- * Leave the stack pointer one after the last entry, we may be about
- * to push more items on the stack.
- */
- ++cp->csp;
-
- /*
- * cp->csp[-2].page is the top page, which we're not going to delete,
- * and cp->csp[-1].page is the first page we are going to delete.
- *
- * Walk down the chain, acquiring the rest of the pages until we've
- * retrieved the leaf page. If we find any pages that aren't going
- * to be emptied by the delete, someone else added something while we
- * were walking the tree, and we discontinue the delete.
- */
- for (h = cp->csp[-1].page;;) {
- if (ISLEAF(h)) {
- if (NUM_ENT(h) != 0)
- goto release;
- break;
- } else
- if (NUM_ENT(h) != 1)
- goto release;
-
- /*
- * Get the next page, write lock it and push it onto the stack.
- * We know it's index 0, because it can only have one element.
- */
- pgno = TYPE(h) == P_IBTREE ?
- GET_BINTERNAL(h, 0)->pgno : GET_RINTERNAL(h, 0)->pgno;
-
- if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
- goto release;
- /*
- * NOTE(review): if this memp_fget fails, `lock' has been acquired
- * but is not yet on the stack, so it does not look like the
- * release path below discards it -- confirm.
- */
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
- goto release;
- /*
- * NOTE(review): BT_STK_PUSH stores a status in ret, which is not
- * examined before the next iteration -- confirm whether a failed
- * push needs handling here.
- */
- BT_STK_PUSH(cp, h, 0, lock, ret);
- }
-
- /* Adjust back to reference the last page on the stack. */
- BT_STK_POP(cp);
-
- /* Delete the pages. */
- return (__bam_dpages(dbc));
-
-release:
- /* Adjust back to reference the last page on the stack. */
- BT_STK_POP(cp);
-
- /* Discard any locked pages and return. */
- __bam_stkrel(dbc, 0);
-
- return (ret);
-}
-
-/*
- * __bam_dpages --
- * Delete a set of locked pages.
- *
- * Operates on the cursor's page stack (cp->sp through cp->csp). The
- * entry at cp->sp is the page that is kept: the item at its recorded
- * index, which references the rest of the stack, is deleted from it.
- * Every later stack entry is a page that is emptied (if necessary) and
- * put on the free list with __bam_free().
- *
- * Afterwards, if the kept page was the root and it held exactly one
- * item after the delete, the function repeatedly copies the root's only
- * child over the root, for as long as the root is an internal page with
- * a single entry, reducing the height of the tree.
- *
- * Returns 0 on success, or the error from __db_relink() or the first
- * __bam_ditem() call. Errors while freeing the stack or collapsing
- * the tree are not reported.
- *
- * PUBLIC: int __bam_dpages __P((DBC *));
- */
-int
-__bam_dpages(dbc)
- DBC *dbc;
-{
- CURSOR *cp;
- DB *dbp;
- DBT a, b;
- DB_LOCK c_lock, p_lock;
- EPG *epg;
- PAGE *child, *parent;
- db_indx_t nitems;
- db_pgno_t pgno;
- db_recno_t rcnt;
- int done, ret;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
- epg = cp->sp;
-
- /*
- * !!!
- * There is an interesting deadlock situation here. We have to relink
- * the leaf page chain around the leaf page being deleted. Consider
- * a cursor walking through the leaf pages, that has the previous page
- * read-locked and is waiting on a lock for the page we're deleting.
- * It will deadlock here. This is a problem, because if our process is
- * selected to resolve the deadlock, we'll leave an empty leaf page
- * that we can never again access by walking down the tree. So, before
- * we unlink the subtree, we relink the leaf page chain.
- */
- if ((ret = __db_relink(dbc, DB_REM_PAGE, cp->csp->page, NULL, 1)) != 0)
- goto release;
-
- /*
- * We have the entire stack of deletable pages locked.
- *
- * Delete the highest page in the tree's reference to the underlying
- * stack of pages. Then, release that page, letting the rest of the
- * tree get back to business.
- */
- if ((ret = __bam_ditem(dbc, epg->page, epg->indx)) != 0) {
- /* Shared error exit: also the target of the goto above. */
-release: (void)__bam_stkrel(dbc, 0);
- return (ret);
- }
-
- /* Remember what is needed for the collapse test before letting go. */
- pgno = epg->page->pgno;
- nitems = NUM_ENT(epg->page);
-
- (void)memp_fput(dbp->mpf, epg->page, 0);
- (void)__BT_TLPUT(dbc, epg->lock);
-
- /*
- * Free the rest of the stack of pages.
- *
- * !!!
- * Don't bother checking for errors. We've unlinked the subtree from
- * the tree, and there's no possibility of recovery outside of doing
- * TXN rollback.
- */
- while (++epg <= cp->csp) {
- /*
- * Delete page entries so they will be restored as part of
- * recovery.
- */
- if (NUM_ENT(epg->page) != 0)
- (void)__bam_ditem(dbc, epg->page, epg->indx);
-
- (void)__bam_free(dbc, epg->page);
- (void)__BT_TLPUT(dbc, epg->lock);
- }
- BT_STK_CLR(cp);
-
- /*
- * Try and collapse the tree a level -- this is only applicable
- * if we've deleted the next-to-last element from the root page.
- *
- * There are two cases when collapsing a tree.
- *
- * If we've just deleted the last item from the root page, there is no
- * further work to be done. The code above has emptied the root page
- * and freed all pages below it.
- */
- if (pgno != PGNO_ROOT || nitems != 1)
- return (0);
-
- /*
- * If we just deleted the next-to-last item from the root page, the
- * tree can collapse one or more levels. While there remains only a
- * single item on the root page, write lock the last page referenced
- * by the root page and copy it over the root page. If we can't get a
- * write lock, that's okay, the tree just stays deeper than we'd like.
- */
- for (done = 0; !done;) {
- /* Initialize. */
- parent = child = NULL;
- p_lock = c_lock = LOCK_INVALID;
-
- /* Lock the root. */
- pgno = PGNO_ROOT;
- if ((ret =
- __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &p_lock)) != 0)
- goto stop;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &parent)) != 0)
- goto stop;
-
- /* Stop once the root is a leaf or has more than one entry. */
- if (NUM_ENT(parent) != 1 ||
- (TYPE(parent) != P_IBTREE && TYPE(parent) != P_IRECNO))
- goto stop;
-
- pgno = TYPE(parent) == P_IBTREE ?
- GET_BINTERNAL(parent, 0)->pgno :
- GET_RINTERNAL(parent, 0)->pgno;
-
- /* Lock the child page. */
- if ((ret =
- __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &c_lock)) != 0)
- goto stop;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &child)) != 0)
- goto stop;
-
- /*
- * Log the change.
- *
- * NOTE(review): b.size is computed with BINTERNAL_SIZE even though
- * the test above also admits a P_IRECNO parent, and the return
- * value of __bam_rsplit_log is not checked -- confirm both are
- * intended.
- */
- if (DB_LOGGING(dbc)) {
- memset(&a, 0, sizeof(a));
- a.data = child;
- a.size = dbp->pgsize;
- memset(&b, 0, sizeof(b));
- b.data = P_ENTRY(parent, 0);
- b.size = BINTERNAL_SIZE(((BINTERNAL *)b.data)->len);
- __bam_rsplit_log(dbp->dbenv->lg_info, dbc->txn,
- &child->lsn, 0, dbp->log_fileid, child->pgno, &a,
- RE_NREC(parent), &b, &parent->lsn);
- }
-
- /*
- * Make the switch.
- *
- * One fixup -- if the tree has record numbers and we're not
- * converting to a leaf page, we have to preserve the total
- * record count. Note that we are about to overwrite everything
- * on the parent, including its LSN. This is actually OK,
- * because the above log message, which describes this update,
- * stores its LSN on the child page. When the child is copied
- * to the parent, the correct LSN is going to be copied into
- * place in the parent.
- */
- COMPQUIET(rcnt, 0);
- if (TYPE(child) == P_IRECNO ||
- (TYPE(child) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
- rcnt = RE_NREC(parent);
- memcpy(parent, child, dbp->pgsize);
- /* The copy brought the child's page number along; restore it. */
- parent->pgno = PGNO_ROOT;
- if (TYPE(child) == P_IRECNO ||
- (TYPE(child) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
- RE_NREC_SET(parent, rcnt);
-
- /* Mark the pages dirty. */
- memp_fset(dbp->mpf, parent, DB_MPOOL_DIRTY);
- memp_fset(dbp->mpf, child, DB_MPOOL_DIRTY);
-
- /* Adjust the cursors. */
- __bam_ca_rsplit(dbp, child->pgno, PGNO_ROOT);
-
- /*
- * Free the page copied onto the root page and discard its
- * lock. (The call to __bam_free() discards our reference
- * to the page.)
- */
- (void)__bam_free(dbc, child);
- child = NULL;
-
- /*
- * The "if (0)" makes the stop label reachable only by goto: a
- * normal pass skips "done = 1" and loops again, while any goto
- * stop above ends the loop. Both paths share the cleanup below.
- */
- if (0) {
-stop: done = 1;
- }
- if (p_lock != LOCK_INVALID)
- (void)__BT_TLPUT(dbc, p_lock);
- if (parent != NULL)
- memp_fput(dbp->mpf, parent, 0);
- if (c_lock != LOCK_INVALID)
- (void)__BT_TLPUT(dbc, c_lock);
- if (child != NULL)
- memp_fput(dbp->mpf, child, 0);
- }
-
- return (0);
-}
diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c
deleted file mode 100644
index a89cfccb97..0000000000
--- a/db2/btree/bt_open.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_open.c 10.39 (Sleepycat) 11/21/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <limits.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-/*
- * __bam_open --
- *	Open a btree.
- *
- *	Allocates the private BTREE structure, hangs it off dbp->internal,
- *	validates and copies the caller's settings from dbinfo (or applies
- *	the defaults when dbinfo is NULL), installs the btree methods on
- *	dbp, and reads or creates the tree's metadata and root pages.
- *
- *	Returns 0 on success, EINVAL for an invalid combination of settings,
- *	or the error from __os_calloc() or __bam_read_root().  On failure the
- *	BTREE structure is freed and dbp->internal is reset to NULL so that
- *	no dangling pointer is left behind for a later __bam_close().
- *
- * PUBLIC: int __bam_open __P((DB *, DB_INFO *));
- */
-int
-__bam_open(dbp, dbinfo)
-	DB *dbp;
-	DB_INFO *dbinfo;
-{
-	BTREE *t;
-	int ret;
-
-	/* Allocate and initialize the private btree structure. */
-	if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0)
-		return (ret);
-	dbp->internal = t;
-
-	/*
-	 * Intention is to make sure all of the user's selections are okay
-	 * here and then use them without checking.
-	 */
-	if (dbinfo == NULL) {
-		t->bt_minkey = DEFMINKEYPAGE;
-		t->bt_compare = __bam_defcmp;
-		t->bt_prefix = __bam_defpfx;
-	} else {
-		/* Minimum number of keys per page. */
-		if (dbinfo->bt_minkey == 0)
-			t->bt_minkey = DEFMINKEYPAGE;
-		else {
-			if (dbinfo->bt_minkey < 2)
-				goto einval;
-			t->bt_minkey = dbinfo->bt_minkey;
-		}
-
-		/* Maximum number of keys per page. */
-		if (dbinfo->bt_maxkey == 0)
-			t->bt_maxkey = 0;
-		else {
-			if (dbinfo->bt_maxkey < 1)
-				goto einval;
-			t->bt_maxkey = dbinfo->bt_maxkey;
-		}
-
-		/*
-		 * If no comparison, use default comparison.  If no comparison
-		 * and no prefix, use default prefix.  (We can't default the
-		 * prefix if the user supplies a comparison routine; shortening
-		 * the keys may break their comparison algorithm.  We don't
-		 * permit the user to specify a prefix routine if they didn't
-		 * also specify a comparison routine, they can't know enough
-		 * about our comparison routine to get it right.)
-		 */
-		if ((t->bt_compare = dbinfo->bt_compare) == NULL) {
-			if (dbinfo->bt_prefix != NULL)
-				goto einval;
-			t->bt_compare = __bam_defcmp;
-			t->bt_prefix = __bam_defpfx;
-		} else
-			t->bt_prefix = dbinfo->bt_prefix;
-	}
-
-	/* Initialize the remaining fields/methods of the DB. */
-	dbp->am_close = __bam_close;
-	dbp->del = __bam_delete;
-	dbp->stat = __bam_stat;
-
-	/* Start up the tree. */
-	if ((ret = __bam_read_root(dbp)) != 0)
-		goto err;
-
-	/* Set the overflow page size. */
-	__bam_setovflsize(dbp);
-
-	return (0);
-
-einval:	ret = EINVAL;
-
-	/*
-	 * Free the private structure and clear the handle's reference to
-	 * it, so the freed memory can be neither used nor freed again.
-	 */
-err:	__os_free(t, sizeof(BTREE));
-	dbp->internal = NULL;
-	return (ret);
-}
-
-/*
- * __bam_close --
- *	Close a btree: free the private BTREE structure attached to the
- *	handle and clear the handle's pointer to it.  Always returns 0.
- *
- * PUBLIC: int __bam_close __P((DB *));
- */
-int
-__bam_close(dbp)
-	DB *dbp;
-{
-	BTREE *t;
-
-	/* Detach the structure from the handle, then release it. */
-	t = dbp->internal;
-	dbp->internal = NULL;
-	__os_free(t, sizeof(BTREE));
-
-	return (0);
-}
-
-/*
- * __bam_setovflsize --
- *	Compute bt_ovflsize, the number of bytes a key/data pair may use
- *	before it is placed on an overflow page, from the page size and the
- *	minimum number of keys per page.  A bt_minkey of 0 is first replaced
- *	by DEFMINKEYPAGE.
- *
- * PUBLIC: void __bam_setovflsize __P((DB *));
- */
-void
-__bam_setovflsize(dbp)
-	DB *dbp;
-{
-	BTREE *bt;
-
-	bt = dbp->internal;
-
-	/*
-	 * !!!
-	 * Recno knows nothing about a minimum number of keys per page, so
-	 * the field may still be unset here; fall back to the default.
-	 */
-	if (bt->bt_minkey == 0)
-		bt->bt_minkey = DEFMINKEYPAGE;
-
-	/*
-	 * The btree requires that at least two key/data pairs fit on a page;
-	 * beyond that there is no fixed requirement.  Divide the usable page
-	 * space by the number of items implied by the minimum key count,
-	 * then subtract the per-item overhead, assuming the worst case in
-	 * which every item needs the maximum alignment padding.
-	 *
-	 * Recno uses the btree bt_ovflsize value -- it's close enough.
-	 */
-	bt->bt_ovflsize =
-	    (dbp->pgsize - P_OVERHEAD) / (bt->bt_minkey * P_INDX)
-	    - (BKEYDATA_PSIZE(0) + ALIGN(1, 4));
-}
-
-/*
- * __bam_read_root --
- *	Check (and optionally create) a tree.
- *
- *	The metadata page is fetched (and created if absent) under a write
- *	lock.  If its magic number is non-zero the tree already exists, and
- *	the stored minkey/maxkey values are copied into the private BTREE
- *	structure.  Otherwise the metadata page is initialized from the
- *	handle's settings, an empty root leaf page is created, and both pages
- *	are written back and flushed with memp_fsync().
- *
- *	Every page reference and lock taken here is released on all paths,
- *	including the error paths, and the cursor is always closed.
- *
- *	Returns 0 on success, EINVAL if the flush reports DB_INCOMPLETE, or
- *	the first error returned by a helper.
- *
- * PUBLIC: int __bam_read_root __P((DB *));
- */
-int
-__bam_read_root(dbp)
-	DB *dbp;
-{
-	BTMETA *meta;
-	BTREE *t;
-	DBC *dbc;
-	DB_LOCK metalock, rootlock;
-	PAGE *root;
-	db_pgno_t pgno;
-	int ret, t_ret;
-
-	t = dbp->internal;
-
-	/* Nothing is held yet; the cleanup code below relies on this. */
-	meta = NULL;
-	root = NULL;
-	metalock = rootlock = LOCK_INVALID;
-
-	/* Get a cursor. */
-	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
-		return (ret);
-
-	/* Get, and optionally create the metadata page. */
-	pgno = PGNO_METADATA;
-	if ((ret =
-	    __bam_lget(dbc, 0, PGNO_METADATA, DB_LOCK_WRITE, &metalock)) != 0) {
-		/* The lock was not granted: make sure it isn't released. */
-		metalock = LOCK_INVALID;
-		goto err;
-	}
-	if ((ret =
-	    memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0) {
-		meta = NULL;
-		goto err;
-	}
-
-	/*
-	 * If the magic number is correct, we're not creating the tree.
-	 * Correct any fields that may not be right.  Note, all of the
-	 * local flags were set by db_open(3).
-	 */
-	if (meta->magic != 0) {
-		t->bt_maxkey = meta->maxkey;
-		t->bt_minkey = meta->minkey;
-
-		/* ret is 0: the common cleanup releases the page and lock. */
-		goto done;
-	}
-
-	/* Initialize the tree structure metadata information. */
-	memset(meta, 0, sizeof(BTMETA));
-	ZERO_LSN(meta->lsn);
-	meta->pgno = PGNO_METADATA;
-	meta->magic = DB_BTREEMAGIC;
-	meta->version = DB_BTREEVERSION;
-	meta->pagesize = dbp->pgsize;
-	meta->maxkey = t->bt_maxkey;
-	meta->minkey = t->bt_minkey;
-	meta->free = PGNO_INVALID;
-	if (dbp->type == DB_RECNO)
-		F_SET(meta, BTM_RECNO);
-	if (F_ISSET(dbp, DB_AM_DUP))
-		F_SET(meta, BTM_DUP);
-	if (F_ISSET(dbp, DB_RE_FIXEDLEN))
-		F_SET(meta, BTM_FIXEDLEN);
-	if (F_ISSET(dbp, DB_BT_RECNUM))
-		F_SET(meta, BTM_RECNUM);
-	if (F_ISSET(dbp, DB_RE_RENUMBER))
-		F_SET(meta, BTM_RENUMBER);
-	memcpy(meta->uid, dbp->fileid, DB_FILE_ID_LEN);
-
-	/* Create and initialize a root page. */
-	pgno = PGNO_ROOT;
-	if ((ret =
-	    __bam_lget(dbc, 0, PGNO_ROOT, DB_LOCK_WRITE, &rootlock)) != 0) {
-		rootlock = LOCK_INVALID;
-		goto err;
-	}
-	if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0) {
-		root = NULL;
-		goto err;
-	}
-	P_INIT(root, dbp->pgsize, PGNO_ROOT, PGNO_INVALID,
-	    PGNO_INVALID, 1, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
-	ZERO_LSN(root->lsn);
-
-	/*
-	 * Release the metadata and root pages.  Each pointer is cleared once
-	 * its put has been attempted, so the cleanup code never puts the
-	 * same page twice.
-	 */
-	ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
-	meta = NULL;
-	if (ret != 0)
-		goto err;
-	ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY);
-	root = NULL;
-	if (ret != 0)
-		goto err;
-
-	/*
-	 * Flush the metadata and root pages to disk -- since the user can't
-	 * transaction protect open, the pages have to exist during recovery.
-	 *
-	 * XXX
-	 * It's not useful to return not-yet-flushed here -- convert it to
-	 * an error.
-	 */
-	if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
-		ret = EINVAL;
-
-err:
-done:	/* Release whatever is still held: pages first, then locks. */
-	if (meta != NULL)
-		(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
-	if (root != NULL)
-		(void)memp_fput(dbp->mpf, root, 0);
-	if (metalock != LOCK_INVALID)
-		(void)__BT_LPUT(dbc, metalock);
-	if (rootlock != LOCK_INVALID)
-		(void)__BT_LPUT(dbc, rootlock);
-
-	if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
-		ret = t_ret;
-	return (ret);
-}
diff --git a/db2/btree/bt_page.c b/db2/btree/bt_page.c
deleted file mode 100644
index 6ccd68a5ab..0000000000
--- a/db2/btree/bt_page.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_page.c 10.17 (Sleepycat) 1/3/99";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-/*
- * __bam_new --
- *	Get a new page, preferably from the freelist.
- *
- *	The metadata page is write locked and fetched.  If the free list is
- *	empty (meta->free == PGNO_INVALID) a new page is allocated with
- *	DB_MPOOL_NEW; otherwise the page at the head of the free list is
- *	taken.  The allocation is logged when logging is enabled, and the
- *	page is initialized with the requested type and returned through
- *	pagepp.
- *
- *	The free-list head in the metadata page is updated only after the
- *	log record has been written, so a logging failure leaves the cached
- *	metadata page unchanged.
- *
- *	Returns 0 on success, or the error from __bam_lget(), memp_fget() or
- *	__bam_pg_alloc_log(); on error everything acquired here is released
- *	and *pagepp is not written.
- *
- * PUBLIC: int __bam_new __P((DBC *, u_int32_t, PAGE **));
- */
-int
-__bam_new(dbc, type, pagepp)
-	DBC *dbc;
-	u_int32_t type;
-	PAGE **pagepp;
-{
-	BTMETA *meta;
-	DB *dbp;
-	DB_LOCK metalock;
-	PAGE *h;
-	db_pgno_t newnext, pgno;
-	int ret;
-
-	dbp = dbc->dbp;
-	meta = NULL;
-	h = NULL;
-	metalock = LOCK_INVALID;
-
-	pgno = PGNO_METADATA;
-	if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &metalock)) != 0)
-		goto err;
-	if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0)
-		goto err;
-
-	if (meta->free == PGNO_INVALID) {
-		/* Empty free list: extend the file by one page. */
-		if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_NEW, &h)) != 0)
-			goto err;
-		ZERO_LSN(h->lsn);
-		h->pgno = pgno;
-		newnext = PGNO_INVALID;
-	} else {
-		/* Take the head of the free list; its successor is next. */
-		pgno = meta->free;
-		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
-			goto err;
-		newnext = h->next_pgno;
-	}
-
-	/* Log the change. */
-	if (DB_LOGGING(dbc)) {
-		if ((ret = __bam_pg_alloc_log(dbp->dbenv->lg_info, dbc->txn,
-		    &meta->lsn, 0, dbp->log_fileid, &meta->lsn, &h->lsn,
-		    h->pgno, (u_int32_t)type, newnext)) != 0)
-			goto err;
-		LSN(h) = LSN(meta);
-	}
-
-	/* The change is logged (or logging is off): unlink the page. */
-	meta->free = newnext;
-
-	(void)memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
-	(void)__BT_TLPUT(dbc, metalock);
-
-	P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type);
-	*pagepp = h;
-	return (0);
-
-err:	if (h != NULL)
-		(void)memp_fput(dbp->mpf, h, 0);
-	if (meta != NULL)
-		(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
-	if (metalock != LOCK_INVALID)
-		(void)__BT_TLPUT(dbc, metalock);
-	return (ret);
-}
-
-/*
- * __bam_lput --
- *	The standard lock put call: a function wrapper around the
- *	__BT_LPUT macro, returning whatever the macro evaluates to.
- *
- * PUBLIC: int __bam_lput __P((DBC *, DB_LOCK));
- */
-int
-__bam_lput(dbc, lock)
-	DBC *dbc;
-	DB_LOCK lock;
-{
-	int ret;
-
-	ret = __BT_LPUT(dbc, lock);
-	return (ret);
-}
-
-/*
- * __bam_free --
- *	Add a page to the head of the freelist.
- *
- *	The metadata page is write locked and fetched, the change is logged
- *	when logging is enabled, the page is re-initialized as P_INVALID with
- *	its next pointer set to the old free-list head, and the metadata
- *	page's free pointer is set to the page.
- *
- *	The caller's reference to h is always discarded, on success and on
- *	every failure path, because callers assume this function takes care
- *	of it.  The page is put back dirty only if it was actually changed.
- *	The caller's lock on the page is NOT released here.
- *
- *	Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __bam_free __P((DBC *, PAGE *));
- */
-int
-__bam_free(dbc, h)
-	DBC *dbc;
-	PAGE *h;
-{
-	BTMETA *meta;
-	DB *dbp;
-	DBT ldbt;
-	DB_LOCK metalock;
-	db_pgno_t pgno;
-	u_int32_t dirty_flag;
-	int ret, t_ret;
-
-	dbp = dbc->dbp;
-
-	/*
-	 * Retrieve the metadata page and insert the page at the head of
-	 * the free list.  If the lock get, page get or logging routines
-	 * fail, then we need to put the page with which we were called
-	 * back because our caller assumes we take care of it.
-	 */
-	dirty_flag = 0;
-	pgno = PGNO_METADATA;
-	if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &metalock)) != 0)
-		goto err;
-	if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) {
-		(void)__BT_TLPUT(dbc, metalock);
-		goto err;
-	}
-
-	/* Log the change. */
-	if (DB_LOGGING(dbc)) {
-		memset(&ldbt, 0, sizeof(ldbt));
-		ldbt.data = h;
-		ldbt.size = P_OVERHEAD;
-		if ((ret = __bam_pg_free_log(dbp->dbenv->lg_info,
-		    dbc->txn, &meta->lsn, 0, dbp->log_fileid, h->pgno,
-		    &meta->lsn, &ldbt, meta->free)) != 0) {
-			(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
-			(void)__BT_TLPUT(dbc, metalock);
-			/* Still discard the caller's (unchanged) page. */
-			goto err;
-		}
-		LSN(h) = LSN(meta);
-	}
-
-	/*
-	 * The page should have nothing interesting on it, re-initialize it,
-	 * leaving only the page number and the LSN.
-	 */
-#ifdef DIAGNOSTIC
-	{ db_pgno_t __pgno; DB_LSN __lsn;
-		__pgno = h->pgno;
-		__lsn = h->lsn;
-		memset(h, 0xdb, dbp->pgsize);
-		h->pgno = __pgno;
-		h->lsn = __lsn;
-	}
-#endif
-	P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
-
-	/* Link the page on the metadata free list. */
-	meta->free = h->pgno;
-
-	/* Discard the metadata page, keeping the first error seen. */
-	ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
-	if ((t_ret = __BT_TLPUT(dbc, metalock)) != 0 && ret == 0)
-		ret = t_ret;
-
-	/* Discard the caller's page reference. */
-	dirty_flag = DB_MPOOL_DIRTY;
-err:	if ((t_ret = memp_fput(dbp->mpf, h, dirty_flag)) != 0 && ret == 0)
-		ret = t_ret;
-
-	/*
-	 * XXX
-	 * We have to unlock the caller's page in the caller!
-	 */
-	return (ret);
-}
-
-#ifdef DEBUG
-/*
- * __bam_lt --
- *	Print out the list of locks currently held by a cursor.
- *
- *	Issues a DB_LOCK_DUMP request through lock_vec() when the handle has
- *	DB_AM_LOCKING set, and does nothing otherwise.  Always returns 0;
- *	the result of lock_vec() is not examined.
- *
- * PUBLIC: int __bam_lt __P((DBC *));
- */
-int
-__bam_lt(dbc)
-	DBC *dbc;
-{
-	DB *dbp;
-	DB_LOCKREQ dump;
-
-	dbp = dbc->dbp;
-
-	/* Nothing to show when the handle isn't doing locking. */
-	if (!F_ISSET(dbp, DB_AM_LOCKING))
-		return (0);
-
-	dump.op = DB_LOCK_DUMP;
-	lock_vec(dbp->dbenv->lk_info, dbc->locker, 0, &dump, 1, NULL);
-	return (0);
-}
-#endif
-
-/*
- * __bam_lget --
- *	The standard lock get call.
- *
- *	Requests a lock of the given mode on page pgno.  When the handle is
- *	not doing locking, *lockp is set to LOCK_INVALID and 0 is returned.
- *	When do_couple is non-zero, the new lock is requested and the lock
- *	currently in *lockp is released in a single vector request; on
- *	success *lockp is replaced by the new lock, and on failure the old
- *	lock is discarded.
- *
- *	Returns 0 on success.  A negative result from the lock calls is
- *	converted to EAGAIN; positive errors are returned unchanged.
- *
- * PUBLIC: int __bam_lget
- * PUBLIC:    __P((DBC *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
- */
-int
-__bam_lget(dbc, do_couple, pgno, mode, lockp)
-	DBC *dbc;
-	int do_couple;
-	db_pgno_t pgno;
-	db_lockmode_t mode;
-	DB_LOCK *lockp;
-{
-	DB *dbp;
-	DB_LOCKREQ reqs[2];
-	int ret;
-
-	dbp = dbc->dbp;
-
-	if (!F_ISSET(dbp, DB_AM_LOCKING)) {
-		*lockp = LOCK_INVALID;
-		return (0);
-	}
-
-	/* The lock object handed to the lock calls is dbc->lock_dbt. */
-	dbc->lock.pgno = pgno;
-
-	/*
-	 * Simple case: nothing to release, so just acquire the lock.  If we
-	 * fail and it's not a system error, convert to EAGAIN.
-	 */
-	if (!do_couple) {
-		if (dbc->txn == NULL)
-			ret = lock_get(dbp->dbenv->lk_info,
-			    dbc->locker, 0, &dbc->lock_dbt, mode, lockp);
-		else
-			ret = lock_tget(dbp->dbenv->lk_info,
-			    dbc->txn, 0, &dbc->lock_dbt, mode, lockp);
-		return (ret < 0 ? EAGAIN : ret);
-	}
-
-	/* Lock couple: get the new lock and put the old one together. */
-	reqs[0].op = DB_LOCK_GET;
-	reqs[0].obj = &dbc->lock_dbt;
-	reqs[0].mode = mode;
-	reqs[1].op = DB_LOCK_PUT;
-	reqs[1].lock = *lockp;
-
-	if (dbc->txn == NULL)
-		ret = lock_vec(dbp->dbenv->lk_info,
-		    dbc->locker, 0, reqs, 2, NULL);
-	else
-		ret = lock_tvec(dbp->dbenv->lk_info,
-		    dbc->txn, 0, reqs, 2, NULL);
-	if (ret != 0) {
-		/* If we fail, discard the lock we held. */
-		__BT_LPUT(dbc, *lockp);
-
-		return (ret < 0 ? EAGAIN : ret);
-	}
-
-	*lockp = reqs[0].lock;
-	return (0);
-}
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
deleted file mode 100644
index 0d7a69889a..0000000000
--- a/db2/btree/bt_put.c
+++ /dev/null
@@ -1,831 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_put.c 10.54 (Sleepycat) 12/6/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-static int __bam_fixed __P((DBC *, DBT *));
-static int __bam_ndup __P((DBC *, PAGE *, u_int32_t));
-static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *));
-static int __bam_partial __P((DBC *,
- DBT *, PAGE *, u_int32_t, u_int32_t, u_int32_t));
-static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t));
-
-/*
- * __bam_iitem --
- * Insert an item into the tree.
- *
- * PUBLIC: int __bam_iitem __P((DBC *,
- * PUBLIC: PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t));
- */
-int
-__bam_iitem(dbc, hp, indxp, key, data, op, flags)
- DBC *dbc;
- PAGE **hp;
- db_indx_t *indxp;
- DBT *key, *data;
- u_int32_t op, flags;
-{
- BTREE *t;
- BKEYDATA *bk;
- DB *dbp;
- DBT tdbt;
- PAGE *h;
- db_indx_t indx, nbytes;
- u_int32_t data_size, have_bytes, need_bytes, needed;
- int bigkey, bigdata, dupadjust, replace, ret;
-
- COMPQUIET(bk, NULL);
-
- dbp = dbc->dbp;
- t = dbp->internal;
- h = *hp;
- indx = *indxp;
- dupadjust = replace = 0;
-
- /*
- * If it's a page of duplicates, call the common code to do the work.
- *
- * !!!
- * Here's where the hp and indxp are important. The duplicate code
- * may decide to rework/rearrange the pages and indices we're using,
- * so the caller must understand that the page stack may change.
- */
- if (TYPE(h) == P_DUPLICATE) {
- /* Adjust the index for the new item if it's a DB_AFTER op. */
- if (op == DB_AFTER)
- ++*indxp;
-
- /* Remove the current item if it's a DB_CURRENT op. */
- if (op == DB_CURRENT) {
- bk = GET_BKEYDATA(*hp, *indxp);
- switch (B_TYPE(bk->type)) {
- case B_KEYDATA:
- nbytes = BKEYDATA_SIZE(bk->len);
- break;
- case B_OVERFLOW:
- nbytes = BOVERFLOW_SIZE;
- break;
- default:
- return (__db_pgfmt(dbp, h->pgno));
- }
- if ((ret = __db_ditem(dbc, *hp, *indxp, nbytes)) != 0)
- return (ret);
- }
-
- /* Put the new/replacement item onto the page. */
- if ((ret = __db_dput(dbc, data, hp, indxp, __bam_new)) != 0)
- return (ret);
-
- goto done;
- }
-
- /* Handle fixed-length records: build the real record. */
- if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->recno->re_len) {
- tdbt = *data;
- if ((ret = __bam_fixed(dbc, &tdbt)) != 0)
- return (ret);
- data = &tdbt;
- }
-
- /*
- * Figure out how much space the data will take, including if it's a
- * partial record. If either of the key or data items won't fit on
- * a page, we'll have to store them on overflow pages.
- */
- bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize;
- data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
- __bam_partsize(data, h, indx) : data->size;
- bigdata = data_size > t->bt_ovflsize;
-
- needed = 0;
- if (LF_ISSET(BI_NEWKEY)) {
- /* If BI_NEWKEY is set we're adding a new key and data pair. */
- if (bigkey)
- needed += BOVERFLOW_PSIZE;
- else
- needed += BKEYDATA_PSIZE(key->size);
- if (bigdata)
- needed += BOVERFLOW_PSIZE;
- else
- needed += BKEYDATA_PSIZE(data_size);
- } else {
- /*
- * We're either overwriting the data item of a key/data pair
- * or we're adding the data item only, i.e. a new duplicate.
- */
- if (op == DB_CURRENT) {
- bk = GET_BKEYDATA(h,
- indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
- if (B_TYPE(bk->type) == B_KEYDATA)
- have_bytes = BKEYDATA_PSIZE(bk->len);
- else
- have_bytes = BOVERFLOW_PSIZE;
- need_bytes = 0;
- } else {
- have_bytes = 0;
- need_bytes = sizeof(db_indx_t);
- }
- if (bigdata)
- need_bytes += BOVERFLOW_PSIZE;
- else
- need_bytes += BKEYDATA_PSIZE(data_size);
-
- if (have_bytes < need_bytes)
- needed += need_bytes - have_bytes;
- }
-
- /*
- * If there's not enough room, or the user has put a ceiling on the
- * number of keys permitted in the page, split the page.
- *
- * XXX
- * The t->bt_maxkey test here may be insufficient -- do we have to
- * check in the btree split code, so we don't undo it there!?!?
- */
- if (P_FREESPACE(h) < needed ||
- (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
- return (DB_NEEDSPLIT);
-
- /* Handle partial puts: build the real record. */
- if (F_ISSET(data, DB_DBT_PARTIAL)) {
- tdbt = *data;
- if ((ret = __bam_partial(dbc,
- &tdbt, h, indx, data_size, flags)) != 0)
- return (ret);
- data = &tdbt;
- }
-
- /*
- * The code breaks it up into six cases:
- *
- * 1. Append a new key/data pair.
- * 2. Insert a new key/data pair.
- * 3. Append a new data item (a new duplicate).
- * 4. Insert a new data item (a new duplicate).
- * 5. Overflow item: delete and re-add the data item.
- * 6. Replace the data item.
- */
- if (LF_ISSET(BI_NEWKEY)) {
- switch (op) {
- case DB_AFTER: /* 1. Append a new key/data pair. */
- indx += 2;
- *indxp += 2;
- break;
- case DB_BEFORE: /* 2. Insert a new key/data pair. */
- break;
- default:
- return (EINVAL);
- }
-
- /* Add the key. */
- if (bigkey) {
- if ((ret = __bam_ovput(dbc, h, indx, key)) != 0)
- return (ret);
- } else
- if ((ret = __db_pitem(dbc, h, indx,
- BKEYDATA_SIZE(key->size), NULL, key)) != 0)
- return (ret);
- ++indx;
- } else {
- switch (op) {
- case DB_AFTER: /* 3. Append a new data item. */
- if (TYPE(h) == P_LBTREE) {
- /*
- * Adjust the cursor and copy in the key for
- * the duplicate.
- */
- if ((ret = __bam_adjindx(dbc,
- h, indx + P_INDX, indx, 1)) != 0)
- return (ret);
-
- indx += 3;
- dupadjust = 1;
-
- *indxp += 2;
- } else {
- ++indx;
- __bam_ca_di(dbp, h->pgno, indx, 1);
-
- *indxp += 1;
- }
- break;
- case DB_BEFORE: /* 4. Insert a new data item. */
- if (TYPE(h) == P_LBTREE) {
- /*
- * Adjust the cursor and copy in the key for
- * the duplicate.
- */
- if ((ret =
- __bam_adjindx(dbc, h, indx, indx, 1)) != 0)
- return (ret);
-
- ++indx;
- dupadjust = 1;
- } else
- __bam_ca_di(dbp, h->pgno, indx, 1);
- break;
- case DB_CURRENT:
- if (TYPE(h) == P_LBTREE)
- ++indx;
-
- /*
- * 5. Delete/re-add the data item.
- *
- * If we're dealing with offpage items, we have to
- * delete and then re-add the item.
- */
- if (bigdata || B_TYPE(bk->type) != B_KEYDATA) {
- if ((ret = __bam_ditem(dbc, h, indx)) != 0)
- return (ret);
- break;
- }
-
- /* 6. Replace the data item. */
- replace = 1;
- break;
- default:
- return (EINVAL);
- }
- }
-
- /* Add the data. */
- if (bigdata) {
- if ((ret = __bam_ovput(dbc, h, indx, data)) != 0)
- return (ret);
- } else {
- BKEYDATA __bk;
- DBT __hdr;
-
- if (LF_ISSET(BI_DELETED)) {
- B_TSET(__bk.type, B_KEYDATA, 1);
- __bk.len = data->size;
- __hdr.data = &__bk;
- __hdr.size = SSZA(BKEYDATA, data);
- ret = __db_pitem(dbc, h, indx,
- BKEYDATA_SIZE(data->size), &__hdr, data);
- } else if (replace)
- ret = __bam_ritem(dbc, h, indx, data);
- else
- ret = __db_pitem(dbc, h, indx,
- BKEYDATA_SIZE(data->size), NULL, data);
- if (ret != 0)
- return (ret);
- }
-
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
- return (ret);
-
- /*
- * If the page is at least 50% full, and we added a duplicate, see if
- * that set of duplicates takes up at least 25% of the space. If it
- * does, move it off onto its own page.
- */
- if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
- --indx;
- if ((ret = __bam_ndup(dbc, h, indx)) != 0)
- return (ret);
- }
-
- /*
- * If we've changed the record count, update the tree. Record counts
- * need to be updated in recno databases and in btree databases where
- * we are supporting records. In both cases, adjust the count if the
- * operation wasn't performed on the current record or when the caller
- * overrides and wants the adjustment made regardless.
- */
-done: if (LF_ISSET(BI_DOINCR) ||
- (op != DB_CURRENT &&
- (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO)))
- if ((ret = __bam_adjust(dbc, 1)) != 0)
- return (ret);
-
- /* If we've modified a recno file, set the flag */
- if (t->recno != NULL)
- F_SET(t->recno, RECNO_MODIFIED);
-
- return (ret);
-}
-
-/*
- * __bam_partsize --
- * Figure out how much space a partial data item is in total.
- */
-static u_int32_t
-__bam_partsize(data, h, indx)
- DBT *data;
- PAGE *h;
- u_int32_t indx;
-{
- BKEYDATA *bk;
- u_int32_t nbytes;
-
- /*
- * Figure out how much total space we'll need. If the record doesn't
- * already exist, it's simply the data we're provided.
- */
- if (indx >= NUM_ENT(h))
- return (data->doff + data->size);
-
- /*
- * Otherwise, it's the data provided plus any already existing data
- * that we're not replacing.
- */
- bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
- nbytes =
- B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
-
- /*
- * There are really two cases here:
- *
- * Case 1: We are replacing some bytes that do not exist (i.e., they
- * are past the end of the record). In this case the number of bytes
- * we are replacing is irrelevant and all we care about is how many
- * bytes we are going to add from offset. So, the new record length
- * is going to be the size of the new bytes (size) plus wherever those
- * new bytes begin (doff).
- *
- * Case 2: All the bytes we are replacing exist. Therefore, the new
- * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
- * plus the bytes we are adding (size).
- */
- if (nbytes < data->doff + data->dlen) /* Case 1 */
- return (data->doff + data->size);
-
- return (nbytes + data->size - data->dlen); /* Case 2 */
-}
-
-/*
- * OVPUT --
- * Copy an overflow item onto a page.
- */
-#undef OVPUT
-#define OVPUT(h, indx, bo) do { \
- DBT __hdr; \
- memset(&__hdr, 0, sizeof(__hdr)); \
- __hdr.data = &bo; \
- __hdr.size = BOVERFLOW_SIZE; \
- if ((ret = __db_pitem(dbc, \
- h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \
- return (ret); \
-} while (0)
-
-/*
- * __bam_ovput --
- * Build an overflow item and put it on the page.
- */
-static int
-__bam_ovput(dbc, h, indx, item)
- DBC *dbc;
- PAGE *h;
- u_int32_t indx;
- DBT *item;
-{
- BOVERFLOW bo;
- int ret;
-
- UMRW(bo.unused1);
- B_TSET(bo.type, B_OVERFLOW, 0);
- UMRW(bo.unused2);
- if ((ret = __db_poff(dbc, item, &bo.pgno, __bam_new)) != 0)
- return (ret);
- bo.tlen = item->size;
-
- OVPUT(h, indx, bo);
-
- return (0);
-}
-
-/*
- * __bam_ritem --
- * Replace an item on a page.
- *
- * PUBLIC: int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *));
- */
-int
-__bam_ritem(dbc, h, indx, data)
- DBC *dbc;
- PAGE *h;
- u_int32_t indx;
- DBT *data;
-{
- BKEYDATA *bk;
- DB *dbp;
- DBT orig, repl;
- db_indx_t cnt, lo, ln, min, off, prefix, suffix;
- int32_t nbytes;
- int ret;
- u_int8_t *p, *t;
-
- dbp = dbc->dbp;
-
- /*
- * Replace a single item onto a page. The logic figuring out where
- * to insert and whether it fits is handled in the caller. All we do
- * here is manage the page shuffling.
- */
- bk = GET_BKEYDATA(h, indx);
-
- /* Log the change. */
- if (DB_LOGGING(dbc)) {
- /*
- * We might as well check to see if the two data items share
- * a common prefix and suffix -- it can save us a lot of log
- * message if they're large.
- */
- min = data->size < bk->len ? data->size : bk->len;
- for (prefix = 0,
- p = bk->data, t = data->data;
- prefix < min && *p == *t; ++prefix, ++p, ++t)
- ;
-
- min -= prefix;
- for (suffix = 0,
- p = (u_int8_t *)bk->data + bk->len - 1,
- t = (u_int8_t *)data->data + data->size - 1;
- suffix < min && *p == *t; ++suffix, --p, --t)
- ;
-
- /* We only log the parts of the keys that have changed. */
- orig.data = (u_int8_t *)bk->data + prefix;
- orig.size = bk->len - (prefix + suffix);
- repl.data = (u_int8_t *)data->data + prefix;
- repl.size = data->size - (prefix + suffix);
- if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbc->txn,
- &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
- (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
- &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
- return (ret);
- }
-
- /*
- * Set references to the first in-use byte on the page and the
- * first byte of the item being replaced.
- */
- p = (u_int8_t *)h + HOFFSET(h);
- t = (u_int8_t *)bk;
-
- /*
- * If the entry is growing in size, shift the beginning of the data
- * part of the page down. If the entry is shrinking in size, shift
- * the beginning of the data part of the page up. Use memmove(3),
- * the regions overlap.
- */
- lo = BKEYDATA_SIZE(bk->len);
- ln = BKEYDATA_SIZE(data->size);
- if (lo != ln) {
- nbytes = lo - ln; /* Signed difference. */
- if (p == t) /* First index is fast. */
- h->inp[indx] += nbytes;
- else { /* Else, shift the page. */
- memmove(p + nbytes, p, t - p);
-
- /* Adjust the indices' offsets. */
- off = h->inp[indx];
- for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
- if (h->inp[cnt] <= off)
- h->inp[cnt] += nbytes;
- }
-
- /* Clean up the page and adjust the item's reference. */
- HOFFSET(h) += nbytes;
- t += nbytes;
- }
-
- /* Copy the new item onto the page. */
- bk = (BKEYDATA *)t;
- B_TSET(bk->type, B_KEYDATA, 0);
- bk->len = data->size;
- memcpy(bk->data, data->data, data->size);
-
- return (0);
-}
-
-/*
- * __bam_ndup --
- * Check to see if the duplicate set at indx should have its own page.
- * If it should, create it.
- */
-static int
-__bam_ndup(dbc, h, indx)
- DBC *dbc;
- PAGE *h;
- u_int32_t indx;
-{
- BKEYDATA *bk;
- BOVERFLOW bo;
- DB *dbp;
- DBT hdr;
- PAGE *cp;
- db_indx_t cnt, cpindx, first, sz;
- int ret;
-
- dbp = dbc->dbp;
-
- while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
- indx -= P_INDX;
- for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) {
- if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx])
- break;
- bk = GET_BKEYDATA(h, indx);
- sz += B_TYPE(bk->type) == B_KEYDATA ?
- BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
- bk = GET_BKEYDATA(h, indx + O_INDX);
- sz += B_TYPE(bk->type) == B_KEYDATA ?
- BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
- }
-
- /*
- * If this set of duplicates is using more than 25% of the page, move
- * them off. The choice of 25% is a WAG, but it has to be small enough
- * that we can always split regardless of the presence of duplicates.
- */
- if (sz < dbp->pgsize / 4)
- return (0);
-
- /* Get a new page. */
- if ((ret = __bam_new(dbc, P_DUPLICATE, &cp)) != 0)
- return (ret);
-
- /*
- * Move this set of duplicates off the page. First points to the first
- * key of the first duplicate key/data pair, cnt is the number of pairs
- * we're dealing with.
- */
- memset(&hdr, 0, sizeof(hdr));
- for (indx = first + O_INDX, cpindx = 0;; ++cpindx) {
- /* Copy the entry to the new page. */
- bk = GET_BKEYDATA(h, indx);
- hdr.data = bk;
- hdr.size = B_TYPE(bk->type) == B_KEYDATA ?
- BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
- if ((ret =
- __db_pitem(dbc, cp, cpindx, hdr.size, &hdr, NULL)) != 0)
- goto err;
-
- /*
- * Move cursors referencing the old entry to the new entry.
- * Done after the page put because __db_pitem() adjusts
- * cursors on the new page, and before the delete because
- * __db_ditem adjusts cursors on the old page.
- */
- __bam_ca_dup(dbp,
- PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx);
-
- /* Delete the data item. */
- if ((ret = __db_ditem(dbc, h, indx, hdr.size)) != 0)
- goto err;
-
- /* Delete all but the first reference to the key. */
- if (--cnt == 0)
- break;
- if ((ret = __bam_adjindx(dbc, h, indx, first, 0)) != 0)
- goto err;
- }
-
- /* Put in a new data item that points to the duplicates page. */
- UMRW(bo.unused1);
- B_TSET(bo.type, B_DUPLICATE, 0);
- UMRW(bo.unused2);
- bo.pgno = cp->pgno;
- bo.tlen = 0;
-
- OVPUT(h, indx, bo);
-
- return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY));
-
-err: (void)__bam_free(dbc, cp);
- return (ret);
-}
-
-/*
- * __bam_fixed --
- * Build the real record for a fixed length put.
- */
-static int
-__bam_fixed(dbc, dbt)
- DBC *dbc;
- DBT *dbt;
-{
- DB *dbp;
- RECNO *rp;
- int ret;
-
- dbp = dbc->dbp;
- rp = ((BTREE *)dbp->internal)->recno;
-
- /*
- * If database contains fixed-length records, and the record is long,
- * return EINVAL.
- */
- if (dbt->size > rp->re_len)
- return (EINVAL);
-
- /*
- * The caller checked to see if it was just right, so we know it's
- * short. Pad it out. We use the record data return memory, it's
- * only a short-term use.
- */
- if (dbc->rdata.ulen < rp->re_len) {
- if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
- return (ret);
- }
- dbc->rdata.ulen = rp->re_len;
- }
- memcpy(dbc->rdata.data, dbt->data, dbt->size);
- memset((u_int8_t *)dbc->rdata.data + dbt->size,
- rp->re_pad, rp->re_len - dbt->size);
-
- /*
- * Clean up our flags and other information just in case, and
- * change the caller's DBT to reference our created record.
- */
- dbc->rdata.size = rp->re_len;
- dbc->rdata.dlen = 0;
- dbc->rdata.doff = 0;
- dbc->rdata.flags = 0;
- *dbt = dbc->rdata;
-
- return (0);
-}
-
-/*
- * __bam_partial --
- * Build the real record for a partial put.
- */
-static int
-__bam_partial(dbc, dbt, h, indx, nbytes, flags)
- DBC *dbc;
- DBT *dbt;
- PAGE *h;
- u_int32_t indx, nbytes, flags;
-{
- BKEYDATA *bk, tbk;
- BOVERFLOW *bo;
- DB *dbp;
- DBT copy;
- u_int32_t len, tlen;
- u_int8_t *p;
- int ret;
-
- COMPQUIET(bo, NULL);
-
- dbp = dbc->dbp;
-
- /* We use the record data return memory, it's only a short-term use. */
- if (dbc->rdata.ulen < nbytes) {
- if ((ret = __os_realloc(&dbc->rdata.data, nbytes)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
- return (ret);
- }
- dbc->rdata.ulen = nbytes;
- }
-
- /*
- * We use nul bytes for any part of the record that isn't specified;
- * get it over with.
- */
- memset(dbc->rdata.data, 0, nbytes);
-
- /*
- * In the next clauses, we need to do three things: a) set p to point
- * to the place at which to copy the user's data, b) set tlen to the
- * total length of the record, not including the bytes contributed by
- * the user, and c) copy any valid data from an existing record.
- */
- if (LF_ISSET(BI_NEWKEY)) {
- tlen = dbt->doff;
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
- goto ucopy;
- }
-
- /* Find the current record. */
- if (indx < NUM_ENT(h)) {
- bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
- bo = (BOVERFLOW *)bk;
- } else {
- bk = &tbk;
- B_TSET(bk->type, B_KEYDATA, 0);
- bk->len = 0;
- }
- if (B_TYPE(bk->type) == B_OVERFLOW) {
- /*
- * In the case of an overflow record, we shift things around
- * in the current record rather than allocate a separate copy.
- */
- memset(&copy, 0, sizeof(copy));
- if ((ret = __db_goff(dbp, &copy, bo->tlen,
- bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
- return (ret);
-
- /* Skip any leading data from the original record. */
- tlen = dbt->doff;
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
-
- /*
- * Copy in any trailing data from the original record.
- *
- * If the original record was larger than the original offset
- * plus the bytes being deleted, there is trailing data in the
- * original record we need to preserve. If we aren't deleting
- * the same number of bytes as we're inserting, copy it up or
- * down, into place.
- *
- * Use memmove(), the regions may overlap.
- */
- if (bo->tlen > dbt->doff + dbt->dlen) {
- len = bo->tlen - (dbt->doff + dbt->dlen);
- if (dbt->dlen != dbt->size)
- memmove(p + dbt->size, p + dbt->dlen, len);
- tlen += len;
- }
- } else {
- /* Copy in any leading data from the original record. */
- memcpy(dbc->rdata.data,
- bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
- tlen = dbt->doff;
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
-
- /* Copy in any trailing data from the original record. */
- len = dbt->doff + dbt->dlen;
- if (bk->len > len) {
- memcpy(p + dbt->size, bk->data + len, bk->len - len);
- tlen += bk->len - len;
- }
- }
-
-ucopy: /*
- * Copy in the application provided data -- p and tlen must have been
- * initialized above.
- */
- memcpy(p, dbt->data, dbt->size);
- tlen += dbt->size;
-
- /* Set the DBT to reference our new record. */
- dbc->rdata.size = tlen;
- dbc->rdata.dlen = 0;
- dbc->rdata.doff = 0;
- dbc->rdata.flags = 0;
- *dbt = dbc->rdata;
- return (0);
-}
diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c
deleted file mode 100644
index de6b3b7d0e..0000000000
--- a/db2/btree/bt_rec.c
+++ /dev/null
@@ -1,903 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_rec.c 10.28 (Sleepycat) 9/27/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "shqueue.h"
-#include "hash.h"
-#include "btree.h"
-#include "log.h"
-#include "common_ext.h"
-
-/*
- * __bam_pg_alloc_recover --
- * Recovery function for pg_alloc.
- *
- * PUBLIC: int __bam_pg_alloc_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_pg_alloc_args *argp;
- BTMETA *meta;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- DB *file_dbp;
- DBC *dbc;
- db_pgno_t pgno;
- int cmp_n, cmp_p, modified, ret;
-
- REC_PRINT(__bam_pg_alloc_print);
- REC_INTRO(__bam_pg_alloc_read);
-
- /*
- * Fix up the allocated page. If we're redoing the operation, we have
- * to get the page (creating it if it doesn't exist), and update its
- * LSN. If we're undoing the operation, we have to reset the page's
- * LSN and put it on the free list.
- *
- * Fix up the metadata page. If we're redoing the operation, we have
- * to get the metadata page and update its LSN and its free pointer.
- * If we're undoing the operation and the page was ever created, we put
- * it on the freelist.
- */
- pgno = PGNO_METADATA;
- if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
- /* The metadata page must always exist. */
- (void)__db_pgerr(file_dbp, pgno);
- goto out;
- }
- if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
- /*
- * We specify creation and check for it later, because this
- * operation was supposed to create the page, and even in
- * the undo case it's going to get linked onto the freelist
- * which we're also fixing up.
- */
- (void)__db_pgerr(file_dbp, argp->pgno);
- (void)memp_fput(mpf, meta, 0);
- goto out;
- }
-
- /* Fix up the allocated page. */
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
-
- pagep->lsn = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
-
- pagep->lsn = argp->page_lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
- (void)memp_fput(mpf, meta, 0);
- goto out;
- }
-
- /* Fix up the metadata page. */
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(meta));
- cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- meta->lsn = *lsnp;
- meta->free = argp->next;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- meta->lsn = argp->meta_lsn;
- meta->free = argp->pgno;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-}
-
-/*
- * __bam_pg_free_recover --
- * Recovery function for pg_free.
- *
- * PUBLIC: int __bam_pg_free_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_pg_free_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_pg_free_args *argp;
- BTMETA *meta;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, modified, ret;
-
- REC_PRINT(__bam_pg_free_print);
- REC_INTRO(__bam_pg_free_read);
-
- /*
- * Fix up the freed page. If we're redoing the operation we get the
- * page and explicitly discard its contents, then update its LSN. If
- * we're undoing the operation, we get the page and restore its header.
- */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
- /*
- * We don't automatically create the page. The only way the
- * page might not exist is if the alloc never happened, and
- * the only way the alloc might never have happened is if we
- * are undoing, in which case there's no reason to create the
- * page.
- */
- if (!redo)
- goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto out;
- }
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &LSN(argp->header.data));
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- P_INIT(pagep, file_dbp->pgsize,
- pagep->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
- pagep->lsn = *lsnp;
-
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- memcpy(pagep, argp->header.data, argp->header.size);
-
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
- /*
- * Fix up the metadata page. If we're redoing or undoing the operation
- * we get the page and update its LSN and free pointer.
- */
- pgno = PGNO_METADATA;
- if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
- /* The metadata page must always exist. */
- (void)__db_pgerr(file_dbp, pgno);
- goto out;
- }
-
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(meta));
- cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- meta->free = argp->pgno;
-
- meta->lsn = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- meta->free = argp->next;
-
- meta->lsn = argp->meta_lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-}
-
-/*
- * __bam_split_recover --
- * Recovery function for split.
- *
- * PUBLIC: int __bam_split_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_split_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_split_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
- db_pgno_t pgno;
- int l_update, p_update, r_update, ret, rootsplit, t_ret;
-
- REC_PRINT(__bam_split_print);
-
- mpf = NULL;
- _lp = lp = np = pp = _rp = rp = NULL;
-
- REC_INTRO(__bam_split_read);
-
- /*
- * There are two kinds of splits that we have to recover from. The
- * first is a root-page split, where the root page is split from a
- * leaf page into an internal page and two new leaf pages are created.
- * The second is where a page is split into two pages, and a new key
- * is inserted into the parent page.
- */
- sp = argp->pg.data;
- pgno = PGNO(sp);
- rootsplit = pgno == PGNO_ROOT;
- if (memp_fget(mpf, &argp->left, 0, &lp) != 0)
- lp = NULL;
- if (memp_fget(mpf, &argp->right, 0, &rp) != 0)
- rp = NULL;
-
- if (redo) {
- l_update = r_update = p_update = 0;
- /*
- * Decide if we need to resplit the page.
- *
- * If this is a root split, then the root has to exist, it's
- * the page we're splitting and it gets modified. If this is
- * not a root split, then the left page has to exist, for the
- * same reason.
- */
- if (rootsplit) {
- if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
- (void)__db_pgerr(file_dbp, pgno);
- pp = NULL;
- goto out;
- }
- p_update =
- log_compare(&LSN(pp), &LSN(argp->pg.data)) == 0;
- } else
- if (lp == NULL) {
- (void)__db_pgerr(file_dbp, argp->left);
- goto out;
- }
- if (lp == NULL || log_compare(&LSN(lp), &argp->llsn) == 0)
- l_update = 1;
- if (rp == NULL || log_compare(&LSN(rp), &argp->rlsn) == 0)
- r_update = 1;
- if (!p_update && !l_update && !r_update)
- goto done;
-
- /* Allocate and initialize new left/right child pages. */
- if ((ret = __os_malloc(file_dbp->pgsize, NULL, &_lp)) != 0 ||
- (ret = __os_malloc(file_dbp->pgsize, NULL, &_rp)) != 0)
- goto out;
- if (rootsplit) {
- P_INIT(_lp, file_dbp->pgsize, argp->left,
- PGNO_INVALID,
- ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
- LEVEL(sp), TYPE(sp));
- P_INIT(_rp, file_dbp->pgsize, argp->right,
- ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
- PGNO_INVALID, LEVEL(sp), TYPE(sp));
- } else {
- P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
- ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
- ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
- LEVEL(sp), TYPE(sp));
- P_INIT(_rp, file_dbp->pgsize, argp->right,
- ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
- ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
- LEVEL(sp), TYPE(sp));
- }
-
- /* Split the page. */
- if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
- (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
- NUM_ENT(sp))) != 0)
- goto out;
-
- /* If the left child is wrong, update it. */
- if (lp == NULL && (ret =
- memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
- (void)__db_pgerr(file_dbp, argp->left);
- lp = NULL;
- goto out;
- }
- if (l_update) {
- memcpy(lp, _lp, file_dbp->pgsize);
- lp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
- goto out;
- lp = NULL;
- }
-
- /* If the right child is wrong, update it. */
- if (rp == NULL && (ret = memp_fget(mpf,
- &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
- (void)__db_pgerr(file_dbp, argp->right);
- rp = NULL;
- goto out;
- }
- if (r_update) {
- memcpy(rp, _rp, file_dbp->pgsize);
- rp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
- goto out;
- rp = NULL;
- }
-
- /*
- * If the parent page is wrong, update it. This is of interest
- * only if it was a root split, since root splits create parent
- * pages. All other splits modify a parent page, but those are
- * separately logged and recovered.
- */
- if (rootsplit && p_update) {
- if (file_dbp->type == DB_BTREE)
- P_INIT(pp, file_dbp->pgsize,
- PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
- _lp->level + 1, P_IBTREE);
- else
- P_INIT(pp, file_dbp->pgsize,
- PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
- _lp->level + 1, P_IRECNO);
- RE_NREC_SET(pp,
- file_dbp->type == DB_RECNO ||
- F_ISSET(file_dbp, DB_BT_RECNUM) ?
- __bam_total(_lp) + __bam_total(_rp) : 0);
- pp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
- goto out;
- pp = NULL;
- }
-
- /*
- * Finally, redo the next-page link if necessary. This is of
- * interest only if it wasn't a root split -- inserting a new
- * page in the tree requires that any following page have its
- * previous-page pointer updated to our new page. The next
- * page must exist because we're redoing the operation.
- */
- if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
- if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
- (void)__db_pgerr(file_dbp, argp->npgno);
- np = NULL;
- goto out;
- }
- if (log_compare(&LSN(np), &argp->nlsn) == 0) {
- PREV_PGNO(np) = argp->right;
- np->lsn = *lsnp;
- if ((ret =
- memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0)
- goto out;
- np = NULL;
- }
- }
- } else {
- /*
- * If the split page is wrong, replace its contents with the
- * logged page contents. If the page doesn't exist, it means
- * that the create of the page never happened, nor did any of
- * the adds onto the page that caused the split, and there's
- * really no undo-ing to be done.
- */
- if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
- pp = NULL;
- goto lrundo;
- }
- if (log_compare(lsnp, &LSN(pp)) == 0) {
- memcpy(pp, argp->pg.data, argp->pg.size);
- if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
- goto out;
- pp = NULL;
- }
-
- /*
- * If it's a root split and the left child ever existed, update
- * its LSN. (If it's not a root split, we've updated the left
- * page already -- it's the same as the split page.) If the
- * right child ever existed, root split or not, update its LSN.
- * The undo of the page allocation(s) will restore them to the
- * free list.
- */
-lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
- if (rootsplit && lp != NULL &&
- log_compare(lsnp, &LSN(lp)) == 0) {
- lp->lsn = argp->llsn;
- if ((ret =
- memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
- goto out;
- lp = NULL;
- }
- if (rp != NULL &&
- log_compare(lsnp, &LSN(rp)) == 0) {
- rp->lsn = argp->rlsn;
- if ((ret =
- memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
- goto out;
- rp = NULL;
- }
- }
-
- /*
- * Finally, undo the next-page link if necessary. This is of
- * interest only if it wasn't a root split -- inserting a new
- * page in the tree requires that any following page have its
- * previous-page pointer updated to our new page. Since it's
- * possible that the next-page never existed, we ignore it as
- * if there's nothing to undo.
- */
- if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
- if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
- np = NULL;
- goto done;
- }
- if (log_compare(lsnp, &LSN(np)) == 0) {
- PREV_PGNO(np) = argp->left;
- np->lsn = argp->nlsn;
- if (memp_fput(mpf, np, DB_MPOOL_DIRTY))
- goto out;
- np = NULL;
- }
- }
- }
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: /* Free any pages that weren't dirtied. */
- if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0)
- ret = t_ret;
- if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0)
- ret = t_ret;
- if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0)
- ret = t_ret;
- if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Free any allocated space. */
- if (_lp != NULL)
- __os_free(_lp, file_dbp->pgsize);
- if (_rp != NULL)
- __os_free(_rp, file_dbp->pgsize);
-
- REC_CLOSE;
-}
-
-/*
- * __bam_rsplit_recover --
- * Recovery function for a reverse split.
- *
- * PUBLIC: int __bam_rsplit_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_rsplit_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_rsplit_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, modified, ret;
-
- REC_PRINT(__bam_rsplit_print);
- REC_INTRO(__bam_rsplit_read);
-
- /* Fix the root page. */
- pgno = PGNO_ROOT;
- if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
- /* The root page must always exist. */
- __db_pgerr(file_dbp, pgno);
- goto out;
- }
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->rootlsn);
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
- pagep->pgno = PGNO_ROOT;
- pagep->lsn = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT,
- argp->nrec, PGNO_INVALID, pagep->level + 1,
- file_dbp->type == DB_BTREE ? P_IBTREE : P_IRECNO);
- if ((ret = __db_pitem(dbc, pagep, 0,
- argp->rootent.size, &argp->rootent, NULL)) != 0)
- goto out;
- pagep->lsn = argp->rootlsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
- /*
- * Fix the page copied over the root page. It's possible that the
- * page never made it to disk, so if we're undo-ing and the page
- * doesn't exist, it's okay and there's nothing further to do.
- */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
- if (!redo)
- goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto out;
- }
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &LSN(argp->pgdbt.data));
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- pagep->lsn = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-}
-
-/*
- * __bam_adj_recover --
- * Recovery function for adj.
- *
- * PUBLIC: int __bam_adj_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_adj_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_adj_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, modified, ret;
-
- REC_PRINT(__bam_adj_print);
- REC_INTRO(__bam_adj_read);
-
- /* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
- if (!redo)
- goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto out;
- }
-
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->lsn);
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- if ((ret = __bam_adjindx(dbc,
- pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
- goto err;
-
- LSN(pagep) = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- if ((ret = __bam_adjindx(dbc,
- pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
- goto err;
-
- LSN(pagep) = argp->lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
- if (0) {
-err: (void)memp_fput(mpf, pagep, 0);
- }
-out: REC_CLOSE;
-}
-
-/*
- * __bam_cadjust_recover --
- * Recovery function for the adjust of a count change in an internal
- * page.
- *
- * PUBLIC: int __bam_cadjust_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_cadjust_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_cadjust_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, modified, ret;
-
- REC_PRINT(__bam_cadjust_print);
- REC_INTRO(__bam_cadjust_read);
-
- /* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
- if (!redo)
- goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto out;
- }
-
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->lsn);
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- if (file_dbp->type == DB_BTREE &&
- F_ISSET(file_dbp, DB_BT_RECNUM)) {
- GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
- if (argp->total && PGNO(pagep) == PGNO_ROOT)
- RE_NREC_ADJ(pagep, argp->adjust);
- }
- if (file_dbp->type == DB_RECNO) {
- GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
- if (argp->total && PGNO(pagep) == PGNO_ROOT)
- RE_NREC_ADJ(pagep, argp->adjust);
- }
-
- LSN(pagep) = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- if (file_dbp->type == DB_BTREE &&
- F_ISSET(file_dbp, DB_BT_RECNUM)) {
- GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
- if (argp->total && PGNO(pagep) == PGNO_ROOT)
- RE_NREC_ADJ(pagep, argp->adjust);
- }
- if (file_dbp->type == DB_RECNO) {
- GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
- if (argp->total && PGNO(pagep) == PGNO_ROOT)
- RE_NREC_ADJ(pagep, -(argp->adjust));
- }
- LSN(pagep) = argp->lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-}
-
-/*
- * __bam_cdel_recover --
- * Recovery function for the intent-to-delete of a cursor record.
- *
- * PUBLIC: int __bam_cdel_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_cdel_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_cdel_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, modified, ret;
-
- REC_PRINT(__bam_cdel_print);
- REC_INTRO(__bam_cdel_read);
-
- /* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
- if (!redo)
- goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto out;
- }
-
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->lsn);
- if (cmp_p == 0 && redo) {
- /* Need to redo update described. */
- if (pagep->type == P_DUPLICATE)
- B_DSET(GET_BKEYDATA(pagep, argp->indx)->type);
- else
- B_DSET(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type);
-
- LSN(pagep) = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /* Need to undo update described. */
- if (pagep->type == P_DUPLICATE)
- B_DCLR(GET_BKEYDATA(pagep, argp->indx)->type);
- else
- B_DCLR(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type);
-
- LSN(pagep) = argp->lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-}
-
-/*
- * __bam_repl_recover --
- * Recovery function for page item replacement.
- *
- * PUBLIC: int __bam_repl_recover
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_repl_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __bam_repl_args *argp;
- BKEYDATA *bk;
- DB *file_dbp;
- DBC *dbc;
- DBT dbt;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, modified, ret;
- u_int8_t *p;
-
- REC_PRINT(__bam_repl_print);
- REC_INTRO(__bam_repl_read);
-
- /* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
- if (!redo)
- goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto out;
- }
- bk = GET_BKEYDATA(pagep, argp->indx);
-
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->lsn);
- if (cmp_p == 0 && redo) {
- /*
- * Need to redo update described.
- *
- * Re-build the replacement item.
- */
- memset(&dbt, 0, sizeof(dbt));
- dbt.size = argp->prefix + argp->suffix + argp->repl.size;
- if ((ret = __os_malloc(dbt.size, NULL, &dbt.data)) != 0)
- goto err;
- p = dbt.data;
- memcpy(p, bk->data, argp->prefix);
- p += argp->prefix;
- memcpy(p, argp->repl.data, argp->repl.size);
- p += argp->repl.size;
- memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
-
- ret = __bam_ritem(dbc, pagep, argp->indx, &dbt);
- __os_free(dbt.data, dbt.size);
- if (ret != 0)
- goto err;
-
- LSN(pagep) = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && !redo) {
- /*
- * Need to undo update described.
- *
- * Re-build the original item.
- */
- memset(&dbt, 0, sizeof(dbt));
- dbt.size = argp->prefix + argp->suffix + argp->orig.size;
- if ((ret = __os_malloc(dbt.size, NULL, &dbt.data)) != 0)
- goto err;
- p = dbt.data;
- memcpy(p, bk->data, argp->prefix);
- p += argp->prefix;
- memcpy(p, argp->orig.data, argp->orig.size);
- p += argp->orig.size;
- memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
-
- ret = __bam_ritem(dbc, pagep, argp->indx, &dbt);
- __os_free(dbt.data, dbt.size);
- if (ret != 0)
- goto err;
-
- /* Reset the deleted flag, if necessary. */
- if (argp->isdeleted)
- B_DSET(GET_BKEYDATA(pagep, argp->indx)->type);
-
- LSN(pagep) = argp->lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
- if (0) {
-err: (void)memp_fput(mpf, pagep, 0);
- }
-out: REC_CLOSE;
-}
diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c
deleted file mode 100644
index c69877ff7f..0000000000
--- a/db2/btree/bt_recno.c
+++ /dev/null
@@ -1,1356 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_recno.c 10.53 (Sleepycat) 12/11/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <limits.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "db_ext.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "lock_ext.h"
-
-static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
-static int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
-static int __ram_fmap __P((DBC *, db_recno_t));
-static int __ram_i_delete __P((DBC *));
-static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
-static int __ram_source __P((DB *, RECNO *, const char *));
-static int __ram_sync __P((DB *, u_int32_t));
-static int __ram_update __P((DBC *, db_recno_t, int));
-static int __ram_vmap __P((DBC *, db_recno_t));
-static int __ram_writeback __P((DBC *));
-
-/*
- * In recno, there are two meanings to the on-page "deleted" flag. If we're
- * re-numbering records, it means the record was implicitly created. We skip
- * over implicitly created records if doing a cursor "next" or "prev", and
- * return DB_KEYEMPTY if they're explicitly requested.. If not re-numbering
- * records, it means that the record was implicitly created, or was deleted.
- * We skip over implicitly created or deleted records if doing a cursor "next"
- * or "prev", and return DB_KEYEMPTY if they're explicitly requested.
- *
- * If we're re-numbering records, then we have to detect in the cursor that
- * a record was deleted, and adjust the cursor as necessary on the next get.
- * If we're not re-numbering records, then we can detect that a record has
- * been deleted by looking at the actual on-page record, so we completely
- * ignore the cursor's delete flag. This is different from the B+tree code.
- * It also maintains whether the cursor references a deleted record in the
- * cursor, and it doesn't always check the on-page value.
- */
-#define CD_SET(dbp, cp) { \
- if (F_ISSET(dbp, DB_RE_RENUMBER)) \
- F_SET(cp, C_DELETED); \
-}
-#define CD_CLR(dbp, cp) { \
- if (F_ISSET(dbp, DB_RE_RENUMBER)) \
- F_CLR(cp, C_DELETED); \
-}
-#define CD_ISSET(dbp, cp) \
- (F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, C_DELETED))
-
-/*
- * __ram_open --
- * Recno open function.
- *
- * PUBLIC: int __ram_open __P((DB *, DB_INFO *));
- */
-int
-__ram_open(dbp, dbinfo)
- DB *dbp;
- DB_INFO *dbinfo;
-{
- BTREE *t;
- DBC *dbc;
- RECNO *rp;
- int ret, t_ret;
-
- /* Allocate and initialize the private btree structure. */
- if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0)
- return (ret);
- dbp->internal = t;
- __bam_setovflsize(dbp);
-
- /* Allocate and initialize the private recno structure. */
- if ((ret = __os_calloc(1, sizeof(*rp), &rp)) != 0)
- return (ret);
- /* Link in the private recno structure. */
- t->recno = rp;
-
- /*
- * Intention is to make sure all of the user's selections are okay
- * here and then use them without checking.
- */
- if (dbinfo == NULL) {
- rp->re_delim = '\n';
- rp->re_pad = ' ';
- rp->re_fd = -1;
- F_SET(rp, RECNO_EOF);
- } else {
- /*
- * If the user specified a source tree, open it and map it in.
- *
- * !!!
- * We don't complain if the user specified transactions or
- * threads. It's possible to make it work, but you'd better
- * know what you're doing!
- */
- if (dbinfo->re_source == NULL) {
- rp->re_fd = -1;
- F_SET(rp, RECNO_EOF);
- } else {
- if ((ret =
- __ram_source(dbp, rp, dbinfo->re_source)) != 0)
- goto err;
- }
-
- /* Copy delimiter, length and padding values. */
- rp->re_delim =
- F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n';
- rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? dbinfo->re_pad : ' ';
-
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
- if ((rp->re_len = dbinfo->re_len) == 0) {
- __db_err(dbp->dbenv,
- "record length must be greater than 0");
- ret = EINVAL;
- goto err;
- }
- } else
- rp->re_len = 0;
- }
-
- /* Initialize the remaining fields/methods of the DB. */
- dbp->am_close = __ram_close;
- dbp->del = __ram_delete;
- dbp->put = __ram_put;
- dbp->stat = __bam_stat;
- dbp->sync = __ram_sync;
-
- /* Start up the tree. */
- if ((ret = __bam_read_root(dbp)) != 0)
- goto err;
-
- /* Set the overflow page size. */
- __bam_setovflsize(dbp);
-
- /* If we're snapshotting an underlying source file, do it now. */
- if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT)) {
- /* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
- goto err;
-
- /* Do the snapshot. */
- if ((ret = __ram_update(dbc,
- DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND)
- ret = 0;
-
- /* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- if (ret != 0)
- goto err;
- }
-
- return (0);
-
-err: /* If we mmap'd a source file, discard it. */
- if (rp->re_smap != NULL)
- (void)__db_unmapfile(rp->re_smap, rp->re_msize);
-
- /* If we opened a source file, discard it. */
- if (rp->re_fd != -1)
- (void)__os_close(rp->re_fd);
- if (rp->re_source != NULL)
- __os_freestr(rp->re_source);
-
- __os_free(rp, sizeof(*rp));
-
- return (ret);
-}
-
-/*
- * __ram_delete --
- * Recno db->del function.
- */
-static int
-__ram_delete(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- CURSOR *cp;
- DBC *dbc;
- db_recno_t recno;
- int ret, t_ret;
-
- DB_PANIC_CHECK(dbp);
-
- /* Check for invalid flags. */
- if ((ret = __db_delchk(dbp,
- key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
- return (ret);
-
- /* Acquire a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags);
-
- /* Check the user's record number and fill in as necessary. */
- if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
- goto err;
-
- /* Do the delete. */
- cp = dbc->internal;
- cp->recno = recno;
- ret = __ram_i_delete(dbc);
-
- /* Release the cursor. */
-err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __ram_i_delete --
- * Internal version of recno delete, called by __ram_delete and
- * __ram_c_del.
- */
-static int
-__ram_i_delete(dbc)
- DBC *dbc;
-{
- BKEYDATA bk;
- BTREE *t;
- CURSOR *cp;
- DB *dbp;
- DBT hdr, data;
- PAGE *h;
- db_indx_t indx;
- int exact, ret, stack;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
- t = dbp->internal;
- stack = 0;
-
- /*
- * If this is CDB and this isn't a write cursor, then it's an error.
- * If it is a write cursor, but we don't yet hold the write lock, then
- * we need to upgrade to the write lock.
- */
- if (F_ISSET(dbp, DB_AM_CDB)) {
- /* Make sure it's a valid update cursor. */
- if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
- return (EINVAL);
-
- if (F_ISSET(dbc, DBC_RMW) &&
- (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
- DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
- &dbc->mylock)) != 0)
- return (EAGAIN);
- }
-
- /* Search the tree for the key; delete only deletes exact matches. */
- if ((ret = __bam_rsearch(dbc, &cp->recno, S_DELETE, 1, &exact)) != 0)
- goto err;
- if (!exact) {
- ret = DB_NOTFOUND;
- goto err;
- }
- stack = 1;
-
- h = cp->csp->page;
- indx = cp->csp->indx;
-
- /*
- * If re-numbering records, the on-page deleted flag can only mean
- * that this record was implicitly created. Applications aren't
- * permitted to delete records they never created, return an error.
- *
- * If not re-numbering records, the on-page deleted flag means that
- * this record was implicitly created, or, was deleted at some time.
- * The former is an error because applications aren't permitted to
- * delete records they never created, the latter is an error because
- * if the record was "deleted", we could never have found it.
- */
- if (B_DISSET(GET_BKEYDATA(h, indx)->type)) {
- ret = DB_KEYEMPTY;
- goto err;
- }
-
- if (F_ISSET(dbp, DB_RE_RENUMBER)) {
- /* Delete the item, adjust the counts, adjust the cursors. */
- if ((ret = __bam_ditem(dbc, h, indx)) != 0)
- goto err;
- __bam_adjust(dbc, -1);
- __ram_ca(dbp, cp->recno, CA_DELETE);
-
- /*
- * If the page is empty, delete it. The whole tree is locked
- * so there are no preparations to make.
- */
- if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
- stack = 0;
- ret = __bam_dpages(dbc);
- }
- } else {
- /* Use a delete/put pair to replace the record with a marker. */
- if ((ret = __bam_ditem(dbc, h, indx)) != 0)
- goto err;
-
- B_TSET(bk.type, B_KEYDATA, 1);
- bk.len = 0;
- memset(&hdr, 0, sizeof(hdr));
- hdr.data = &bk;
- hdr.size = SSZA(BKEYDATA, data);
- memset(&data, 0, sizeof(data));
- data.data = (char *)"";
- data.size = 0;
- if ((ret = __db_pitem(dbc,
- h, indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0)
- goto err;
- }
- F_SET(t->recno, RECNO_MODIFIED);
-
-err: if (stack)
- __bam_stkrel(dbc, 0);
-
- /* If we upgraded the CDB lock upon entry; downgrade it now. */
- if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
- (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
- DB_LOCK_IWRITE, 0);
- return (ret);
-}
-
-/*
- * __ram_put --
- * Recno db->put function.
- */
-static int
-__ram_put(dbp, txn, key, data, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key, *data;
- u_int32_t flags;
-{
- DBC *dbc;
- db_recno_t recno;
- int ret, t_ret;
-
- DB_PANIC_CHECK(dbp);
-
- /* Check for invalid flags. */
- if ((ret = __db_putchk(dbp,
- key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0)
- return (ret);
-
- /* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags);
-
- /*
- * If we're appending to the tree, make sure we've read in all of
- * the backing source file. Otherwise, check the user's record
- * number and fill in as necessary.
- */
- ret = flags == DB_APPEND ?
- __ram_update(dbc, DB_MAX_RECORDS, 0) :
- __ram_getno(dbc, key, &recno, 1);
-
- /* Add the record. */
- if (ret == 0)
- ret = __ram_add(dbc, &recno, data, flags, 0);
-
- /* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Return the record number if we're appending to the tree. */
- if (ret == 0 && flags == DB_APPEND)
- *(db_recno_t *)key->data = recno;
-
- return (ret);
-}
-
-/*
- * __ram_sync --
- * Recno db->sync function.
- */
-static int
-__ram_sync(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- DBC *dbc;
- int ret, t_ret;
-
- /*
- * Sync the underlying btree.
- *
- * !!!
- * We don't need to do a panic check or flags check, the "real"
- * sync function does all that for us.
- */
- if ((ret = __db_sync(dbp, flags)) != 0)
- return (ret);
-
- /* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, NULL, "ram_sync", NULL, NULL, flags);
-
- /* Copy back the backing source file. */
- ret = __ram_writeback(dbc);
-
- /* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __ram_close --
- * Recno db->close function.
- *
- * PUBLIC: int __ram_close __P((DB *));
- */
-int
-__ram_close(dbp)
- DB *dbp;
-{
- RECNO *rp;
-
- rp = ((BTREE *)dbp->internal)->recno;
-
- /* Close any underlying mmap region. */
- if (rp->re_smap != NULL)
- (void)__db_unmapfile(rp->re_smap, rp->re_msize);
-
- /* Close any backing source file descriptor. */
- if (rp->re_fd != -1)
- (void)__os_close(rp->re_fd);
-
- /* Free any backing source file name. */
- if (rp->re_source != NULL)
- __os_freestr(rp->re_source);
-
- /* Free allocated memory. */
- __os_free(rp, sizeof(RECNO));
- ((BTREE *)dbp->internal)->recno = NULL;
-
- /* Close the underlying btree. */
- return (__bam_close(dbp));
-}
-
-/*
- * __ram_c_del --
- * Recno cursor->c_del function.
- *
- * PUBLIC: int __ram_c_del __P((DBC *, u_int32_t));
- */
-int
-__ram_c_del(dbc, flags)
- DBC *dbc;
- u_int32_t flags;
-{
- CURSOR *cp;
- DB *dbp;
- int ret;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
-
- DB_PANIC_CHECK(dbp);
-
- /* Check for invalid flags. */
- if ((ret = __db_cdelchk(dbp, flags,
- F_ISSET(dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, dbc->txn, "ram_c_del", NULL, NULL, flags);
-
- /*
- * If we are running CDB, this had better be either a write
- * cursor or an immediate writer.
- */
- if (F_ISSET(dbp, DB_AM_CDB))
- if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
- return (EINVAL);
-
- /*
- * The semantics of cursors during delete are as follows: if record
- * numbers are mutable (DB_RE_RENUMBER is set), deleting a record
- * causes the cursor to automatically point to the record immediately
- * following. In this case it is possible to use a single cursor for
- * repeated delete operations, without intervening operations.
- *
- * If record numbers are not mutable, then records are replaced with
- * a marker containing a delete flag. If the record referenced by
- * this cursor has already been deleted, we will detect that as part
- * of the delete operation, and fail.
- */
- return (__ram_i_delete(dbc));
-}
-
-/*
- * __ram_c_get --
- * Recno cursor->c_get function.
- *
- * PUBLIC: int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__ram_c_get(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- CURSOR *cp, copy;
- DB *dbp;
- PAGE *h;
- db_indx_t indx;
- int exact, ret, stack, tmp_rmw;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
-
- DB_PANIC_CHECK(dbp);
-
- /* Check for invalid flags. */
- if ((ret = __db_cgetchk(dbc->dbp,
- key, data, flags, cp->recno != RECNO_OOB)) != 0)
- return (ret);
-
- /* Clear OR'd in additional bits so we can check for flag equality. */
- tmp_rmw = 0;
- if (LF_ISSET(DB_RMW)) {
- if (!F_ISSET(dbp, DB_AM_CDB)) {
- tmp_rmw = 1;
- F_SET(dbc, DBC_RMW);
- }
- LF_CLR(DB_RMW);
- }
-
- DEBUG_LREAD(dbc, dbc->txn, "ram_c_get",
- flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
-
- /* Initialize the cursor for a new retrieval. */
- copy = *cp;
-
-retry: /* Update the record number. */
- stack = 0;
- switch (flags) {
- case DB_CURRENT:
- /*
- * If record numbers are mutable: if we just deleted a record,
- * there is no action necessary, we return the record following
- * the deleted item by virtue of renumbering the tree.
- */
- break;
- case DB_NEXT:
- /*
- * If record numbers are mutable: if we just deleted a record,
- * we have to avoid incrementing the record number so that we
- * return the right record by virtue of renumbering the tree.
- */
- if (CD_ISSET(dbp, cp))
- break;
-
- if (cp->recno != RECNO_OOB) {
- ++cp->recno;
- break;
- }
- /* FALLTHROUGH */
- case DB_FIRST:
- flags = DB_NEXT;
- cp->recno = 1;
- break;
- case DB_PREV:
- if (cp->recno != RECNO_OOB) {
- if (cp->recno == 1) {
- ret = DB_NOTFOUND;
- goto err;
- }
- --cp->recno;
- break;
- }
- /* FALLTHROUGH */
- case DB_LAST:
- flags = DB_PREV;
- if (((ret = __ram_update(dbc,
- DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND)
- goto err;
- if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0)
- goto err;
- if (cp->recno == 0) {
- ret = DB_NOTFOUND;
- goto err;
- }
- break;
- case DB_SET:
- case DB_SET_RANGE:
- if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0)
- goto err;
- break;
- }
-
- /* Return the key if the user didn't give us one. */
- if (flags != DB_SET && flags != DB_SET_RANGE &&
- (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
- &dbc->rkey.data, &dbc->rkey.ulen, dbp->db_malloc)) != 0)
- goto err;
-
- /* Search the tree for the record. */
- if ((ret = __bam_rsearch(dbc, &cp->recno,
- F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, 1, &exact)) != 0)
- goto err;
- stack = 1;
- if (!exact) {
- ret = DB_NOTFOUND;
- goto err;
- }
- h = cp->csp->page;
- indx = cp->csp->indx;
-
- /*
- * If re-numbering records, the on-page deleted flag means this record
- * was implicitly created. If not re-numbering records, the on-page
- * deleted flag means this record was implicitly created, or, it was
- * deleted at some time. Regardless, we skip such records if doing
- * cursor next/prev operations, and fail if the application requested
- * them explicitly.
- */
- if (B_DISSET(GET_BKEYDATA(h, indx)->type)) {
- if (flags == DB_NEXT || flags == DB_PREV) {
- (void)__bam_stkrel(dbc, 0);
- goto retry;
- }
- ret = DB_KEYEMPTY;
- goto err;
- }
-
- /* Return the data item. */
- if ((ret = __db_ret(dbp,
- h, indx, data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
- goto err;
-
- /* The cursor was reset, no further delete adjustment is necessary. */
- CD_CLR(dbp, cp);
-
-err: if (stack)
- (void)__bam_stkrel(dbc, 0);
-
- /* Release temporary lock upgrade. */
- if (tmp_rmw)
- F_CLR(dbc, DBC_RMW);
-
- if (ret != 0)
- *cp = copy;
-
- return (ret);
-}
-
-/*
- * __ram_c_put --
- *	Recno cursor->c_put function.
- *
- *	Hands key/data to __bam_iitem() at the record the cursor references.
- *	The arguments are validated by __db_cputchk().  For DB_AFTER and
- *	DB_BEFORE the other active cursors are renumbered through __ram_ca()
- *	and this cursor is moved to the new record.
- *
- *	Returns 0 on success; EINVAL or EAGAIN if the CDB locking
- *	requirements can't be met; DB_NOTFOUND if the search doesn't find an
- *	exact match for the cursor's record; otherwise the error from the
- *	search, insert, split or stack release.  On any failure reached
- *	through the err label the cursor is restored from the copy taken
- *	before the search.
- *
- * PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__ram_c_put(dbc, key, data, flags)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	CURSOR *cp, copy;
-	DB *dbp;
-	int exact, ret, t_ret;
-	void *arg;
-
-	dbp = dbc->dbp;
-	cp = dbc->internal;
-
-	DB_PANIC_CHECK(dbp);
-
-	if ((ret = __db_cputchk(dbc->dbp, key, data, flags,
-	    F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
-		return (ret);
-
-	DEBUG_LWRITE(dbc, dbc->txn, "ram_c_put", NULL, data, flags);
-
-	/*
-	 * If we are running CDB, this had better be either a write
-	 * cursor or an immediate writer.  If it's a regular writer,
-	 * that means we have an IWRITE lock and we need to upgrade
-	 * it to a write lock.
-	 */
-	if (F_ISSET(dbp, DB_AM_CDB)) {
-		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
-			return (EINVAL);
-
-		if (F_ISSET(dbc, DBC_RMW) &&
-		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
-		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
-		    &dbc->mylock)) != 0)
-			return (EAGAIN);
-	}
-
-	/* Save the cursor so it can be restored if anything below fails. */
-	copy = *cp;
-
-	/*
-	 * To split, we need a valid key for the page.  Since it's a cursor,
-	 * we have to build one.
-	 *
-	 * The split code discards all short-term locks and stack pages.
-	 * This block is only entered through the split label.
-	 */
-	if (0) {
-split:		arg = &cp->recno;
-		if ((ret = __bam_split(dbc, arg)) != 0)
-			goto err;
-	}
-
-	if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
-		goto err;
-	if (!exact) {
-		/*
-		 * A successful search leaves its page on the cursor stack
-		 * even when the match is inexact; release it before failing.
-		 */
-		(void)__bam_stkrel(dbc, 0);
-		ret = DB_NOTFOUND;
-		goto err;
-	}
-
-	/*
-	 * The stack is always released after the insert, but the release
-	 * must not overwrite the insert's return value: an insert failure
-	 * other than DB_NEEDSPLIT has to reach the caller.
-	 */
-	ret = __bam_iitem(dbc,
-	    &cp->csp->page, &cp->csp->indx, key, data, flags, 0);
-	t_ret = __bam_stkrel(dbc, 0);
-	if (ret == DB_NEEDSPLIT) {
-		if ((ret = t_ret) != 0)
-			goto err;
-		goto split;
-	}
-	if (ret != 0 || (ret = t_ret) != 0)
-		goto err;
-
-	switch (flags) {
-	case DB_AFTER:
-		/* Adjust the cursors. */
-		__ram_ca(dbp, cp->recno, CA_IAFTER);
-
-		/* Set this cursor to reference the new record. */
-		cp->recno = copy.recno + 1;
-		break;
-	case DB_BEFORE:
-		/* Adjust the cursors. */
-		__ram_ca(dbp, cp->recno, CA_IBEFORE);
-
-		/* Set this cursor to reference the new record. */
-		cp->recno = copy.recno;
-		break;
-	}
-
-	/* The cursor was reset, no further delete adjustment is necessary. */
-	CD_CLR(dbp, cp);
-
-	/* Undo the CDB lock upgrade taken above, on success and on failure. */
-err:	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
-		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
-		    DB_LOCK_IWRITE, 0);
-
-	if (ret != 0)
-		*cp = copy;
-
-	return (ret);
-}
-
-/*
- * __ram_ca --
- *	Adjust cursors.
- *
- *	Walks every cursor on dbp->active_queue and renumbers it for an
- *	insert or delete at record number recno:
- *
- *	CA_DELETE:  cursors past recno move down by one; a cursor on recno
- *		    is flagged with CD_SET.
- *	CA_IAFTER:  a record was added after recno; cursors past recno move
- *		    up by one.
- *	CA_IBEFORE: a record was added before recno; cursors on or past
- *		    recno move up by one.
- *
- * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
- */
-void
-__ram_ca(dbp, recno, op)
-	DB *dbp;
-	db_recno_t recno;
-	ca_recno_arg op;
-{
-	CURSOR *cp;
-	DBC *dbc;
-
-	/*
-	 * Adjust the cursors.  See the comment in __bam_ca_delete().
-	 */
-	DB_THREAD_LOCK(dbp);
-	for (dbc = TAILQ_FIRST(&dbp->active_queue);
-	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
-		cp = dbc->internal;
-		switch (op) {
-		case CA_DELETE:
-			/*
-			 * The two tests are exclusive: a cursor that slides
-			 * down onto recno references the record that took
-			 * the deleted record's place, not the deleted one.
-			 */
-			if (recno < cp->recno)
-				--cp->recno;
-			else if (recno == cp->recno)
-				CD_SET(dbp, cp);
-			break;
-		case CA_IAFTER:
-			if (recno < cp->recno)
-				++cp->recno;
-			break;
-		case CA_IBEFORE:
-			if (recno <= cp->recno)
-				++cp->recno;
-			break;
-		}
-	}
-	DB_THREAD_UNLOCK(dbp);
-}
-
-/*
- * __ram_getno --
- *	Validate a caller-supplied record number and, for Recno databases,
- *	make sure the tree has been brought up to date for it.
- *
- *	The number is read from key->data and, when rep is non-NULL, copied
- *	to *rep.  Returns EINVAL for record number 0, otherwise 0 or the
- *	result of __ram_update().
- *
- * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
- */
-int
-__ram_getno(dbc, key, rep, can_create)
-	DBC *dbc;
-	const DBT *key;
-	db_recno_t *rep;
-	int can_create;
-{
-	DB *dbp;
-	db_recno_t recno;
-
-	dbp = dbc->dbp;
-
-	/* Zero is never a legal record number. */
-	recno = *(db_recno_t *)key->data;
-	if (recno == 0) {
-		__db_err(dbp->dbenv, "illegal record number of 0");
-		return (EINVAL);
-	}
-	if (rep != NULL)
-		*rep = recno;
-
-	/*
-	 * Only Recno can create records or read them in from a source
-	 * file; for any other access method there is nothing more to do.
-	 */
-	if (dbp->type != DB_RECNO)
-		return (0);
-	return (__ram_update(dbc, recno, can_create));
-}
-
-/*
- * __ram_update --
- *	Make sure the tree holds records up to and including recno.
- *
- *	Records are first pulled in through rp->re_irec while the source
- *	has not hit EOF.  If can_create is set, any remaining gap below
- *	recno is filled with records added with the BI_DELETED flag.
- */
-static int
-__ram_update(dbc, recno, can_create)
-	DBC *dbc;
-	db_recno_t recno;
-	int can_create;
-{
-	DB *dbp;
-	DBT *fill;
-	RECNO *rp;
-	db_recno_t nrecs;
-	int ret;
-
-	dbp = dbc->dbp;
-	rp = ((BTREE *)dbp->internal)->recno;
-
-	/*
-	 * Nothing to do when records can't be created and the whole
-	 * backing input file has already been read.
-	 */
-	if (!can_create && F_ISSET(rp, RECNO_EOF))
-		return (0);
-
-	/*
-	 * If the record is past what the tree holds, try to read it from
-	 * the original file, then recount.
-	 */
-	if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
-		return (ret);
-	if (recno > nrecs && !F_ISSET(rp, RECNO_EOF)) {
-		if ((ret = rp->re_irec(dbc, recno)) != 0)
-			return (ret);
-		if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
-			return (ret);
-	}
-
-	/* Done unless we may create records and a gap remains. */
-	if (!can_create)
-		return (0);
-	if (recno <= nrecs + 1)
-		return (0);
-
-	/* Build the filler record in the cursor's return-data DBT. */
-	fill = &dbc->rdata;
-	fill->dlen = 0;
-	fill->doff = 0;
-	fill->flags = 0;
-	if (!F_ISSET(dbp, DB_RE_FIXEDLEN))
-		fill->size = 0;
-	else {
-		if (fill->ulen < rp->re_len) {
-			ret = __os_realloc(&fill->data, rp->re_len);
-			if (ret != 0) {
-				fill->ulen = 0;
-				fill->data = NULL;
-				return (ret);
-			}
-			fill->ulen = rp->re_len;
-		}
-		fill->size = rp->re_len;
-		memset(fill->data, rp->re_pad, rp->re_len);
-	}
-
-	/* Add filler records for nrecs + 1 through recno - 1. */
-	for (++nrecs; recno > nrecs; ++nrecs)
-		if ((ret =
-		    __ram_add(dbc, &nrecs, fill, 0, BI_DELETED)) != 0)
-			return (ret);
-	return (0);
-}
-
-/*
- * __ram_source --
- *	Open and map the backing source file named by fname, recording the
- *	path, descriptor and mapping in rp.  An empty file is not mapped;
- *	RECNO_EOF is set instead.
- */
-static int
-__ram_source(dbp, rp, fname)
-	DB *dbp;
-	RECNO *rp;
-	const char *fname;
-{
-	size_t size;
-	u_int32_t bytes, mbytes, oflags;
-	int ret;
-
-	/*
-	 * !!!
-	 * Nothing is cleaned up here on error; that is entirely the
-	 * caller's job (it has to do it anyway, in case it fails after
-	 * this routine succeeds).
-	 */
-	ret = __db_appname(dbp->dbenv,
-	    DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source);
-	if (ret != 0)
-		return (ret);
-
-	oflags = 0;
-	if (F_ISSET(dbp, DB_AM_RDONLY))
-		oflags = DB_RDONLY;
-	ret = __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd);
-	if (ret != 0) {
-		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
-		return (ret);
-	}
-
-	/*
-	 * XXX
-	 * We'd like to check whether the file is too big to mmap, but
-	 * without knowing the size or type of off_t and size_t, or the
-	 * largest unsigned integral type, there is no portable way to
-	 * write the comparison.  Hope that mmap fails if the file is too
-	 * large.
-	 */
-	ret = __os_ioinfo(rp->re_source, rp->re_fd, &mbytes, &bytes, NULL);
-	if (ret != 0) {
-		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
-		return (ret);
-	}
-
-	/* An empty file has no records to read. */
-	if (mbytes == 0 && bytes == 0) {
-		F_SET(rp, RECNO_EOF);
-		return (0);
-	}
-
-	size = mbytes * MEGABYTE + bytes;
-	ret = __db_mapfile(rp->re_source, rp->re_fd, size, 1, &rp->re_smap);
-	if (ret != 0)
-		return (ret);
-
-	/* Remember the mapping's extent and start reading at its head. */
-	rp->re_msize = size;
-	rp->re_cmap = rp->re_smap;
-	rp->re_emap = (u_int8_t *)rp->re_smap + rp->re_msize;
-
-	/* Pick the record reader that matches the record format. */
-	if (F_ISSET(dbp, DB_RE_FIXEDLEN))
-		rp->re_irec = __ram_fmap;
-	else
-		rp->re_irec = __ram_vmap;
-	return (0);
-}
-
-/*
- * __ram_writeback --
- *	Rewrite the backing file.
- *
- *	Returns immediately unless the tree is flagged RECNO_MODIFIED.  If
- *	there is no backing source file the flag is simply cleared.
- *	Otherwise the remaining source records are read into the tree, the
- *	source file is reopened with truncation, and every record from 1
- *	upward is written back out.  RECNO_MODIFIED is cleared on success.
- *
- *	Returns 0 on success, EIO on a short write, or the error from the
- *	failing update, open, get, write or close.  The first error seen is
- *	the one reported.
- */
-static int
-__ram_writeback(dbc)
-	DBC *dbc;
-{
-	DB *dbp;
-	DBT key, data;
-	RECNO *rp;
-	db_recno_t keyno;
-	ssize_t nw;
-	int fd, ret, t_ret;
-	u_int8_t delim, *pad;
-
-	dbp = dbc->dbp;
-	rp = ((BTREE *)dbp->internal)->recno;
-	pad = NULL;
-
-	/* If the file wasn't modified, we're done. */
-	if (!F_ISSET(rp, RECNO_MODIFIED))
-		return (0);
-
-	/* If there's no backing source file, we're done. */
-	if (rp->re_source == NULL) {
-		F_CLR(rp, RECNO_MODIFIED);
-		return (0);
-	}
-
-	/*
-	 * Read any remaining records into the tree.
-	 *
-	 * !!!
-	 * This is why we can't support transactions when applications specify
-	 * backing (re_source) files.  At this point we have to read in the
-	 * rest of the records from the file so that we can write all of the
-	 * records back out again, which could modify a page for which we'd
-	 * have to log changes and which we don't have locked.  This could be
-	 * partially fixed by taking a snapshot of the entire file during the
-	 * db_open(), or, since db_open() isn't transaction protected, as part
-	 * of the first DB operation.  But, if a checkpoint occurs then, the
-	 * part of the log holding the copy of the file could be discarded, and
-	 * that would make it impossible to recover in the face of disaster.
-	 * This could all probably be fixed, but it would require transaction
-	 * protecting the backing source file, i.e. mpool would have to know
-	 * about it, and we don't want to go there.
-	 */
-	if ((ret =
-	    __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND)
-		return (ret);
-
-	/*
-	 * !!!
-	 * Close any underlying mmap region.  This is required for Windows NT
-	 * (4.0, Service Pack 2) -- if the file is still mapped, the following
-	 * open will fail.
-	 */
-	if (rp->re_smap != NULL) {
-		(void)__db_unmapfile(rp->re_smap, rp->re_msize);
-		rp->re_smap = NULL;
-	}
-
-	/* Get rid of any backing file descriptor, just on GP's. */
-	if (rp->re_fd != -1) {
-		(void)__os_close(rp->re_fd);
-		rp->re_fd = -1;
-	}
-
-	/* Open the file, truncating it. */
-	if ((ret = __db_open(rp->re_source,
-	    DB_SEQUENTIAL | DB_TRUNCATE,
-	    DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
-		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
-		return (ret);
-	}
-
-	/*
-	 * We step through the records, writing each one out.  Use the record
-	 * number and the dbp->get() function, instead of a cursor, so we find
-	 * and write out "deleted" or non-existent records.
-	 */
-	memset(&key, 0, sizeof(key));
-	memset(&data, 0, sizeof(data));
-	key.size = sizeof(db_recno_t);
-	key.data = &keyno;
-
-	/*
-	 * We'll need the delimiter if we're doing variable-length records,
-	 * and the pad character if we're doing fixed-length records.
-	 */
-	delim = rp->re_delim;
-	if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
-		if ((ret = __os_malloc(rp->re_len, NULL, &pad)) != 0)
-			goto err;
-		memset(pad, rp->re_pad, rp->re_len);
-	}
-	for (keyno = 1;; ++keyno) {
-		switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
-		case 0:
-			if ((ret =
-			    __os_write(fd, data.data, data.size, &nw)) != 0)
-				goto err;
-			if (nw != (ssize_t)data.size) {
-				ret = EIO;
-				goto err;
-			}
-			break;
-		case DB_KEYEMPTY:
-			/* An empty fixed-length slot is written as padding. */
-			if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
-				if ((ret =
-				    __os_write(fd, pad, rp->re_len, &nw)) != 0)
-					goto err;
-				if (nw != (ssize_t)rp->re_len) {
-					ret = EIO;
-					goto err;
-				}
-			}
-			break;
-		case DB_NOTFOUND:
-			/* Ran off the end of the tree: all records written. */
-			ret = 0;
-			goto done;
-		default:
-			/*
-			 * Any other error ends the rewrite; without this the
-			 * loop would ignore the failure and keep writing.
-			 */
-			goto err;
-		}
-		if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) {
-			if ((ret = __os_write(fd, &delim, 1, &nw)) != 0)
-				goto err;
-			if (nw != 1) {
-				ret = EIO;
-				goto err;
-			}
-		}
-	}
-
-err:
-done:	/* Release the pad buffer allocated for fixed-length records. */
-	if (pad != NULL)
-		__os_free(pad, rp->re_len);
-
-	/* Close the file descriptor, keeping any earlier error. */
-	if ((t_ret = __os_close(fd)) != 0 && ret == 0)
-		ret = t_ret;
-
-	if (ret == 0)
-		F_CLR(rp, RECNO_MODIFIED);
-	return (ret);
-}
-
-/*
- * __ram_fmap --
- *	Read fixed-length records from the mapped source file into the tree
- *	until the tree holds top records.
- *
- *	Returns DB_NOTFOUND (and sets RECNO_EOF) when the mapping is
- *	exhausted first, or the error from a failing helper.
- */
-static int
-__ram_fmap(dbc, top)
-	DBC *dbc;
-	db_recno_t top;
-{
-	DB *dbp;
-	DBT data;
-	RECNO *rp;
-	db_recno_t recno;
-	u_int32_t len;
-	u_int8_t *sp, *ep, *p;
-	int ret;
-
-	if ((ret = __bam_nrecs(dbc, &recno)) != 0)
-		return (ret);
-
-	dbp = dbc->dbp;
-	rp = ((BTREE *)(dbp->internal))->recno;
-
-	/* The cursor's return buffer doubles as the record staging area. */
-	if (dbc->rdata.ulen < rp->re_len) {
-		ret = __os_realloc(&dbc->rdata.data, rp->re_len);
-		if (ret != 0) {
-			dbc->rdata.ulen = 0;
-			dbc->rdata.data = NULL;
-			return (ret);
-		}
-		dbc->rdata.ulen = rp->re_len;
-	}
-
-	memset(&data, 0, sizeof(data));
-	data.data = dbc->rdata.data;
-	data.size = rp->re_len;
-
-	sp = (u_int8_t *)rp->re_cmap;
-	ep = (u_int8_t *)rp->re_emap;
-	for (; recno < top; ++rp->re_last) {
-		if (sp >= ep) {
-			F_SET(rp, RECNO_EOF);
-			return (DB_NOTFOUND);
-		}
-
-		/* Copy up to re_len bytes; len ends as the shortfall. */
-		p = dbc->rdata.data;
-		len = rp->re_len;
-		while (sp < ep && len > 0) {
-			*p++ = *sp++;
-			--len;
-		}
-
-		/*
-		 * Another process may already have read this record from
-		 * the input file and stored it in the database, in which
-		 * case the store must not be repeated.  That is detected by
-		 * checking whether the last record we've read is greater
-		 * than or equal to the number of records in the database.
-		 *
-		 * XXX
-		 * We should just do a seek, since the records are fixed
-		 * length.
-		 */
-		if (rp->re_last >= recno) {
-			/* Pad out a short final record. */
-			if (len != 0)
-				memset(p, rp->re_pad, len);
-
-			++recno;
-			ret = __ram_add(dbc, &recno, &data, 0, 0);
-			if (ret != 0)
-				return (ret);
-		}
-	}
-	rp->re_cmap = sp;
-	return (0);
-}
-
-/*
- * __ram_vmap --
- *	Read delimiter-separated records from the mapped source file into
- *	the tree until the tree holds top records.
- *
- *	Returns DB_NOTFOUND (and sets RECNO_EOF) when the mapping is
- *	exhausted first, or the error from a failing helper.
- */
-static int
-__ram_vmap(dbc, top)
-	DBC *dbc;
-	db_recno_t top;
-{
-	DBT data;
-	RECNO *rp;
-	db_recno_t recno;
-	u_int8_t *sp, *ep;
-	int delim, ret;
-
-	rp = ((BTREE *)(dbc->dbp->internal))->recno;
-
-	if ((ret = __bam_nrecs(dbc, &recno)) != 0)
-		return (ret);
-
-	memset(&data, 0, sizeof(data));
-
-	delim = rp->re_delim;
-	ep = (u_int8_t *)rp->re_emap;
-	sp = (u_int8_t *)rp->re_cmap;
-	for (; recno < top; ++sp) {
-		if (sp >= ep) {
-			F_SET(rp, RECNO_EOF);
-			return (DB_NOTFOUND);
-		}
-
-		/* The record runs up to the delimiter or end of mapping. */
-		data.data = sp;
-		while (sp < ep && *sp != delim)
-			++sp;
-
-		/*
-		 * Another process may already have read this record from
-		 * the input file and stored it in the database, in which
-		 * case the store must not be repeated.  That is detected by
-		 * checking whether the last record we've read is greater
-		 * than or equal to the number of records in the database.
-		 */
-		if (rp->re_last >= recno) {
-			data.size = sp - (u_int8_t *)data.data;
-			++recno;
-			ret = __ram_add(dbc, &recno, &data, 0, 0);
-			if (ret != 0)
-				return (ret);
-		}
-		++rp->re_last;
-
-		/* The loop increment steps sp over the delimiter. */
-	}
-	rp->re_cmap = sp;
-	return (0);
-}
-
-/*
- * __ram_add --
- *	Add records into the tree.
- *
- *	Searches for *recnop (or the append position when flags is
- *	DB_APPEND), then replaces the record found or inserts a new one
- *	before the search location, splitting and retrying as needed.
- *	Returns DB_KEYEXIST when flags is DB_NOOVERWRITE and a record
- *	without the on-page deleted flag already exists.
- */
-static int
-__ram_add(dbc, recnop, data, flags, bi_flags)
-	DBC *dbc;
-	db_recno_t *recnop;
-	DBT *data;
-	u_int32_t flags, bi_flags;
-{
-	CURSOR *cp;
-	PAGE *h;
-	db_indx_t indx;
-	int exact, ret;
-
-	cp = dbc->internal;
-
-	for (;;) {
-		/* Find the slot for insertion. */
-		ret = __bam_rsearch(dbc, recnop,
-		    S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact);
-		if (ret != 0)
-			return (ret);
-		h = cp->csp->page;
-		indx = cp->csp->indx;
-
-		/*
-		 * If re-numbering records, the on-page deleted flag means
-		 * the record was implicitly created.  If not re-numbering,
-		 * it means the record was implicitly created or was deleted
-		 * at some time.
-		 *
-		 * With DB_NOOVERWRITE, an existing item is an error unless
-		 * it carries that flag.
-		 */
-		if (exact && !B_DISSET(GET_BKEYDATA(h, indx)->type) &&
-		    flags == DB_NOOVERWRITE) {
-			ret = DB_KEYEXIST;
-			break;
-		}
-
-		/*
-		 * An exact match replaces the current item; otherwise a new
-		 * key/data pair goes in before the search location.
-		 *
-		 * On success nothing else needs adjusting.  If a record was
-		 * inserted, no cursors need adjusting because the only new
-		 * record it's possible to insert is at the very end of the
-		 * tree, and __bam_iitem() made the necessary adjustments to
-		 * the internal page counts.  If a record was overwritten, no
-		 * cursors need adjusting because future DBcursor->get calls
-		 * will simply return the underlying record (there's no
-		 * adjustment made for the DB_CURRENT flag when a cursor get
-		 * operation immediately follows a cursor delete operation,
-		 * and the normal adjustment for the DB_NEXT flag is still
-		 * correct).
-		 */
-		ret = __bam_iitem(dbc, &h, &indx,
-		    NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags);
-		if (ret != DB_NEEDSPLIT)
-			break;
-
-		/*
-		 * Discard the stack of pages and split the page, then
-		 * search again.  Nothing is left on the stack if the split
-		 * fails.
-		 */
-		(void)__bam_stkrel(dbc, 0);
-		if ((ret = __bam_split(dbc, recnop)) != 0)
-			return (ret);
-	}
-
-	/* Every exit from the loop by break still holds the search stack. */
-	__bam_stkrel(dbc, 0);
-
-	return (ret);
-}
diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c
deleted file mode 100644
index 8efe4059a8..0000000000
--- a/db2/btree/bt_rsearch.c
+++ /dev/null
@@ -1,391 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_rsearch.c 10.21 (Sleepycat) 12/2/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-/*
- * __bam_rsearch --
- * Search a btree for a record number.
- *
- * dbc: cursor; its page stack (dbc->internal) is cleared on entry and
- * refilled by the search.
- * recnop: in, the record number to find; out, set to the record count
- * plus one when S_APPEND is given.
- * flags: S_* search flags; S_STACK, S_PARENT, S_WRITE, S_APPEND and
- * S_PAST_EOF are tested here.
- * stop: tree level compared against h->level for S_PARENT searches.
- * exactp: out, 1 if the record exists, 0 otherwise.
- *
- * Returns 0 with the located page entered on the cursor stack,
- * DB_NOTFOUND if the record is out of range, or the error from the
- * lock, mpool or page-format routines.
- *
- * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
- */
-int
-__bam_rsearch(dbc, recnop, flags, stop, exactp)
- DBC *dbc;
- db_recno_t *recnop;
- u_int32_t flags;
- int stop, *exactp;
-{
- BINTERNAL *bi;
- CURSOR *cp;
- DB *dbp;
- DB_LOCK lock;
- PAGE *h;
- RINTERNAL *ri;
- db_indx_t indx, top;
- db_pgno_t pg;
- db_recno_t i, recno, total;
- int ret, stack;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
-
- BT_STK_CLR(cp);
-
- /*
- * There are several ways we search a btree tree. The flags argument
- * specifies if we're acquiring read or write locks and if we are
- * locking pairs of pages. In addition, if we're adding or deleting
- * an item, we have to lock the entire tree, regardless. See btree.h
- * for more details.
- *
- * If write-locking pages, we need to know whether or not to acquire a
- * write lock on a page before getting it. This depends on how deep it
- * is in tree, which we don't know until we acquire the root page. So,
- * if we need to lock the root page we may have to upgrade it later,
- * because we won't get the correct lock initially.
- *
- * Retrieve the root page.
- */
- pg = PGNO_ROOT;
- stack = LF_ISSET(S_STACK);
- if ((ret = __bam_lget(dbc,
- 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
- (void)__BT_LPUT(dbc, lock);
- return (ret);
- }
-
- /*
- * Decide if we need to save this page; if we do, write lock it.
- * We deliberately don't lock-couple on this call. If the tree
- * is tiny, i.e., one page, and two threads are busily updating
- * the root page, we're almost guaranteed deadlocks galore, as
- * each one gets a read lock and then blocks the other's attempt
- * for a write lock.
- */
- if (!stack &&
- ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
- (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
- /* Drop the page and lock, then re-acquire both with a write lock. */
- (void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_LPUT(dbc, lock);
- if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
- (void)__BT_LPUT(dbc, lock);
- return (ret);
- }
- stack = 1;
- }
-
- /*
- * If appending to the tree, set the record number now -- we have the
- * root page locked.
- *
- * Delete only deletes exact matches, read only returns exact matches.
- * Note, this is different from __bam_search(), which returns non-exact
- * matches for read.
- *
- * The record may not exist. We can only return the correct location
- * for the record immediately after the last record in the tree, so do
- * a fast check now.
- */
- total = RE_NREC(h);
- if (LF_ISSET(S_APPEND)) {
- *exactp = 0;
- *recnop = recno = total + 1;
- } else {
- recno = *recnop;
- if (recno <= total)
- *exactp = 1;
- else {
- *exactp = 0;
- if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) {
- (void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_LPUT(dbc, lock);
- return (DB_NOTFOUND);
- }
- }
- }
-
- /*
- * !!!
- * Record numbers in the tree are 0-based, but the recno is
- * 1-based. All of the calculations below have to take this
- * into account.
- */
- /* From here on, total counts the records in subtrees skipped so far. */
- for (total = 0;;) {
- switch (TYPE(h)) {
- case P_LBTREE:
- recno -= total;
-
- /*
- * There may be logically deleted records on the page,
- * walk the page correcting for them. The record may
- * not exist if there are enough deleted records in the
- * page.
- */
- if (recno <= (db_recno_t)NUM_ENT(h) / P_INDX)
- for (i = recno - 1;; --i) {
- if (B_DISSET(GET_BKEYDATA(h,
- i * P_INDX + O_INDX)->type))
- ++recno;
- if (i == 0)
- break;
- }
- if (recno > (db_recno_t)NUM_ENT(h) / P_INDX) {
- *exactp = 0;
- if (!LF_ISSET(S_PAST_EOF) || recno >
- (db_recno_t)(NUM_ENT(h) / P_INDX + 1)) {
- ret = DB_NOTFOUND;
- goto err;
- }
-
- }
-
- /* Correct from 1-based to 0-based for a page offset. */
- --recno;
- BT_STK_ENTER(cp, h, recno * P_INDX, lock, ret);
- return (ret);
- case P_IBTREE:
- /* Pick the first child whose cumulative count reaches recno, else the last. */
- for (indx = 0, top = NUM_ENT(h);;) {
- bi = GET_BINTERNAL(h, indx);
- if (++indx == top || total + bi->nrecs >= recno)
- break;
- total += bi->nrecs;
- }
- pg = bi->pgno;
- break;
- case P_LRECNO:
- recno -= total;
-
- /* Correct from 1-based to 0-based for a page offset. */
- --recno;
- BT_STK_ENTER(cp, h, recno, lock, ret);
- return (ret);
- case P_IRECNO:
- /* Same child selection as P_IBTREE, using the recno internal entries. */
- for (indx = 0, top = NUM_ENT(h);;) {
- ri = GET_RINTERNAL(h, indx);
- if (++indx == top || total + ri->nrecs >= recno)
- break;
- total += ri->nrecs;
- }
- pg = ri->pgno;
- break;
- default:
- /*
- * NOTE(review): this return does not put h or release lock,
- * unlike the other early exits above -- confirm whether that
- * is intended for a badly formatted page.
- */
- return (__db_pgfmt(dbp, h->pgno));
- }
- /* The loops above left indx one past the chosen entry. */
- --indx;
-
- if (stack) {
- /* Return if this is the lowest page wanted. */
- if (LF_ISSET(S_PARENT) && stop == h->level) {
- BT_STK_ENTER(cp, h, indx, lock, ret);
- return (ret);
- }
- BT_STK_PUSH(cp, h, indx, lock, ret);
- if (ret != 0)
- goto err;
-
- if ((ret =
- __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
- goto err;
- } else {
- /*
- * Decide if we want to return a pointer to the next
- * page in the stack. If we do, write lock it and
- * never unlock it.
- */
- if ((LF_ISSET(S_PARENT) &&
- (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
- (h->level - 1) == LEAFLEVEL)
- stack = 1;
-
- (void)memp_fput(dbp->mpf, h, 0);
-
- if ((ret =
- __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ?
- DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
- goto err;
- }
-
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
- goto err;
- }
- /* NOTREACHED */
-
- /*
- * NOTE(review): the P_LBTREE DB_NOTFOUND path gets here with the current
- * h and lock not yet entered on the stack, and __bam_search() guards the
- * same pop/release with "cp->csp > cp->sp" -- verify this path releases
- * everything it should and nothing it shouldn't.
- */
-err: BT_STK_POP(cp);
- __bam_stkrel(dbc, 0);
- return (ret);
-}
-
-/*
- * __bam_adjust --
- *	Add adjust to the record counts along the cursor's page stack after
- *	a record has been added or deleted.
- *
- *	Only internal pages are touched.  Each change is logged first when
- *	logging is enabled, and each page changed is marked dirty.
- *
- * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
- */
-int
-__bam_adjust(dbc, adjust)
-	DBC *dbc;
-	int32_t adjust;
-{
-	CURSOR *cp;
-	DB *dbp;
-	EPG *epg;
-	PAGE *pagep;
-	int is_btree, ret;
-
-	dbp = dbc->dbp;
-	cp = dbc->internal;
-
-	/* Walk the stack from its base to the current entry. */
-	for (epg = cp->sp; epg <= cp->csp; ++epg) {
-		pagep = epg->page;
-		is_btree = TYPE(pagep) == P_IBTREE;
-		if (!is_btree && TYPE(pagep) != P_IRECNO)
-			continue;
-
-		/* Log the count change before applying it. */
-		if (DB_LOGGING(dbc)) {
-			ret = __bam_cadjust_log(dbp->dbenv->lg_info,
-			    dbc->txn, &LSN(pagep), 0, dbp->log_fileid,
-			    PGNO(pagep), &LSN(pagep), (u_int32_t)epg->indx,
-			    adjust, 1);
-			if (ret != 0)
-				return (ret);
-		}
-
-		if (is_btree)
-			GET_BINTERNAL(pagep, epg->indx)->nrecs += adjust;
-		else
-			GET_RINTERNAL(pagep, epg->indx)->nrecs += adjust;
-
-		/* The root page also carries the tree-wide record count. */
-		if (PGNO(pagep) == PGNO_ROOT)
-			RE_NREC_ADJ(pagep, adjust);
-
-		ret = memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
-		if (ret != 0)
-			return (ret);
-	}
-	return (0);
-}
-
-/*
- * __bam_nrecs --
- *	Return the number of records in the tree.
- *
- *	Read-locks and fetches the root page, stores its record count in
- *	*rep, then puts the page and releases the lock.  Returns 0 on
- *	success or the error from the lock or page fetch; *rep is not
- *	written on failure.
- *
- * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
- */
-int
-__bam_nrecs(dbc, rep)
-	DBC *dbc;
-	db_recno_t *rep;
-{
-	DB *dbp;
-	DB_LOCK lock;
-	PAGE *h;
-	db_pgno_t pgno;
-	int ret;
-
-	dbp = dbc->dbp;
-
-	pgno = PGNO_ROOT;
-	if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0)
-		return (ret);
-	if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
-		/*
-		 * Don't leak the root page lock when the page can't be
-		 * fetched; this matches the handling in __bam_rsearch().
-		 */
-		(void)__BT_LPUT(dbc, lock);
-		return (ret);
-	}
-
-	*rep = RE_NREC(h);
-
-	(void)memp_fput(dbp->mpf, h, 0);
-	(void)__BT_TLPUT(dbc, lock);
-
-	return (0);
-}
-
-/*
- * __bam_total --
- *	Count the records at or below page h.
- *
- *	Btree leaf entries flagged as deleted are not counted.  Internal
- *	pages sum the nrecs fields of their entries.  Any other page type
- *	yields 0.
- *
- * PUBLIC: db_recno_t __bam_total __P((PAGE *));
- */
-db_recno_t
-__bam_total(h)
-	PAGE *h;
-{
-	db_recno_t count;
-	db_indx_t i, nent;
-
-	count = 0;
-	nent = NUM_ENT(h);
-	i = 0;
-
-	switch (TYPE(h)) {
-	case P_LRECNO:
-		count = NUM_ENT(h);
-		break;
-	case P_LBTREE:
-		/* Step over key/data pairs, skipping logically deleted ones. */
-		while (i < nent) {
-			if (!B_DISSET(GET_BKEYDATA(h, i + O_INDX)->type))
-				++count;
-			i += P_INDX;
-		}
-		break;
-	case P_IBTREE:
-		while (i < nent) {
-			count += GET_BINTERNAL(h, i)->nrecs;
-			i += O_INDX;
-		}
-		break;
-	case P_IRECNO:
-		while (i < nent) {
-			count += GET_RINTERNAL(h, i)->nrecs;
-			i += O_INDX;
-		}
-		break;
-	}
-
-	return (count);
-}
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
deleted file mode 100644
index 1f439a4261..0000000000
--- a/db2/btree/bt_search.c
+++ /dev/null
@@ -1,369 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_search.c 10.25 (Sleepycat) 12/16/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-/*
- * __bam_search --
- * Search a btree for a key.
- *
- * dbc: cursor; its page stack (dbc->internal) is cleared on entry and
- * refilled by the search.
- * key: key to look for, compared with __bam_cmp() and t->bt_compare.
- * flags: S_* search flags; S_STACK, S_PARENT, S_WRITE, S_EXACT,
- * S_DUPLAST and S_DELNO are tested here.
- * stop: tree level compared against h->level for S_PARENT searches.
- * recnop: if non-NULL, receives the record number of the key; it is
- * written only on the exact-match path.
- * exactp: out, 1 on an exact leaf match, 0 when a leaf is reached
- * without one.
- *
- * Returns 0 with the located page entered on the cursor stack,
- * DB_NOTFOUND, or the error from the lock or mpool routines.
- *
- * PUBLIC: int __bam_search __P((DBC *,
- * PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *));
- */
-int
-__bam_search(dbc, key, flags, stop, recnop, exactp)
- DBC *dbc;
- const DBT *key;
- u_int32_t flags;
- int stop, *exactp;
- db_recno_t *recnop;
-{
- BTREE *t;
- CURSOR *cp;
- DB *dbp;
- DB_LOCK lock;
- PAGE *h;
- db_indx_t base, i, indx, lim;
- db_pgno_t pg;
- db_recno_t recno;
- int cmp, jump, ret, stack;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
- t = dbp->internal;
- recno = 0;
-
- BT_STK_CLR(cp);
-
- /*
- * There are several ways we search a btree tree. The flags argument
- * specifies if we're acquiring read or write locks, if we position
- * to the first or last item in a set of duplicates, if we return
- * deleted items, and if we are locking pairs of pages. In addition,
- * if we're modifying record numbers, we have to lock the entire tree
- * regardless. See btree.h for more details.
- *
- * If write-locking pages, we need to know whether or not to acquire a
- * write lock on a page before getting it. This depends on how deep it
- * is in tree, which we don't know until we acquire the root page. So,
- * if we need to lock the root page we may have to upgrade it later,
- * because we won't get the correct lock initially.
- *
- * Retrieve the root page.
- */
- pg = PGNO_ROOT;
- stack = F_ISSET(dbp, DB_BT_RECNUM) && LF_ISSET(S_STACK);
- if ((ret = __bam_lget(dbc,
- 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
- (void)__BT_LPUT(dbc, lock);
- return (ret);
- }
-
- /*
- * Decide if we need to save this page; if we do, write lock it.
- * We deliberately don't lock-couple on this call. If the tree
- * is tiny, i.e., one page, and two threads are busily updating
- * the root page, we're almost guaranteed deadlocks galore, as
- * each one gets a read lock and then blocks the other's attempt
- * for a write lock.
- */
- if (!stack &&
- ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
- (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
- /* Drop the page and lock, then re-acquire both with a write lock. */
- (void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_LPUT(dbc, lock);
- if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
- return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
- (void)__BT_LPUT(dbc, lock);
- return (ret);
- }
- stack = 1;
- }
-
- for (;;) {
- /*
- * Do a binary search on the current page. If we're searching
- * a leaf page, we have to manipulate the indices in groups of
- * two. If we're searching an internal page, they're an index
- * per page item. If we find an exact match on a leaf page,
- * we're done.
- */
- jump = TYPE(h) == P_LBTREE ? P_INDX : O_INDX;
- for (base = 0,
- lim = NUM_ENT(h) / (db_indx_t)jump; lim != 0; lim >>= 1) {
- indx = base + ((lim >> 1) * jump);
- if ((cmp =
- __bam_cmp(dbp, key, h, indx, t->bt_compare)) == 0) {
- if (TYPE(h) == P_LBTREE)
- goto match;
- /*
- * Exact match on an internal page: descend directly. This
- * jump skips the recnop summation below.
- */
- goto next;
- }
- if (cmp > 0) {
- base = indx + jump;
- --lim;
- }
- }
-
- /*
- * No match found. Base is the smallest index greater than
- * key and may be zero or a last + O_INDX index.
- *
- * If it's a leaf page, return base as the "found" value.
- * Delete only deletes exact matches.
- */
- if (TYPE(h) == P_LBTREE) {
- *exactp = 0;
-
- if (LF_ISSET(S_EXACT))
- goto notfound;
-
- /*
- * !!!
- * Possibly returning a deleted record -- DB_SET_RANGE,
- * DB_KEYFIRST and DB_KEYLAST don't require an exact
- * match, and we don't want to walk multiple pages here
- * to find an undeleted record. This is handled in the
- * __bam_c_search() routine.
- */
- BT_STK_ENTER(cp, h, base, lock, ret);
- return (ret);
- }
-
- /*
- * If it's not a leaf page, record the internal page (which is
- * a parent page for the key). Decrement the base by 1 if it's
- * non-zero so that if a split later occurs, the inserted page
- * will be to the right of the saved page.
- */
- indx = base > 0 ? base - O_INDX : base;
-
- /*
- * If we're trying to calculate the record number, sum up
- * all the record numbers on this page up to the indx point.
- */
- if (recnop != NULL)
- for (i = 0; i < indx; ++i)
- recno += GET_BINTERNAL(h, i)->nrecs;
-
-next: pg = GET_BINTERNAL(h, indx)->pgno;
- if (stack) {
- /* Return if this is the lowest page wanted. */
- if (LF_ISSET(S_PARENT) && stop == h->level) {
- BT_STK_ENTER(cp, h, indx, lock, ret);
- return (ret);
- }
- BT_STK_PUSH(cp, h, indx, lock, ret);
- if (ret != 0)
- goto err;
-
- if ((ret =
- __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
- goto err;
- } else {
- /*
- * Decide if we want to return a reference to the next
- * page in the return stack. If so, lock it and never
- * unlock it.
- */
- if ((LF_ISSET(S_PARENT) &&
- (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
- (h->level - 1) == LEAFLEVEL)
- stack = 1;
-
- (void)memp_fput(dbp->mpf, h, 0);
-
- if ((ret =
- __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ?
- DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
- goto err;
- }
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
- goto err;
- }
- /* NOTREACHED */
-
-match: *exactp = 1;
-
- /*
- * If we're trying to calculate the record number, add in the
- * offset on this page and correct for the fact that records
- * in the tree are 0-based.
- */
- if (recnop != NULL)
- *recnop = recno + (indx / P_INDX) + 1;
-
- /*
- * If we got here, we know that we have a btree leaf page.
- *
- * If there are duplicates, go to the first/last one. This is
- * safe because we know that we're not going to leave the page,
- * all duplicate sets that are not on overflow pages exist on a
- * single leaf page.
- */
- /* Entries whose h->inp values are equal are treated as one duplicate set. */
- if (LF_ISSET(S_DUPLAST))
- while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
- h->inp[indx] == h->inp[indx + P_INDX])
- indx += P_INDX;
- else
- while (indx > 0 &&
- h->inp[indx] == h->inp[indx - P_INDX])
- indx -= P_INDX;
-
- /*
- * Now check if we are allowed to return deleted items; if not
- * find the next (or previous) non-deleted item.
- */
- if (LF_ISSET(S_DELNO)) {
- /* Walk back through the set from its far end, opposite to the positioning above. */
- if (LF_ISSET(S_DUPLAST))
- while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) &&
- indx > 0 &&
- h->inp[indx] == h->inp[indx - P_INDX])
- indx -= P_INDX;
- else
- while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) &&
- indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
- h->inp[indx] == h->inp[indx + P_INDX])
- indx += P_INDX;
-
- if (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
- goto notfound;
- }
-
- BT_STK_ENTER(cp, h, indx, lock, ret);
- return (ret);
-
-notfound:
- /* The current page was never entered on the stack; release it here. */
- (void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_LPUT(dbc, lock);
- ret = DB_NOTFOUND;
-
- /* Pop and release only if the stack holds something above its base. */
-err: if (cp->csp > cp->sp) {
- BT_STK_POP(cp);
- __bam_stkrel(dbc, 0);
- }
- return (ret);
-}
-
-/*
- * __bam_stkrel --
- *	Put every page and release every lock held on the cursor's stack,
- *	then reset the stack.  A non-zero nolocks releases the locks with
- *	__BT_LPUT, otherwise __BT_TLPUT is used.  Always returns 0.
- *
- * PUBLIC: int __bam_stkrel __P((DBC *, int));
- */
-int
-__bam_stkrel(dbc, nolocks)
-	DBC *dbc;
-	int nolocks;
-{
-	CURSOR *cp;
-	DB *dbp;
-	EPG *epg;
-
-	dbp = dbc->dbp;
-	cp = dbc->internal;
-
-	/* Work from the base of the stack to its current entry. */
-	epg = cp->sp;
-	while (epg <= cp->csp) {
-		if (epg->page != NULL)
-			(void)memp_fput(dbp->mpf, epg->page, 0);
-		if (epg->lock != LOCK_INVALID) {
-			if (!nolocks)
-				(void)__BT_TLPUT(dbc, epg->lock);
-			else
-				(void)__BT_LPUT(dbc, epg->lock);
-		}
-		++epg;
-	}
-
-	/* Everything has been released; empty the stack. */
-	BT_STK_CLR(cp);
-
-	return (0);
-}
-
-/*
- * __bam_stkgrow --
- *	Double the capacity of the cursor's page stack.
- *
- *	The existing entries are copied into a new zeroed array.  The old
- *	array is freed unless it is the cursor's built-in cp->stack.
- *	Returns 0 or the allocation error.
- *
- * PUBLIC: int __bam_stkgrow __P((CURSOR *));
- */
-int
-__bam_stkgrow(cp)
-	CURSOR *cp;
-{
-	EPG *grown;
-	size_t nold;
-	int ret;
-
-	/* Current capacity, in entries. */
-	nold = cp->esp - cp->sp;
-
-	ret = __os_calloc(nold * 2, sizeof(EPG), &grown);
-	if (ret != 0)
-		return (ret);
-
-	memcpy(grown, cp->sp, nold * sizeof(EPG));
-	if (cp->sp != cp->stack)
-		__os_free(cp->sp, nold * sizeof(EPG));
-
-	/* The current entry becomes the first slot of the new half. */
-	cp->sp = grown;
-	cp->csp = grown + nold;
-	cp->esp = grown + nold * 2;
-	return (0);
-}
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
deleted file mode 100644
index 1d8e926d85..0000000000
--- a/db2/btree/bt_split.c
+++ /dev/null
@@ -1,966 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_split.c 10.33 (Sleepycat) 10/13/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <limits.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-static int __bam_broot __P((DBC *, PAGE *, PAGE *, PAGE *));
-static int __bam_page __P((DBC *, EPG *, EPG *));
-static int __bam_pinsert __P((DBC *, EPG *, PAGE *, PAGE *));
-static int __bam_psplit __P((DBC *, EPG *, PAGE *, PAGE *, db_indx_t *));
-static int __bam_root __P((DBC *, EPG *));
-static int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *));
-
-/*
- * __bam_split --
- *	Split a page.
- *
- *	dbc	cursor performing the split.
- *	arg	what to search for: passed unchanged to __bam_search for a
- *		DB_BTREE database, or cast to (db_recno_t *) and passed to
- *		__bam_rsearch otherwise.
- *
- *	Returns 0 once a page at LEAFLEVEL has been split, or the first error
- *	other than DB_NEEDSPLIT reported by the search or split routines.
- *
- * PUBLIC: int __bam_split __P((DBC *, void *));
- */
-int
-__bam_split(dbc, arg)
-	DBC *dbc;
-	void *arg;
-{
-	CURSOR *cp;
-	DB *dbp;
-	enum { UP, DOWN } dir;
-	int exact, level, ret;
-
-	dbp = dbc->dbp;
-	cp = dbc->internal;
-
-	/*
-	 * The locking protocol we use to avoid deadlock is to acquire locks
-	 * by walking down the tree, but to do so as lazily as possible,
-	 * locking the root only as a last resort.  We expect all stack pages
-	 * to have been discarded before we're called; we discard all
-	 * short-term locks.
-	 *
-	 * When __bam_split is first called, we know that a leaf page was too
-	 * full for an insert.  We don't know which leaf page it was, but we
-	 * have the key/recno that caused the problem.  We call XX_search to
-	 * reacquire the leaf page, this time getting both the leaf page and
-	 * its parent, locked.  We then split the leaf page and see if the new
-	 * internal key fits into the parent page.  If it does, we're done.
-	 *
-	 * If it doesn't, we discard our current locks and repeat the process
-	 * one level up: the parent page and its parent, locked.  This repeats
-	 * until a split succeeds, splitting the root page as the final
-	 * resort.  We then work our way back down, as necessary, until the
-	 * leaf page itself has been split.
-	 *
-	 * XXX
-	 * A traditional method of speeding this up is to maintain a stack of
-	 * the pages traversed in the original search.  You can detect if the
-	 * stack is correct by storing the page's LSN when it was searched and
-	 * comparing that LSN with the current one when it's locked during the
-	 * split.  This would be an easy change for this code, but I have no
-	 * numbers that indicate it's worthwhile.
-	 */
-	dir = UP;
-	level = LEAFLEVEL;
-	for (;;) {
-		/* Acquire a page and its parent, locked. */
-		if (dbp->type == DB_BTREE)
-			ret = __bam_search(dbc,
-			    arg, S_WRPAIR, level, NULL, &exact);
-		else
-			ret = __bam_rsearch(dbc,
-			    (db_recno_t *)arg, S_WRPAIR, level, &exact);
-		if (ret != 0)
-			return (ret);
-
-		/* Split the page; the root has its own routine. */
-		if (cp->csp[0].page->pgno == PGNO_ROOT)
-			ret = __bam_root(dbc, &cp->csp[0]);
-		else
-			ret = __bam_page(dbc, &cp->csp[-1], &cp->csp[0]);
-		BT_STK_CLR(cp);
-
-		if (ret == 0) {
-			/* Once we've split the leaf page, we're done. */
-			if (level == LEAFLEVEL)
-				return (0);
-
-			/* A higher level now has room: head back down. */
-			dir = DOWN;
-		} else if (ret == DB_NEEDSPLIT) {
-			/*
-			 * It's possible to fail to split repeatedly, as other
-			 * threads may be modifying the tree, or the page usage
-			 * is sufficiently bad that we don't get enough space
-			 * the first time.  Try one level higher.
-			 */
-			dir = UP;
-		} else
-			return (ret);
-
-		/* Move to the next level in the current direction. */
-		if (dir == UP)
-			++level;
-		else
-			--level;
-	}
-	/* NOTREACHED */
-}
-
-/*
- * __bam_root --
- *	Split the root page of a btree.
- *
- *	dbc	cursor performing the split.
- *	cp	stack entry holding the root page and its lock.
- *
- *	Two new pages are allocated, the root's entries are divided between
- *	them by __bam_psplit, and the root is rebuilt (by __ram_root for
- *	DB_RECNO, __bam_broot otherwise) to reference the two new pages.
- *
- *	Returns 0 on success, ENOSPC if the root is already at
- *	MAXBTREELEVEL, or the error of the step that failed.  On every path
- *	the root page is put back and its lock handed to __BT_TLPUT; on
- *	failure any new page that was allocated is freed.
- */
-static int
-__bam_root(dbc, cp)
-	DBC *dbc;
-	EPG *cp;
-{
-	DB *dbp;
-	PAGE *lp, *rp;
-	db_indx_t split;
-	int ret;
-
-	dbp = dbc->dbp;
-
-	/*
-	 * Clear the new-page pointers before anything can jump to err: the
-	 * cleanup code there tests them to decide which pages to free.
-	 */
-	lp = rp = NULL;
-
-	/* Yeah, right. */
-	if (cp->page->level >= MAXBTREELEVEL) {
-		ret = ENOSPC;
-		goto err;
-	}
-
-	/* Create new left and right pages for the split. */
-	if ((ret = __bam_new(dbc, TYPE(cp->page), &lp)) != 0 ||
-	    (ret = __bam_new(dbc, TYPE(cp->page), &rp)) != 0)
-		goto err;
-	P_INIT(lp, dbp->pgsize, lp->pgno,
-	    PGNO_INVALID, ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno,
-	    cp->page->level, TYPE(cp->page));
-	P_INIT(rp, dbp->pgsize, rp->pgno,
-	    ISINTERNAL(cp->page) ? PGNO_INVALID : lp->pgno, PGNO_INVALID,
-	    cp->page->level, TYPE(cp->page));
-
-	/* Split the page. */
-	if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0)
-		goto err;
-
-	/* Log the change. */
-	if (DB_LOGGING(dbc)) {
-		DBT __a;
-		DB_LSN __lsn;
-		memset(&__a, 0, sizeof(__a));
-		__a.data = cp->page;
-		__a.size = dbp->pgsize;
-		ZERO_LSN(__lsn);
-		if ((ret = __bam_split_log(dbp->dbenv->lg_info, dbc->txn,
-		    &LSN(cp->page), 0, dbp->log_fileid, PGNO(lp), &LSN(lp),
-		    PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &__lsn,
-		    &__a)) != 0)
-			goto err;
-		LSN(lp) = LSN(rp) = LSN(cp->page);
-	}
-
-	/* Clean up the new root page. */
-	if ((ret = (dbp->type == DB_RECNO ?
-	    __ram_root(dbc, cp->page, lp, rp) :
-	    __bam_broot(dbc, cp->page, lp, rp))) != 0)
-		goto err;
-
-	/* Adjust any cursors.  Do it last so we don't have to undo it. */
-	__bam_ca_split(dbp, cp->page->pgno, lp->pgno, rp->pgno, split, 1);
-
-	/* Success -- write the real pages back to the store. */
-	(void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
-	(void)__BT_TLPUT(dbc, cp->lock);
-	(void)memp_fput(dbp->mpf, lp, DB_MPOOL_DIRTY);
-	(void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY);
-
-	return (0);
-
-err:	if (lp != NULL)
-		(void)__bam_free(dbc, lp);
-	if (rp != NULL)
-		(void)__bam_free(dbc, rp);
-	(void)memp_fput(dbp->mpf, cp->page, 0);
-	(void)__BT_TLPUT(dbc, cp->lock);
-	return (ret);
-}
-
-/*
- * __bam_page --
- *	Split the non-root page of a btree.
- *
- *	dbc	cursor performing the split.
- *	pp	stack entry for the parent page (page and lock).
- *	cp	stack entry for the page being split (page and lock).
- *
- *	Returns 0 on success; the parent, the split page, the new right page
- *	and, if one was fetched, the following leaf page are put back dirty.
- *	On failure the error is returned, the new right page is freed and the
- *	pages are put back clean.  DB_NEEDSPLIT (from __bam_pinsert) means
- *	the parent had no room for the new key; in that case the locks are
- *	handed to __BT_LPUT rather than __BT_TLPUT.
- */
-static int
-__bam_page(dbc, pp, cp)
-	DBC *dbc;
-	EPG *pp, *cp;
-{
-	DB *dbp;
-	DB_LOCK tplock;
-	PAGE *lp, *rp, *tp;
-	db_indx_t split;
-	int ret;
-
-	dbp = dbc->dbp;
-	lp = rp = tp = NULL;
-
-	/* Create new right page for the split. */
-	if ((ret = __bam_new(dbc, TYPE(cp->page), &rp)) != 0)
-		goto err;
-	P_INIT(rp, dbp->pgsize, rp->pgno,
-	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->pgno,
-	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->next_pgno,
-	    cp->page->level, TYPE(cp->page));
-
-	/* Create new left page for the split. */
-	if ((ret = __os_malloc(dbp->pgsize, NULL, &lp)) != 0)
-		goto err;
-	P_INIT(lp, dbp->pgsize, cp->page->pgno,
-	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->prev_pgno,
-	    ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno,
-	    cp->page->level, TYPE(cp->page));
-	ZERO_LSN(lp->lsn);
-
-	/*
-	 * Split right.
-	 *
-	 * Only the indices are sorted on the page, i.e., the key/data pairs
-	 * aren't, so it's simpler to copy the data from the split page onto
-	 * two new pages instead of copying half the data to the right page
-	 * and compacting the left page in place.  Since the left page can't
-	 * change, we swap the original and the allocated left page after the
-	 * split.
-	 */
-	if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0)
-		goto err;
-
-	/*
-	 * Fix up the previous pointer of any leaf page following the split
-	 * page.
-	 *
-	 * !!!
-	 * There are interesting deadlock situations here as we write-lock a
-	 * page that's not in our direct ancestry.  Consider a cursor walking
-	 * through the leaf pages, that has the previous page read-locked and
-	 * is waiting on a lock for the page we just split.  It will deadlock
-	 * here.  If this is a problem, we can fail in the split; it's not a
-	 * problem as the split will succeed after the cursor passes through
-	 * the page we're splitting.
-	 */
-	if (TYPE(cp->page) == P_LBTREE && rp->next_pgno != PGNO_INVALID) {
-		if ((ret = __bam_lget(dbc,
-		    0, rp->next_pgno, DB_LOCK_WRITE, &tplock)) != 0)
-			goto err;
-		if ((ret = memp_fget(dbp->mpf, &rp->next_pgno, 0, &tp)) != 0) {
-			/*
-			 * The err path only releases tplock when tp is set,
-			 * so hand the lock back here or it is never released.
-			 */
-			tp = NULL;
-			(void)__BT_TLPUT(dbc, tplock);
-			goto err;
-		}
-	}
-
-	/* Insert the new pages into the parent page. */
-	if ((ret = __bam_pinsert(dbc, pp, lp, rp)) != 0)
-		goto err;
-
-	/* Log the change. */
-	if (DB_LOGGING(dbc)) {
-		DBT __a;
-		DB_LSN __lsn;
-		memset(&__a, 0, sizeof(__a));
-		__a.data = cp->page;
-		__a.size = dbp->pgsize;
-		/* __lsn stands in for the next page's LSN when there is none. */
-		if (tp == NULL)
-			ZERO_LSN(__lsn);
-		if ((ret = __bam_split_log(dbp->dbenv->lg_info, dbc->txn,
-		    &cp->page->lsn, 0, dbp->log_fileid, PGNO(cp->page),
-		    &LSN(cp->page), PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp),
-		    tp == NULL ? 0 : PGNO(tp),
-		    tp == NULL ? &__lsn : &LSN(tp), &__a)) != 0)
-			goto err;
-
-		LSN(lp) = LSN(rp) = LSN(cp->page);
-		if (tp != NULL)
-			LSN(tp) = LSN(cp->page);
-	}
-
-	/*
-	 * Copy the allocated page into place: the header and index array up
-	 * to LOFFSET, then the item data from HOFFSET to the end of the page.
-	 */
-	memcpy(cp->page, lp, LOFFSET(lp));
-	memcpy((u_int8_t *)cp->page + HOFFSET(lp),
-	    (u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp));
-	__os_free(lp, dbp->pgsize);
-	lp = NULL;
-
-	/* Finish the next-page link. */
-	if (tp != NULL)
-		tp->prev_pgno = rp->pgno;
-
-	/* Adjust any cursors.  Do so last so we don't have to undo it. */
-	__bam_ca_split(dbp, cp->page->pgno, cp->page->pgno, rp->pgno, split, 0);
-
-	/* Success -- write the real pages back to the store. */
-	(void)memp_fput(dbp->mpf, pp->page, DB_MPOOL_DIRTY);
-	(void)__BT_TLPUT(dbc, pp->lock);
-	(void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
-	(void)__BT_TLPUT(dbc, cp->lock);
-	(void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY);
-	if (tp != NULL) {
-		(void)memp_fput(dbp->mpf, tp, DB_MPOOL_DIRTY);
-		(void)__BT_TLPUT(dbc, tplock);
-	}
-	return (0);
-
-err:	if (lp != NULL)
-		__os_free(lp, dbp->pgsize);
-	if (rp != NULL)
-		(void)__bam_free(dbc, rp);
-	if (tp != NULL) {
-		(void)memp_fput(dbp->mpf, tp, 0);
-		if (ret == DB_NEEDSPLIT)
-			(void)__BT_LPUT(dbc, tplock);
-		else
-			(void)__BT_TLPUT(dbc, tplock);
-	}
-	(void)memp_fput(dbp->mpf, pp->page, 0);
-	if (ret == DB_NEEDSPLIT)
-		(void)__BT_LPUT(dbc, pp->lock);
-	else
-		(void)__BT_TLPUT(dbc, pp->lock);
-	(void)memp_fput(dbp->mpf, cp->page, 0);
-	if (ret == DB_NEEDSPLIT)
-		(void)__BT_LPUT(dbc, cp->lock);
-	else
-		(void)__BT_TLPUT(dbc, cp->lock);
-	return (ret);
-}
-
-/*
- * __bam_broot --
- * Fix up the btree root page after it has been split.
- *
- * dbc cursor performing the split.
- * rootp the root page; reinitialized here as a P_IBTREE page one
- * level above lp.
- * lp, rp the new left and right children of the root.
- *
- * Two BINTERNAL items are placed on the root: item 0 references lp and
- * carries no key, item 1 references rp and carries a copy of the first
- * key found on rp. If that key is a B_OVERFLOW item, the reference
- * count of its overflow page is incremented through __db_ovref.
- *
- * Returns 0 on success, the error from __db_pitem or __db_ovref, or
- * the result of __db_pgfmt if rp has an unexpected page or item type.
- */
-static int
-__bam_broot(dbc, rootp, lp, rp)
- DBC *dbc;
- PAGE *rootp, *lp, *rp;
-{
- BINTERNAL bi, *child_bi;
- BKEYDATA *child_bk;
- DB *dbp;
- DBT hdr, data;
- int ret;
-
- dbp = dbc->dbp;
-
- /*
- * If the root page was a leaf page, change it into an internal page.
- * We copy the key we split on (but not the key's data, in the case of
- * a leaf page) to the new root page.
- */
- P_INIT(rootp, dbp->pgsize,
- PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE);
-
- memset(&data, 0, sizeof(data));
- memset(&hdr, 0, sizeof(hdr));
-
- /*
- * The btree comparison code guarantees that the left-most key on any
- * level of the tree is never used, so it doesn't need to be filled in.
- */
- memset(&bi, 0, sizeof(bi));
- bi.len = 0;
- B_TSET(bi.type, B_KEYDATA, 0);
- bi.pgno = lp->pgno;
- /* With record numbers enabled the root's count starts at lp's total. */
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
- bi.nrecs = __bam_total(lp);
- RE_NREC_SET(rootp, bi.nrecs);
- }
- hdr.data = &bi;
- hdr.size = SSZA(BINTERNAL, data);
- /* Item 0: header only, no key bytes (the data DBT is NULL). */
- if ((ret =
- __db_pitem(dbc, rootp, 0, BINTERNAL_SIZE(0), &hdr, NULL)) != 0)
- return (ret);
-
- /* Item 1: bi is reused, with every field the item needs reassigned. */
- switch (TYPE(rp)) {
- case P_IBTREE:
- /* Copy the first key of the child page onto the root page. */
- child_bi = GET_BINTERNAL(rp, 0);
-
- bi.len = child_bi->len;
- B_TSET(bi.type, child_bi->type, 0);
- bi.pgno = rp->pgno;
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
- bi.nrecs = __bam_total(rp);
- RE_NREC_ADJ(rootp, bi.nrecs);
- }
- hdr.data = &bi;
- hdr.size = SSZA(BINTERNAL, data);
- data.data = child_bi->data;
- data.size = child_bi->len;
- if ((ret = __db_pitem(dbc, rootp, 1,
- BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
- return (ret);
-
- /* Increment the overflow ref count. */
- if (B_TYPE(child_bi->type) == B_OVERFLOW)
- if ((ret = __db_ovref(dbc,
- ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
- return (ret);
- break;
- case P_LBTREE:
- /* Copy the first key of the child page onto the root page. */
- child_bk = GET_BKEYDATA(rp, 0);
- switch (B_TYPE(child_bk->type)) {
- case B_KEYDATA:
- bi.len = child_bk->len;
- B_TSET(bi.type, child_bk->type, 0);
- bi.pgno = rp->pgno;
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
- bi.nrecs = __bam_total(rp);
- RE_NREC_ADJ(rootp, bi.nrecs);
- }
- hdr.data = &bi;
- hdr.size = SSZA(BINTERNAL, data);
- data.data = child_bk->data;
- data.size = child_bk->len;
- if ((ret = __db_pitem(dbc, rootp, 1,
- BINTERNAL_SIZE(child_bk->len), &hdr, &data)) != 0)
- return (ret);
- break;
- case B_DUPLICATE:
- case B_OVERFLOW:
- /*
- * The key is not stored inline: the child's whole item
- * (BOVERFLOW_SIZE bytes starting at child_bk) becomes the
- * key data of the root entry.
- */
- bi.len = BOVERFLOW_SIZE;
- B_TSET(bi.type, child_bk->type, 0);
- bi.pgno = rp->pgno;
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
- bi.nrecs = __bam_total(rp);
- RE_NREC_ADJ(rootp, bi.nrecs);
- }
- hdr.data = &bi;
- hdr.size = SSZA(BINTERNAL, data);
- data.data = child_bk;
- data.size = BOVERFLOW_SIZE;
- if ((ret = __db_pitem(dbc, rootp, 1,
- BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
- return (ret);
-
- /* Increment the overflow ref count. */
- if (B_TYPE(child_bk->type) == B_OVERFLOW)
- if ((ret = __db_ovref(dbc,
- ((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
- return (ret);
- break;
- default:
- return (__db_pgfmt(dbp, rp->pgno));
- }
- break;
- default:
- return (__db_pgfmt(dbp, rp->pgno));
- }
- return (0);
-}
-
-/*
- * __ram_root --
- *	Rebuild the recno root page after it has been split.
- *
- *	dbc	cursor performing the split.
- *	rootp	the root page; reinitialized here as a P_IRECNO page one
- *		level above lp.
- *	lp, rp	the new left and right children of the root.
- *
- *	The root receives two RINTERNAL items, item 0 for lp and item 1 for
- *	rp, each holding the child's page number and its __bam_total count.
- *	The root's own record count is set from the left child and then
- *	adjusted by the right child's count.
- *
- *	Returns 0 on success or the error from __db_pitem.
- */
-static int
-__ram_root(dbc, rootp, lp, rp)
-	DBC *dbc;
-	PAGE *rootp, *lp, *rp;
-{
-	DB *dbp;
-	DBT item;
-	RINTERNAL entry;
-	int ret;
-
-	dbp = dbc->dbp;
-
-	/* Start from an empty internal recno page. */
-	P_INIT(rootp, dbp->pgsize,
-	    PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IRECNO);
-
-	/* One DBT, pointing at entry, serves for both inserts. */
-	memset(&item, 0, sizeof(item));
-	item.data = &entry;
-	item.size = RINTERNAL_SIZE;
-
-	/* Left child goes in slot 0 and seeds the root's record count. */
-	entry.pgno = lp->pgno;
-	entry.nrecs = __bam_total(lp);
-	ret = __db_pitem(dbc, rootp, 0, RINTERNAL_SIZE, &item, NULL);
-	if (ret != 0)
-		return (ret);
-	RE_NREC_SET(rootp, entry.nrecs);
-
-	/* Right child goes in slot 1 and is added to the record count. */
-	entry.pgno = rp->pgno;
-	entry.nrecs = __bam_total(rp);
-	ret = __db_pitem(dbc, rootp, 1, RINTERNAL_SIZE, &item, NULL);
-	if (ret != 0)
-		return (ret);
-	RE_NREC_ADJ(rootp, entry.nrecs);
-
-	return (0);
-}
-
-/*
- * __bam_pinsert --
- * Insert a new key into a parent page, completing the split.
- *
- * dbc cursor performing the split.
- * parent stack entry for the parent: its page, and the index of the
- * entry that references the page being split.
- * lchild the left page produced by the split; only read, for the
- * prefix computation.
- * rchild the right page produced by the split; its first key becomes
- * the new parent entry.
- *
- * Returns 0 on success, DB_NEEDSPLIT if the parent lacks the free
- * space for the new entry (nothing has been inserted in that case),
- * the result of __db_pgfmt for an unexpected page or item type, or
- * the error from __db_pitem, __db_ovref or __bam_cadjust_log.
- */
-static int
-__bam_pinsert(dbc, parent, lchild, rchild)
- DBC *dbc;
- EPG *parent;
- PAGE *lchild, *rchild;
-{
- BINTERNAL bi, *child_bi;
- BKEYDATA *child_bk, *tmp_bk;
- BTREE *t;
- DB *dbp;
- DBT a, b, hdr, data;
- PAGE *ppage;
- RINTERNAL ri;
- db_indx_t off;
- db_recno_t nrecs;
- u_int32_t n, nbytes, nksize;
- int ret;
-
- dbp = dbc->dbp;
- t = dbp->internal;
- ppage = parent->page;
-
- /* If handling record numbers, count records split to the right page. */
- nrecs = dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM) ?
- __bam_total(rchild) : 0;
-
- /*
- * Now we insert the new page's first key into the parent page, which
- * completes the split. The parent points to a PAGE and a page index
- * offset, where the new key goes ONE AFTER the index, because we split
- * to the right.
- *
- * XXX
- * Some btree algorithms replace the key for the old page as well as
- * the new page. We don't, as there's no reason to believe that the
- * first key on the old page is any better than the key we have, and,
- * in the case of a key being placed at index 0 causing the split, the
- * key is unavailable.
- */
- off = parent->indx + O_INDX;
-
- /*
- * Calculate the space needed on the parent page.
- *
- * Prefix trees: space hack used when inserting into BINTERNAL pages.
- * Retain only what's needed to distinguish between the new entry and
- * the LAST entry on the page to its left. If the keys compare equal,
- * retain the entire key. We ignore overflow keys, and the entire key
- * must be retained for the next-to-leftmost key on the leftmost page
- * of each level, or the search will fail. Applicable ONLY to internal
- * pages that have leaf pages as children. Further reduction of the
- * key between pairs of internal pages loses too much information.
- */
- switch (TYPE(rchild)) {
- case P_IBTREE:
- child_bi = GET_BINTERNAL(rchild, 0);
- nbytes = BINTERNAL_PSIZE(child_bi->len);
-
- if (P_FREESPACE(ppage) < nbytes)
- return (DB_NEEDSPLIT);
-
- /* Add a new record for the right page. */
- memset(&bi, 0, sizeof(bi));
- bi.len = child_bi->len;
- B_TSET(bi.type, child_bi->type, 0);
- bi.pgno = rchild->pgno;
- bi.nrecs = nrecs;
- memset(&hdr, 0, sizeof(hdr));
- hdr.data = &bi;
- hdr.size = SSZA(BINTERNAL, data);
- memset(&data, 0, sizeof(data));
- data.data = child_bi->data;
- data.size = child_bi->len;
- if ((ret = __db_pitem(dbc, ppage, off,
- BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
- return (ret);
-
- /* Increment the overflow ref count. */
- if (B_TYPE(child_bi->type) == B_OVERFLOW)
- if ((ret = __db_ovref(dbc,
- ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
- return (ret);
- break;
- case P_LBTREE:
- child_bk = GET_BKEYDATA(rchild, 0);
- switch (B_TYPE(child_bk->type)) {
- case B_KEYDATA:
- /* Start from the full key; a prefix may shrink both values. */
- nbytes = BINTERNAL_PSIZE(child_bk->len);
- nksize = child_bk->len;
- /* No prefix function configured: keep the whole key. */
- if (t->bt_prefix == NULL)
- goto noprefix;
- /* Leftmost page, first or second slot: keep the whole key. */
- if (ppage->prev_pgno == PGNO_INVALID && off <= 1)
- goto noprefix;
- /* Compare against the last key on the left child. */
- tmp_bk = GET_BKEYDATA(lchild, NUM_ENT(lchild) - P_INDX);
- if (B_TYPE(tmp_bk->type) != B_KEYDATA)
- goto noprefix;
- memset(&a, 0, sizeof(a));
- a.size = tmp_bk->len;
- a.data = tmp_bk->data;
- memset(&b, 0, sizeof(b));
- b.size = child_bk->len;
- b.data = child_bk->data;
- nksize = t->bt_prefix(&a, &b);
- /*
- * Use the prefix only if it makes the entry smaller. The
- * noprefix label sits on the else arm, so both "prefix is
- * no smaller" and every goto above restore the full key
- * length while nbytes keeps its full-key value.
- */
- if ((n = BINTERNAL_PSIZE(nksize)) < nbytes)
- nbytes = n;
- else
-noprefix: nksize = child_bk->len;
-
- if (P_FREESPACE(ppage) < nbytes)
- return (DB_NEEDSPLIT);
-
- memset(&bi, 0, sizeof(bi));
- bi.len = nksize;
- B_TSET(bi.type, child_bk->type, 0);
- bi.pgno = rchild->pgno;
- bi.nrecs = nrecs;
- memset(&hdr, 0, sizeof(hdr));
- hdr.data = &bi;
- hdr.size = SSZA(BINTERNAL, data);
- memset(&data, 0, sizeof(data));
- data.data = child_bk->data;
- data.size = nksize;
- if ((ret = __db_pitem(dbc, ppage, off,
- BINTERNAL_SIZE(nksize), &hdr, &data)) != 0)
- return (ret);
- break;
- case B_DUPLICATE:
- case B_OVERFLOW:
- nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE);
-
- if (P_FREESPACE(ppage) < nbytes)
- return (DB_NEEDSPLIT);
-
- /* The child's whole item is copied as the parent's key data. */
- memset(&bi, 0, sizeof(bi));
- bi.len = BOVERFLOW_SIZE;
- B_TSET(bi.type, child_bk->type, 0);
- bi.pgno = rchild->pgno;
- bi.nrecs = nrecs;
- memset(&hdr, 0, sizeof(hdr));
- hdr.data = &bi;
- hdr.size = SSZA(BINTERNAL, data);
- memset(&data, 0, sizeof(data));
- data.data = child_bk;
- data.size = BOVERFLOW_SIZE;
- if ((ret = __db_pitem(dbc, ppage, off,
- BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
- return (ret);
-
- /* Increment the overflow ref count. */
- if (B_TYPE(child_bk->type) == B_OVERFLOW)
- if ((ret = __db_ovref(dbc,
- ((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
- return (ret);
- break;
- default:
- return (__db_pgfmt(dbp, rchild->pgno));
- }
- break;
- case P_IRECNO:
- case P_LRECNO:
- nbytes = RINTERNAL_PSIZE;
-
- if (P_FREESPACE(ppage) < nbytes)
- return (DB_NEEDSPLIT);
-
- /* Add a new record for the right page. */
- memset(&hdr, 0, sizeof(hdr));
- hdr.data = &ri;
- hdr.size = RINTERNAL_SIZE;
- ri.pgno = rchild->pgno;
- ri.nrecs = nrecs;
- if ((ret = __db_pitem(dbc,
- ppage, off, RINTERNAL_SIZE, &hdr, NULL)) != 0)
- return (ret);
- break;
- default:
- return (__db_pgfmt(dbp, rchild->pgno));
- }
-
- /*
- * Adjust the parent page's left page record count: the records now on
- * the right page are subtracted from the entry at parent->indx.
- */
- if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
- /* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cadjust_log(dbp->dbenv->lg_info,
- dbc->txn, &LSN(ppage), 0, dbp->log_fileid,
- PGNO(ppage), &LSN(ppage), (u_int32_t)parent->indx,
- -(int32_t)nrecs, (int32_t)0)) != 0)
- return (ret);
-
- /* Update the left page count. */
- if (dbp->type == DB_RECNO)
- GET_RINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
- else
- GET_BINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
- }
-
- return (0);
-}
-
-/*
- * __bam_psplit --
- * Do the real work of splitting the page.
- *
- * dbc cursor performing the split.
- * cp stack entry for the page being split; cp->indx is the
- * position that triggered the split.
- * lp, rp initialized, empty destination pages: entries before the
- * split point are copied to lp, the rest to rp.
- * splitret set, on success, to the index at which the page was split.
- *
- * The source page itself is only read. Returns 0 on success, or the
- * error from __bam_copy or __db_pgfmt.
- */
-static int
-__bam_psplit(dbc, cp, lp, rp, splitret)
- DBC *dbc;
- EPG *cp;
- PAGE *lp, *rp;
- db_indx_t *splitret;
-{
- DB *dbp;
- PAGE *pp;
- db_indx_t half, nbytes, off, splitp, top;
- int adjust, cnt, isbigkey, ret;
-
- dbp = dbc->dbp;
- pp = cp->page;
- /* Step between entries: P_INDX on btree leaf pages, O_INDX otherwise. */
- adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX;
-
- /*
- * If we're splitting the first (last) page on a level because we're
- * inserting (appending) a key to it, it's likely that the data is
- * sorted. Moving a single item to the new page is less work and can
- * push the fill factor higher than normal. If we're wrong it's not
- * a big deal, we'll just do the split the right way next time.
- */
- off = 0;
- if (NEXT_PGNO(pp) == PGNO_INVALID &&
- ((ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page) - 1) ||
- (!ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page))))
- off = NUM_ENT(cp->page) - adjust;
- else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0)
- off = adjust;
-
- /* A non-zero off means a split point was already chosen above. */
- if (off != 0)
- goto sort;
-
- /*
- * Split the data to the left and right pages. Try not to split on
- * an overflow key. (Overflow keys on internal pages will slow down
- * searches.) Refuse to split in the middle of a set of duplicates.
- *
- * First, find the optimum place to split.
- *
- * It's possible to try and split past the last record on the page if
- * there's a very large record at the end of the page. Make sure this
- * doesn't happen by bounding the check at the next-to-last entry on
- * the page.
- *
- * Note, we try and split half the data present on the page. This is
- * because another process may have already split the page and left
- * it half empty. We don't try and skip the split -- we don't know
- * how much space we're going to need on the page, and we may need up
- * to half the page for a big item, so there's no easy test to decide
- * if we need to split or not. Besides, if two threads are inserting
- * data into the same place in the database, we're probably going to
- * need more space soon anyway.
- */
- top = NUM_ENT(pp) - adjust;
- half = (dbp->pgsize - HOFFSET(pp)) / 2;
- /* Accumulate item sizes until half of the used data has been passed. */
- for (nbytes = 0, off = 0; off < top && nbytes < half; ++off)
- switch (TYPE(pp)) {
- case P_IBTREE:
- if (B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BINTERNAL_SIZE(GET_BINTERNAL(pp, off)->len);
- else
- nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
- break;
- case P_LBTREE:
- /* Two items are counted per pass; off advances twice. */
- if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
- else
- nbytes += BOVERFLOW_SIZE;
-
- ++off;
- if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
- else
- nbytes += BOVERFLOW_SIZE;
- break;
- case P_IRECNO:
- nbytes += RINTERNAL_SIZE;
- break;
- case P_LRECNO:
- nbytes += BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
- break;
- default:
- return (__db_pgfmt(dbp, pp->pgno));
- }
-sort: splitp = off;
-
- /*
- * Splitp is either at or just past the optimum split point. If
- * it's a big key, try and find something close by that's not.
- */
- if (TYPE(pp) == P_IBTREE)
- isbigkey = B_TYPE(GET_BINTERNAL(pp, off)->type) != B_KEYDATA;
- else if (TYPE(pp) == P_LBTREE)
- isbigkey = B_TYPE(GET_BKEYDATA(pp, off)->type) != B_KEYDATA;
- else
- isbigkey = 0;
- /*
- * Probe up to three entries to either side, forward first.
- *
- * NOTE(review): the forward test is "(IBTREE && BINTERNAL is KEYDATA)
- * || BKEYDATA is KEYDATA", so on a P_IBTREE page a failed first test
- * still evaluates the GET_BKEYDATA test, unlike the ternary used for
- * the backward probe. Confirm the two item layouts make this
- * harmless.
- */
- if (isbigkey)
- for (cnt = 1; cnt <= 3; ++cnt) {
- off = splitp + cnt * adjust;
- if (off < (db_indx_t)NUM_ENT(pp) &&
- ((TYPE(pp) == P_IBTREE &&
- B_TYPE(GET_BINTERNAL(pp,off)->type) == B_KEYDATA) ||
- B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)) {
- splitp = off;
- break;
- }
- /* No room to probe backward by this distance. */
- if (splitp <= (db_indx_t)(cnt * adjust))
- continue;
- off = splitp - cnt * adjust;
- if (TYPE(pp) == P_IBTREE ?
- B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA :
- B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) {
- splitp = off;
- break;
- }
- }
-
- /*
- * We can't split in the middle of a set of duplicates. We know that
- * no duplicate set can take up more than about 25% of the page,
- * because that's the point where we push it off onto a duplicate
- * page set. So, this loop can't be unbounded.
- *
- * Entries whose inp[] offsets are equal share one key item, which is
- * how a duplicate set is recognized here.
- */
- if (F_ISSET(dbp, DB_AM_DUP) && TYPE(pp) == P_LBTREE &&
- pp->inp[splitp] == pp->inp[splitp - adjust])
- for (cnt = 1;; ++cnt) {
- off = splitp + cnt * adjust;
- if (off < NUM_ENT(pp) &&
- pp->inp[splitp] != pp->inp[off]) {
- splitp = off;
- break;
- }
- if (splitp <= (db_indx_t)(cnt * adjust))
- continue;
- off = splitp - cnt * adjust;
- if (pp->inp[splitp] != pp->inp[off]) {
- /* off is the last entry of the previous set. */
- splitp = off + adjust;
- break;
- }
- }
-
-
- /* We're going to split at splitp. */
- if ((ret = __bam_copy(dbp, pp, lp, 0, splitp)) != 0)
- return (ret);
- if ((ret = __bam_copy(dbp, pp, rp, splitp, NUM_ENT(pp))) != 0)
- return (ret);
-
- *splitret = splitp;
- return (0);
-}
-
-/*
- * __bam_copy --
- *	Copy a run of entries from one page onto another.
- *
- *	dbp	database handle, used only to report a bad page format.
- *	pp	source page.
- *	cp	destination page; entries are stored from index 0 upward and
- *		its entry count and HOFFSET are updated as items are added.
- *	nxt	index of the first source entry to copy.
- *	stop	index one past the last source entry to copy.
- *
- *	Returns 0 on success, or the result of __db_pgfmt if the source page
- *	has an unexpected type.
- *
- * PUBLIC: int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
- */
-int
-__bam_copy(dbp, pp, cp, nxt, stop)
-	DB *dbp;
-	PAGE *pp, *cp;
-	u_int32_t nxt, stop;
-{
-	db_indx_t dst, len;
-	int isdup;
-
-	/*
-	 * nxt walks the source entries; dst is the slot being filled on the
-	 * target page.  Each pass, including one that only shares an offset,
-	 * adds one entry to the target.
-	 */
-	for (dst = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++dst) {
-		/*
-		 * On a btree leaf, a key slot whose offset equals that of the
-		 * preceding key slot is a duplicate: no item is copied, the
-		 * target slot just reuses the offset of its own preceding key.
-		 */
-		isdup = TYPE(pp) == P_LBTREE &&
-		    dst != 0 && (nxt % P_INDX) == 0 &&
-		    pp->inp[nxt] == pp->inp[nxt - P_INDX];
-		if (isdup) {
-			cp->inp[dst] = cp->inp[dst - P_INDX];
-			continue;
-		}
-
-		/* Work out how many bytes the source item occupies. */
-		switch (TYPE(pp)) {
-		case P_IBTREE:
-			if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA)
-				len =
-				    BINTERNAL_SIZE(GET_BINTERNAL(pp, nxt)->len);
-			else
-				len = BINTERNAL_SIZE(BOVERFLOW_SIZE);
-			break;
-		case P_LBTREE:
-		case P_LRECNO:
-			if (B_TYPE(GET_BKEYDATA(pp, nxt)->type) == B_KEYDATA)
-				len =
-				    BKEYDATA_SIZE(GET_BKEYDATA(pp, nxt)->len);
-			else
-				len = BOVERFLOW_SIZE;
-			break;
-		case P_IRECNO:
-			len = RINTERNAL_SIZE;
-			break;
-		default:
-			return (__db_pgfmt(dbp, pp->pgno));
-		}
-
-		/* Lower HOFFSET by the item size and place the item there. */
-		HOFFSET(cp) -= len;
-		cp->inp[dst] = HOFFSET(cp);
-		memcpy(P_ENTRY(cp, dst), P_ENTRY(pp, nxt), len);
-	}
-	return (0);
-}
diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c
deleted file mode 100644
index 855ef40bbd..0000000000
--- a/db2/btree/bt_stat.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)bt_stat.c 10.27 (Sleepycat) 11/25/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-
-/*
- * __bam_stat --
- *	Gather/print the btree statistics
- *
- *	dbp		database handle.
- *	spp		address of a DB_BTREE_STAT pointer that receives the
- *			newly allocated statistics; if NULL nothing is
- *			gathered.
- *	db_malloc	allocation function handed to __os_malloc for the
- *			statistics structure.
- *	flags		checked by __db_statchk; DB_RECORDCOUNT returns only
- *			the record count read from the root page.
- *
- *	Returns 0 on success or the first error encountered.  The cursor
- *	opened here is closed on every path once it has been opened.
- *
- * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
- */
-int
-__bam_stat(dbp, spp, db_malloc, flags)
-	DB *dbp;
-	void *spp;
-	void *(*db_malloc) __P((size_t));
-	u_int32_t flags;
-{
-	BTMETA *meta;
-	DBC *dbc;
-	DB_BTREE_STAT *sp;
-	DB_LOCK lock;
-	PAGE *h;
-	db_pgno_t lastpgno, pgno;
-	int ret, t_ret;
-
-	DB_PANIC_CHECK(dbp);
-
-	/* Check for invalid flags. */
-	if ((ret = __db_statchk(dbp, flags)) != 0)
-		return (ret);
-
-	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
-		return (ret);
-
-	DEBUG_LWRITE(dbc, NULL, "bam_stat", NULL, NULL, flags);
-
-	/* Nowhere to store a result: succeed, but still close the cursor. */
-	if (spp == NULL) {
-		ret = 0;
-		goto err;
-	}
-
-	/*
-	 * Allocate and clear the structure.
-	 *
-	 * NOTE(review): sp is not released on the error paths below; whether
-	 * it may be freed here depends on db_malloc's matching free routine,
-	 * which is not visible in this file.
-	 */
-	if ((ret = __os_malloc(sizeof(*sp), db_malloc, &sp)) != 0)
-		goto err;
-	memset(sp, 0, sizeof(*sp));
-
-	/* If the app just wants the record count, make it fast. */
-	if (flags == DB_RECORDCOUNT) {
-		pgno = PGNO_ROOT;
-		if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0)
-			goto err;
-		if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&h)) != 0) {
-			/* Don't leave the root lock held on failure. */
-			(void)__BT_LPUT(dbc, lock);
-			goto err;
-		}
-
-		sp->bt_nrecs = RE_NREC(h);
-
-		(void)memp_fput(dbp->mpf, h, 0);
-		(void)__BT_LPUT(dbc, lock);
-		goto done;
-	}
-
-	/* Get the meta-data page. */
-	pgno = PGNO_METADATA;
-	if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0)
-		goto err;
-	if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) {
-		/* Don't leave the meta-data lock held on failure. */
-		(void)__BT_TLPUT(dbc, lock);
-		goto err;
-	}
-
-	/* Translate the metadata flags. */
-	if (F_ISSET(meta, BTM_DUP))
-		sp->bt_flags |= DB_DUP;
-	if (F_ISSET(meta, BTM_FIXEDLEN))
-		sp->bt_flags |= DB_FIXEDLEN;
-	if (F_ISSET(meta, BTM_RECNUM))
-		sp->bt_flags |= DB_RECNUM;
-	if (F_ISSET(meta, BTM_RENUMBER))
-		sp->bt_flags |= DB_RENUMBER;
-
-	/* Get the remaining metadata fields. */
-	sp->bt_minkey = meta->minkey;
-	sp->bt_maxkey = meta->maxkey;
-	sp->bt_re_len = meta->re_len;
-	sp->bt_re_pad = meta->re_pad;
-	sp->bt_magic = meta->magic;
-	sp->bt_version = meta->version;
-
-	/* Get the page size from the DB. */
-	sp->bt_pagesize = dbp->pgsize;
-
-	/* Walk the free list, counting pages. */
-	for (sp->bt_free = 0, pgno = meta->free; pgno != PGNO_INVALID;) {
-		++sp->bt_free;
-
-		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
-			(void)memp_fput(dbp->mpf, meta, 0);
-			(void)__BT_TLPUT(dbc, lock);
-			goto err;
-		}
-		pgno = h->next_pgno;
-		(void)memp_fput(dbp->mpf, h, 0);
-	}
-
-	/* Discard the meta-data page. */
-	(void)memp_fput(dbp->mpf, meta, 0);
-	(void)__BT_TLPUT(dbc, lock);
-
-	/* Determine the last page of the database. */
-	if ((ret = memp_fget(dbp->mpf, &lastpgno, DB_MPOOL_LAST, &h)) != 0)
-		goto err;
-	(void)memp_fput(dbp->mpf, h, 0);
-
-	/* Get the root page. */
-	pgno = PGNO_ROOT;
-	if ((ret = __bam_lget(dbc, 0, PGNO_ROOT, DB_LOCK_READ, &lock)) != 0)
-		goto err;
-	if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
-		(void)__BT_LPUT(dbc, lock);
-		goto err;
-	}
-
-	/* Get the levels from the root page. */
-	sp->bt_levels = h->level;
-
-	/* Walk the page list, counting things. */
-	for (;;) {
-		switch (TYPE(h)) {
-		case P_INVALID:
-			break;
-		case P_IBTREE:
-		case P_IRECNO:
-			++sp->bt_int_pg;
-			sp->bt_int_pgfree += HOFFSET(h) - LOFFSET(h);
-			break;
-		case P_LBTREE:
-			++sp->bt_leaf_pg;
-			sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h);
-			sp->bt_nrecs += NUM_ENT(h) / P_INDX;
-			break;
-		case P_LRECNO:
-			++sp->bt_leaf_pg;
-			sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h);
-			sp->bt_nrecs += NUM_ENT(h);
-			break;
-		case P_DUPLICATE:
-			++sp->bt_dup_pg;
-			/* XXX MARGO: sp->bt_dup_pgfree; */
-			break;
-		case P_OVERFLOW:
-			++sp->bt_over_pg;
-			/* XXX MARGO: sp->bt_over_pgfree; */
-			break;
-		default:
-			/* Unknown page type: report it, but close the cursor. */
-			(void)memp_fput(dbp->mpf, h, 0);
-			(void)__BT_LPUT(dbc, lock);
-			ret = __db_pgfmt(dbp, pgno);
-			goto err;
-		}
-
-		(void)memp_fput(dbp->mpf, h, 0);
-		(void)__BT_LPUT(dbc, lock);
-
-		/*
-		 * Move on to the next page.  A lock or fetch failure ends the
-		 * walk without being reported; the counts gathered so far are
-		 * returned as a success.
-		 */
-		if (++pgno > lastpgno)
-			break;
-		if (__bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock))
-			break;
-		if (memp_fget(dbp->mpf, &pgno, 0, &h) != 0) {
-			(void)__BT_LPUT(dbc, lock);
-			break;
-		}
-	}
-
-done:	*(DB_BTREE_STAT **)spp = sp;
-	ret = 0;
-
-err:	if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
-		ret = t_ret;
-	return (ret);
-}
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
deleted file mode 100644
index 95ea76e2cd..0000000000
--- a/db2/btree/btree_auto.c
+++ /dev/null
@@ -1,1508 +0,0 @@
-/* Do not edit: automatically built by dist/db_gen.sh. */
-#include "config.h"
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <ctype.h>
-#include <errno.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "db_dispatch.h"
-#include "btree.h"
-#include "db_am.h"
-/*
- * PUBLIC: int __bam_pg_alloc_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, DB_LSN *, DB_LSN *, db_pgno_t,
- * PUBLIC: u_int32_t, db_pgno_t));
- */
-int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
- fileid, meta_lsn, page_lsn, pgno, ptype, next)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- DB_LSN * meta_lsn;
- DB_LSN * page_lsn;
- db_pgno_t pgno;
- u_int32_t ptype;
- db_pgno_t next;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_pg_alloc;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(*meta_lsn)
- + sizeof(*page_lsn)
- + sizeof(pgno)
- + sizeof(ptype)
- + sizeof(next);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- if (meta_lsn != NULL)
- memcpy(bp, meta_lsn, sizeof(*meta_lsn));
- else
- memset(bp, 0, sizeof(*meta_lsn));
- bp += sizeof(*meta_lsn);
- if (page_lsn != NULL)
- memcpy(bp, page_lsn, sizeof(*page_lsn));
- else
- memset(bp, 0, sizeof(*page_lsn));
- bp += sizeof(*page_lsn);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- memcpy(bp, &ptype, sizeof(ptype));
- bp += sizeof(ptype);
- memcpy(bp, &next, sizeof(next));
- bp += sizeof(next);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_pg_alloc_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_pg_alloc_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_pg_alloc_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_pg_alloc_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_pg_alloc: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- printf("\tpage_lsn: [%lu][%lu]\n",
- (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tptype: %lu\n", (u_long)argp->ptype);
- printf("\tnext: %lu\n", (u_long)argp->next);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_pg_alloc_read __P((void *, __bam_pg_alloc_args **));
- */
-int
-__bam_pg_alloc_read(recbuf, argpp)
- void *recbuf;
- __bam_pg_alloc_args **argpp;
-{
- __bam_pg_alloc_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_pg_alloc_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn));
- bp += sizeof(argp->meta_lsn);
- memcpy(&argp->page_lsn, bp, sizeof(argp->page_lsn));
- bp += sizeof(argp->page_lsn);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->ptype, bp, sizeof(argp->ptype));
- bp += sizeof(argp->ptype);
- memcpy(&argp->next, bp, sizeof(argp->next));
- bp += sizeof(argp->next);
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_pg_free_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, const DBT *,
- * PUBLIC: db_pgno_t));
- */
-int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, meta_lsn, header, next)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t pgno;
- DB_LSN * meta_lsn;
- const DBT *header;
- db_pgno_t next;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_pg_free;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(*meta_lsn)
- + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
- + sizeof(next);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (meta_lsn != NULL)
- memcpy(bp, meta_lsn, sizeof(*meta_lsn));
- else
- memset(bp, 0, sizeof(*meta_lsn));
- bp += sizeof(*meta_lsn);
- if (header == NULL) {
- zero = 0;
- memcpy(bp, &zero, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- } else {
- memcpy(bp, &header->size, sizeof(header->size));
- bp += sizeof(header->size);
- memcpy(bp, header->data, header->size);
- bp += header->size;
- }
- memcpy(bp, &next, sizeof(next));
- bp += sizeof(next);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_pg_free_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_pg_free_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_pg_free_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_pg_free_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_pg_free: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\tnext: %lu\n", (u_long)argp->next);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_pg_free_read __P((void *, __bam_pg_free_args **));
- */
-int
-__bam_pg_free_read(recbuf, argpp)
- void *recbuf;
- __bam_pg_free_args **argpp;
-{
- __bam_pg_free_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_pg_free_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn));
- bp += sizeof(argp->meta_lsn);
- memcpy(&argp->header.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->header.data = bp;
- bp += argp->header.size;
- memcpy(&argp->next, bp, sizeof(argp->next));
- bp += sizeof(argp->next);
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_split_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
- * PUBLIC: DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *,
- * PUBLIC: const DBT *));
- */
-int __bam_split_log(logp, txnid, ret_lsnp, flags,
- fileid, left, llsn, right, rlsn, indx,
- npgno, nlsn, pg)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t left;
- DB_LSN * llsn;
- db_pgno_t right;
- DB_LSN * rlsn;
- u_int32_t indx;
- db_pgno_t npgno;
- DB_LSN * nlsn;
- const DBT *pg;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_split;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(left)
- + sizeof(*llsn)
- + sizeof(right)
- + sizeof(*rlsn)
- + sizeof(indx)
- + sizeof(npgno)
- + sizeof(*nlsn)
- + sizeof(u_int32_t) + (pg == NULL ? 0 : pg->size);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &left, sizeof(left));
- bp += sizeof(left);
- if (llsn != NULL)
- memcpy(bp, llsn, sizeof(*llsn));
- else
- memset(bp, 0, sizeof(*llsn));
- bp += sizeof(*llsn);
- memcpy(bp, &right, sizeof(right));
- bp += sizeof(right);
- if (rlsn != NULL)
- memcpy(bp, rlsn, sizeof(*rlsn));
- else
- memset(bp, 0, sizeof(*rlsn));
- bp += sizeof(*rlsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &npgno, sizeof(npgno));
- bp += sizeof(npgno);
- if (nlsn != NULL)
- memcpy(bp, nlsn, sizeof(*nlsn));
- else
- memset(bp, 0, sizeof(*nlsn));
- bp += sizeof(*nlsn);
- if (pg == NULL) {
- zero = 0;
- memcpy(bp, &zero, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- } else {
- memcpy(bp, &pg->size, sizeof(pg->size));
- bp += sizeof(pg->size);
- memcpy(bp, pg->data, pg->size);
- bp += pg->size;
- }
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_split_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_split_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_split_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_split_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_split: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tleft: %lu\n", (u_long)argp->left);
- printf("\tllsn: [%lu][%lu]\n",
- (u_long)argp->llsn.file, (u_long)argp->llsn.offset);
- printf("\tright: %lu\n", (u_long)argp->right);
- printf("\trlsn: [%lu][%lu]\n",
- (u_long)argp->rlsn.file, (u_long)argp->rlsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tnpgno: %lu\n", (u_long)argp->npgno);
- printf("\tnlsn: [%lu][%lu]\n",
- (u_long)argp->nlsn.file, (u_long)argp->nlsn.offset);
- printf("\tpg: ");
- for (i = 0; i < argp->pg.size; i++) {
- ch = ((u_int8_t *)argp->pg.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_split_read __P((void *, __bam_split_args **));
- */
-int
-__bam_split_read(recbuf, argpp)
- void *recbuf;
- __bam_split_args **argpp;
-{
- __bam_split_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_split_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->left, bp, sizeof(argp->left));
- bp += sizeof(argp->left);
- memcpy(&argp->llsn, bp, sizeof(argp->llsn));
- bp += sizeof(argp->llsn);
- memcpy(&argp->right, bp, sizeof(argp->right));
- bp += sizeof(argp->right);
- memcpy(&argp->rlsn, bp, sizeof(argp->rlsn));
- bp += sizeof(argp->rlsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->npgno, bp, sizeof(argp->npgno));
- bp += sizeof(argp->npgno);
- memcpy(&argp->nlsn, bp, sizeof(argp->nlsn));
- bp += sizeof(argp->nlsn);
- memcpy(&argp->pg.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->pg.data = bp;
- bp += argp->pg.size;
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_rsplit_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, const DBT *, db_pgno_t,
- * PUBLIC: const DBT *, DB_LSN *));
- */
-int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, pgdbt, nrec, rootent, rootlsn)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t pgno;
- const DBT *pgdbt;
- db_pgno_t nrec;
- const DBT *rootent;
- DB_LSN * rootlsn;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_rsplit;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(u_int32_t) + (pgdbt == NULL ? 0 : pgdbt->size)
- + sizeof(nrec)
- + sizeof(u_int32_t) + (rootent == NULL ? 0 : rootent->size)
- + sizeof(*rootlsn);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (pgdbt == NULL) {
- zero = 0;
- memcpy(bp, &zero, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- } else {
- memcpy(bp, &pgdbt->size, sizeof(pgdbt->size));
- bp += sizeof(pgdbt->size);
- memcpy(bp, pgdbt->data, pgdbt->size);
- bp += pgdbt->size;
- }
- memcpy(bp, &nrec, sizeof(nrec));
- bp += sizeof(nrec);
- if (rootent == NULL) {
- zero = 0;
- memcpy(bp, &zero, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- } else {
- memcpy(bp, &rootent->size, sizeof(rootent->size));
- bp += sizeof(rootent->size);
- memcpy(bp, rootent->data, rootent->size);
- bp += rootent->size;
- }
- if (rootlsn != NULL)
- memcpy(bp, rootlsn, sizeof(*rootlsn));
- else
- memset(bp, 0, sizeof(*rootlsn));
- bp += sizeof(*rootlsn);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_rsplit_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_rsplit_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_rsplit_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_rsplit_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_rsplit: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tpgdbt: ");
- for (i = 0; i < argp->pgdbt.size; i++) {
- ch = ((u_int8_t *)argp->pgdbt.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\tnrec: %lu\n", (u_long)argp->nrec);
- printf("\trootent: ");
- for (i = 0; i < argp->rootent.size; i++) {
- ch = ((u_int8_t *)argp->rootent.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\trootlsn: [%lu][%lu]\n",
- (u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_rsplit_read __P((void *, __bam_rsplit_args **));
- */
-int
-__bam_rsplit_read(recbuf, argpp)
- void *recbuf;
- __bam_rsplit_args **argpp;
-{
- __bam_rsplit_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_rsplit_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->pgdbt.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->pgdbt.data = bp;
- bp += argp->pgdbt.size;
- memcpy(&argp->nrec, bp, sizeof(argp->nrec));
- bp += sizeof(argp->nrec);
- memcpy(&argp->rootent.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->rootent.data = bp;
- bp += argp->rootent.size;
- memcpy(&argp->rootlsn, bp, sizeof(argp->rootlsn));
- bp += sizeof(argp->rootlsn);
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_adj_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, u_int32_t));
- */
-int __bam_adj_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx, indx_copy, is_insert)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t pgno;
- DB_LSN * lsn;
- u_int32_t indx;
- u_int32_t indx_copy;
- u_int32_t is_insert;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_adj;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(*lsn)
- + sizeof(indx)
- + sizeof(indx_copy)
- + sizeof(is_insert);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (lsn != NULL)
- memcpy(bp, lsn, sizeof(*lsn));
- else
- memset(bp, 0, sizeof(*lsn));
- bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &indx_copy, sizeof(indx_copy));
- bp += sizeof(indx_copy);
- memcpy(bp, &is_insert, sizeof(is_insert));
- bp += sizeof(is_insert);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_adj_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_adj_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_adj_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_adj_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_adj: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
- (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy);
- printf("\tis_insert: %lu\n", (u_long)argp->is_insert);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_adj_read __P((void *, __bam_adj_args **));
- */
-int
-__bam_adj_read(recbuf, argpp)
- void *recbuf;
- __bam_adj_args **argpp;
-{
- __bam_adj_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_adj_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->lsn, bp, sizeof(argp->lsn));
- bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->indx_copy, bp, sizeof(argp->indx_copy));
- bp += sizeof(argp->indx_copy);
- memcpy(&argp->is_insert, bp, sizeof(argp->is_insert));
- bp += sizeof(argp->is_insert);
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_cadjust_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
- * PUBLIC: int32_t, int32_t));
- */
-int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx, adjust, total)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t pgno;
- DB_LSN * lsn;
- u_int32_t indx;
- int32_t adjust;
- int32_t total;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_cadjust;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(*lsn)
- + sizeof(indx)
- + sizeof(adjust)
- + sizeof(total);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (lsn != NULL)
- memcpy(bp, lsn, sizeof(*lsn));
- else
- memset(bp, 0, sizeof(*lsn));
- bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &adjust, sizeof(adjust));
- bp += sizeof(adjust);
- memcpy(bp, &total, sizeof(total));
- bp += sizeof(total);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_cadjust_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_cadjust_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_cadjust_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_cadjust_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_cadjust: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
- (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tadjust: %ld\n", (long)argp->adjust);
- printf("\ttotal: %ld\n", (long)argp->total);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_cadjust_read __P((void *, __bam_cadjust_args **));
- */
-int
-__bam_cadjust_read(recbuf, argpp)
- void *recbuf;
- __bam_cadjust_args **argpp;
-{
- __bam_cadjust_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_cadjust_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->lsn, bp, sizeof(argp->lsn));
- bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->adjust, bp, sizeof(argp->adjust));
- bp += sizeof(argp->adjust);
- memcpy(&argp->total, bp, sizeof(argp->total));
- bp += sizeof(argp->total);
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_cdel_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t));
- */
-int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t pgno;
- DB_LSN * lsn;
- u_int32_t indx;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_cdel;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(*lsn)
- + sizeof(indx);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (lsn != NULL)
- memcpy(bp, lsn, sizeof(*lsn));
- else
- memset(bp, 0, sizeof(*lsn));
- bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_cdel_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_cdel_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_cdel_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_cdel_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_cdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
- (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_cdel_read __P((void *, __bam_cdel_args **));
- */
-int
-__bam_cdel_read(recbuf, argpp)
- void *recbuf;
- __bam_cdel_args **argpp;
-{
- __bam_cdel_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_cdel_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->lsn, bp, sizeof(argp->lsn));
- bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_repl_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, const DBT *, const DBT *, u_int32_t,
- * PUBLIC: u_int32_t));
- */
-int __bam_repl_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx, isdeleted, orig,
- repl, prefix, suffix)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t pgno;
- DB_LSN * lsn;
- u_int32_t indx;
- u_int32_t isdeleted;
- const DBT *orig;
- const DBT *repl;
- u_int32_t prefix;
- u_int32_t suffix;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_repl;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(*lsn)
- + sizeof(indx)
- + sizeof(isdeleted)
- + sizeof(u_int32_t) + (orig == NULL ? 0 : orig->size)
- + sizeof(u_int32_t) + (repl == NULL ? 0 : repl->size)
- + sizeof(prefix)
- + sizeof(suffix);
- if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (lsn != NULL)
- memcpy(bp, lsn, sizeof(*lsn));
- else
- memset(bp, 0, sizeof(*lsn));
- bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &isdeleted, sizeof(isdeleted));
- bp += sizeof(isdeleted);
- if (orig == NULL) {
- zero = 0;
- memcpy(bp, &zero, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- } else {
- memcpy(bp, &orig->size, sizeof(orig->size));
- bp += sizeof(orig->size);
- memcpy(bp, orig->data, orig->size);
- bp += orig->size;
- }
- if (repl == NULL) {
- zero = 0;
- memcpy(bp, &zero, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- } else {
- memcpy(bp, &repl->size, sizeof(repl->size));
- bp += sizeof(repl->size);
- memcpy(bp, repl->data, repl->size);
- bp += repl->size;
- }
- memcpy(bp, &prefix, sizeof(prefix));
- bp += sizeof(prefix);
- memcpy(bp, &suffix, sizeof(suffix));
- bp += sizeof(suffix);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, 0);
- return (ret);
-}
-
-/*
- * PUBLIC: int __bam_repl_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__bam_repl_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __bam_repl_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_repl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
- (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted);
- printf("\torig: ");
- for (i = 0; i < argp->orig.size; i++) {
- ch = ((u_int8_t *)argp->orig.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\trepl: ");
- for (i = 0; i < argp->repl.size; i++) {
- ch = ((u_int8_t *)argp->repl.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\tprefix: %lu\n", (u_long)argp->prefix);
- printf("\tsuffix: %lu\n", (u_long)argp->suffix);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_repl_read __P((void *, __bam_repl_args **));
- */
-int
-__bam_repl_read(recbuf, argpp)
- void *recbuf;
- __bam_repl_args **argpp;
-{
- __bam_repl_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(sizeof(__bam_repl_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->lsn, bp, sizeof(argp->lsn));
- bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->isdeleted, bp, sizeof(argp->isdeleted));
- bp += sizeof(argp->isdeleted);
- memcpy(&argp->orig.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->orig.data = bp;
- bp += argp->orig.size;
- memcpy(&argp->repl.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->repl.data = bp;
- bp += argp->repl.size;
- memcpy(&argp->prefix, bp, sizeof(argp->prefix));
- bp += sizeof(argp->prefix);
- memcpy(&argp->suffix, bp, sizeof(argp->suffix));
- bp += sizeof(argp->suffix);
- *argpp = argp;
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_init_print __P((DB_ENV *));
- */
-int
-__bam_init_print(dbenv)
- DB_ENV *dbenv;
-{
- int ret;
-
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_alloc_print, DB_bam_pg_alloc)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_free_print, DB_bam_pg_free)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_split_print, DB_bam_split)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_rsplit_print, DB_bam_rsplit)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_adj_print, DB_bam_adj)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cadjust_print, DB_bam_cadjust)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cdel_print, DB_bam_cdel)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_repl_print, DB_bam_repl)) != 0)
- return (ret);
- return (0);
-}
-
-/*
- * PUBLIC: int __bam_init_recover __P((DB_ENV *));
- */
-int
-__bam_init_recover(dbenv)
- DB_ENV *dbenv;
-{
- int ret;
-
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_alloc_recover, DB_bam_pg_alloc)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_free_recover, DB_bam_pg_free)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_split_recover, DB_bam_split)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_rsplit_recover, DB_bam_rsplit)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_adj_recover, DB_bam_adj)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cadjust_recover, DB_bam_cadjust)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cdel_recover, DB_bam_cdel)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_repl_recover, DB_bam_repl)) != 0)
- return (ret);
- return (0);
-}
-