aboutsummaryrefslogtreecommitdiff
path: root/db2/btree/bt_rsearch.c
diff options
context:
space:
mode:
Diffstat (limited to 'db2/btree/bt_rsearch.c')
-rw-r--r--db2/btree/bt_rsearch.c164
1 files changed, 86 insertions, 78 deletions
diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c
index caa6b3515e..8efe4059a8 100644
--- a/db2/btree/bt_rsearch.c
+++ b/db2/btree/bt_rsearch.c
@@ -44,7 +44,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)bt_rsearch.c 10.15 (Sleepycat) 5/6/98";
+static const char sccsid[] = "@(#)bt_rsearch.c 10.21 (Sleepycat) 12/2/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -59,39 +59,37 @@ static const char sccsid[] = "@(#)bt_rsearch.c 10.15 (Sleepycat) 5/6/98";
* __bam_rsearch --
* Search a btree for a record number.
*
- * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *));
+ * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
*/
int
-__bam_rsearch(dbp, recnop, flags, stop, exactp)
- DB *dbp;
+__bam_rsearch(dbc, recnop, flags, stop, exactp)
+ DBC *dbc;
db_recno_t *recnop;
u_int32_t flags;
int stop, *exactp;
{
BINTERNAL *bi;
- BTREE *t;
+ CURSOR *cp;
+ DB *dbp;
DB_LOCK lock;
PAGE *h;
RINTERNAL *ri;
db_indx_t indx, top;
db_pgno_t pg;
db_recno_t i, recno, total;
- int isappend, ret, stack;
+ int ret, stack;
- t = dbp->internal;
+ dbp = dbc->dbp;
+ cp = dbc->internal;
- /*
- * We test for groups of flags, S_APPEND is the only one that can be
- * OR'd into the set. Clear it now so that the tests for equality
- * will work.
- */
- if ((isappend = LF_ISSET(S_APPEND)) != 0)
- LF_CLR(S_APPEND);
+ BT_STK_CLR(cp);
/*
* There are several ways we search a btree tree. The flags argument
* specifies if we're acquiring read or write locks and if we are
- * locking pairs of pages. See btree.h for more details.
+ * locking pairs of pages. In addition, if we're adding or deleting
+ * an item, we have to lock the entire tree, regardless. See btree.h
+ * for more details.
*
* If write-locking pages, we need to know whether or not to acquire a
* write lock on a page before getting it. This depends on how deep it
@@ -102,15 +100,36 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
* Retrieve the root page.
*/
pg = PGNO_ROOT;
- if ((ret = __bam_lget(dbp, 0, PGNO_ROOT,
- flags == S_INSERT || flags == S_DELETE ?
- DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
+ stack = LF_ISSET(S_STACK);
+ if ((ret = __bam_lget(dbc,
+ 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
return (ret);
- if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
- (void)__BT_LPUT(dbp, lock);
+ if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ (void)__BT_LPUT(dbc, lock);
return (ret);
}
- total = RE_NREC(h);
+
+ /*
+ * Decide if we need to save this page; if we do, write lock it.
+ * We deliberately don't lock-couple on this call. If the tree
+ * is tiny, i.e., one page, and two threads are busily updating
+ * the root page, we're almost guaranteed deadlocks galore, as
+ * each one gets a read lock and then blocks the other's attempt
+ * for a write lock.
+ */
+ if (!stack &&
+ ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
+ (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
+ (void)memp_fput(dbp->mpf, h, 0);
+ (void)__BT_LPUT(dbc, lock);
+ if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
+ return (ret);
+ if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ (void)__BT_LPUT(dbc, lock);
+ return (ret);
+ }
+ stack = 1;
+ }
/*
* If appending to the tree, set the record number now -- we have the
@@ -124,7 +143,8 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
* for the record immediately after the last record in the tree, so do
* a fast check now.
*/
- if (isappend) {
+ total = RE_NREC(h);
+ if (LF_ISSET(S_APPEND)) {
*exactp = 0;
*recnop = recno = total + 1;
} else {
@@ -133,33 +153,14 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
*exactp = 1;
else {
*exactp = 0;
- if (!PAST_END_OK(flags) || recno > total + 1) {
+ if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) {
(void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_LPUT(dbp, lock);
+ (void)__BT_LPUT(dbc, lock);
return (DB_NOTFOUND);
}
}
}
- /* Decide if we're building a stack based on the operation. */
- BT_STK_CLR(t);
- stack = flags == S_DELETE || flags == S_INSERT;
-
- /*
- * Decide if we need to save this page; if we do, write lock it, and
- * start to build a stack.
- */
- if (LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) {
- (void)memp_fput(dbp->mpf, h, 0);
- if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
- return (ret);
- if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
- (void)__BT_LPUT(dbp, lock);
- return (ret);
- }
- stack = 1;
- }
-
/*
* !!!
* Record numbers in the tree are 0-based, but the recno is
@@ -177,7 +178,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
* not exist if there are enough deleted records in the
* page.
*/
- if (recno <= NUM_ENT(h))
+ if (recno <= (db_recno_t)NUM_ENT(h) / P_INDX)
for (i = recno - 1;; --i) {
if (B_DISSET(GET_BKEYDATA(h,
i * P_INDX + O_INDX)->type))
@@ -185,10 +186,10 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
if (i == 0)
break;
}
- if (recno > NUM_ENT(h)) {
+ if (recno > (db_recno_t)NUM_ENT(h) / P_INDX) {
*exactp = 0;
- if (!PAST_END_OK(flags) ||
- recno > (db_recno_t)(NUM_ENT(h) + 1)) {
+ if (!LF_ISSET(S_PAST_EOF) || recno >
+ (db_recno_t)(NUM_ENT(h) / P_INDX + 1)) {
ret = DB_NOTFOUND;
goto err;
}
@@ -197,7 +198,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
/* Correct from 1-based to 0-based for a page offset. */
--recno;
- BT_STK_ENTER(t, h, recno * P_INDX, lock, ret);
+ BT_STK_ENTER(cp, h, recno * P_INDX, lock, ret);
return (ret);
case P_IBTREE:
for (indx = 0, top = NUM_ENT(h);;) {
@@ -213,7 +214,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
/* Correct from 1-based to 0-based for a page offset. */
--recno;
- BT_STK_ENTER(t, h, recno, lock, ret);
+ BT_STK_ENTER(cp, h, recno, lock, ret);
return (ret);
case P_IRECNO:
for (indx = 0, top = NUM_ENT(h);;) {
@@ -232,42 +233,42 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
if (stack) {
/* Return if this is the lowest page wanted. */
if (LF_ISSET(S_PARENT) && stop == h->level) {
- BT_STK_ENTER(t, h, indx, lock, ret);
+ BT_STK_ENTER(cp, h, indx, lock, ret);
return (ret);
}
- BT_STK_PUSH(t, h, indx, lock, ret);
- if (ret)
+ BT_STK_PUSH(cp, h, indx, lock, ret);
+ if (ret != 0)
goto err;
- if ((ret = __bam_lget(dbp, 0, pg,
- LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
- &lock)) != 0)
+ if ((ret =
+ __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
goto err;
} else {
- (void)memp_fput(dbp->mpf, h, 0);
-
/*
* Decide if we want to return a pointer to the next
* page in the stack. If we do, write lock it and
* never unlock it.
*/
- if (LF_ISSET(S_PARENT) &&
- (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1))
+ if ((LF_ISSET(S_PARENT) &&
+ (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
+ (h->level - 1) == LEAFLEVEL)
stack = 1;
- if ((ret = __bam_lget(dbp, 1, pg,
- LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
- &lock)) != 0)
+ (void)memp_fput(dbp->mpf, h, 0);
+
+ if ((ret =
+ __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ?
+ DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
goto err;
}
- if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0)
+ if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
goto err;
}
/* NOTREACHED */
-err: BT_STK_POP(t);
- __bam_stkrel(dbp);
+err: BT_STK_POP(cp);
+ __bam_stkrel(dbc, 0);
return (ret);
}
@@ -275,25 +276,29 @@ err: BT_STK_POP(t);
* __bam_adjust --
* Adjust the tree after adding or deleting a record.
*
- * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int32_t));
+ * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
*/
int
-__bam_adjust(dbp, t, adjust)
- DB *dbp;
- BTREE *t;
+__bam_adjust(dbc, adjust)
+ DBC *dbc;
int32_t adjust;
{
+ CURSOR *cp;
+ DB *dbp;
EPG *epg;
PAGE *h;
int ret;
+ dbp = dbc->dbp;
+ cp = dbc->internal;
+
/* Update the record counts for the tree. */
- for (epg = t->bt_sp; epg <= t->bt_csp; ++epg) {
+ for (epg = cp->sp; epg <= cp->csp; ++epg) {
h = epg->page;
if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
- if (DB_LOGGING(dbp) &&
+ if (DB_LOGGING(dbc) &&
(ret = __bam_cadjust_log(dbp->dbenv->lg_info,
- dbp->txn, &LSN(h), 0, dbp->log_fileid,
+ dbc->txn, &LSN(h), 0, dbp->log_fileid,
PGNO(h), &LSN(h), (u_int32_t)epg->indx,
adjust, 1)) != 0)
return (ret);
@@ -317,28 +322,31 @@ __bam_adjust(dbp, t, adjust)
* __bam_nrecs --
* Return the number of records in the tree.
*
- * PUBLIC: int __bam_nrecs __P((DB *, db_recno_t *));
+ * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
*/
int
-__bam_nrecs(dbp, rep)
- DB *dbp;
+__bam_nrecs(dbc, rep)
+ DBC *dbc;
db_recno_t *rep;
{
+ DB *dbp;
DB_LOCK lock;
PAGE *h;
db_pgno_t pgno;
int ret;
+ dbp = dbc->dbp;
+
pgno = PGNO_ROOT;
- if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
+ if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0)
return (ret);
- if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
return (ret);
*rep = RE_NREC(h);
(void)memp_fput(dbp->mpf, h, 0);
- (void)__BT_TLPUT(dbp, lock);
+ (void)__BT_TLPUT(dbc, lock);
return (0);
}