aboutsummaryrefslogtreecommitdiff
path: root/db2/db
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-06-13 13:36:34 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-06-13 13:36:34 +0000
commit ec239360d13518a13f572b635d036c7d10028010 (patch)
tree bdb5111363f45d2107849c2456b575d72779174c /db2/db
parent fc3703521650a9b6db910a50c4fc0f410496e134 (diff)
downloadglibc-ec239360d13518a13f572b635d036c7d10028010.tar
glibc-ec239360d13518a13f572b635d036c7d10028010.tar.gz
glibc-ec239360d13518a13f572b635d036c7d10028010.tar.bz2
glibc-ec239360d13518a13f572b635d036c7d10028010.zip
Update.
* db2/Makefile (distribute): Remove files which do not exist anymore.
Diffstat (limited to 'db2/db')
-rw-r--r-- db2/db/db.c 313
-rw-r--r-- db2/db/db.src 13
-rw-r--r-- db2/db/db_am.c 430
-rw-r--r-- db2/db/db_auto.c 299
-rw-r--r-- db2/db/db_dispatch.c 41
-rw-r--r-- db2/db/db_dup.c 511
-rw-r--r-- db2/db/db_iface.c 488
-rw-r--r-- db2/db/db_join.c 271
-rw-r--r-- db2/db/db_overflow.c 129
-rw-r--r-- db2/db/db_pr.c 110
-rw-r--r-- db2/db/db_rec.c 155
-rw-r--r-- db2/db/db_ret.c 21
-rw-r--r-- db2/db/db_thread.c 121
13 files changed, 1997 insertions, 905 deletions
diff --git a/db2/db/db.c b/db2/db/db.c
index 70c6c5443b..2b4c270324 100644
--- a/db2/db/db.c
+++ b/db2/db/db.c
@@ -44,7 +44,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db.c 10.57 (Sleepycat) 5/7/98";
+static const char sccsid[] = "@(#)db.c 10.75 (Sleepycat) 12/3/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -67,9 +67,6 @@ static const char sccsid[] = "@(#)db.c 10.57 (Sleepycat) 5/7/98";
#include "db_am.h"
#include "common_ext.h"
-static int db_close __P((DB *, u_int32_t));
-static int db_fd __P((DB *, int *));
-
/*
* If the metadata page has the flag set, set the local flag. If the page
* does NOT have the flag set, return EINVAL if the user's dbinfo argument
@@ -87,11 +84,6 @@ static int db_fd __P((DB *, int *));
} \
}
-#ifdef _LIBC
-#define db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) \
- __nss_db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
-#endif
-
/*
* db_open --
* Main library interface to the DB access methods.
@@ -141,9 +133,10 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
/*
* Specifying a cachesize to db_open(3), after creating an
- * environment, is a common mistake.
+ * environment with DB_INIT_MPOOL, is a common mistake.
*/
- if (dbinfo != NULL && dbinfo->db_cachesize != 0) {
+ if (dbenv->mp_info != NULL &&
+ dbinfo != NULL && dbinfo->db_cachesize != 0) {
__db_err(dbenv,
"cachesize will be ignored if environment exists");
return (EINVAL);
@@ -156,12 +149,16 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
real_name = NULL;
/* Allocate the DB structure, reference the DB_ENV structure. */
- if ((dbp = (DB *)__db_calloc(1, sizeof(DB))) == NULL) {
- __db_err(dbenv, "%s", strerror(ENOMEM));
- return (ENOMEM);
- }
+ if ((ret = __os_calloc(1, sizeof(DB), &dbp)) != 0)
+ return (ret);
dbp->dbenv = dbenv;
+ /* Random initialization. */
+ TAILQ_INIT(&dbp->free_queue);
+ TAILQ_INIT(&dbp->active_queue);
+ if ((ret = __db_init_wrapper(dbp)) != 0)
+ goto err;
+
/* Convert the db_open(3) flags. */
if (LF_ISSET(DB_RDONLY))
F_SET(dbp, DB_AM_RDONLY);
@@ -192,21 +189,16 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
}
/*
- * Always set the master and initialize the queues, so we can
- * use these fields without checking the thread bit.
- */
- dbp->master = dbp;
- LIST_INIT(&dbp->handleq);
- LIST_INSERT_HEAD(&dbp->handleq, dbp, links);
- TAILQ_INIT(&dbp->curs_queue);
-
- /*
* Set based on the dbenv fields, although no logging or transactions
* are possible for temporary files.
*/
if (dbenv != NULL) {
- if (dbenv->lk_info != NULL)
- F_SET(dbp, DB_AM_LOCKING);
+ if (dbenv->lk_info != NULL) {
+ if (F_ISSET(dbenv, DB_ENV_CDB))
+ F_SET(dbp, DB_AM_CDB);
+ else
+ F_SET(dbp, DB_AM_LOCKING);
+ }
if (fname != NULL && dbenv->lg_info != NULL)
F_SET(dbp, DB_AM_LOGGING);
}
@@ -215,9 +207,29 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
if (dbinfo == NULL) {
dbp->pgsize = 0;
dbp->db_malloc = NULL;
+ dbp->dup_compare = NULL;
} else {
+ /*
+ * We don't want anything that's not a power-of-2, as we rely
+ * on that for alignment of various types on the pages.
+ */
+ if ((dbp->pgsize = dbinfo->db_pagesize) != 0 &&
+ (u_int32_t)1 << __db_log2(dbp->pgsize) != dbp->pgsize) {
+ __db_err(dbenv, "page sizes must be a power-of-2");
+ goto einval;
+ }
dbp->pgsize = dbinfo->db_pagesize;
dbp->db_malloc = dbinfo->db_malloc;
+ if (F_ISSET(dbinfo, DB_DUPSORT)) {
+ if (F_ISSET(dbinfo, DB_DUP))
+ dbp->dup_compare = dbinfo->dup_compare == NULL ?
+ __bam_defcmp : dbinfo->dup_compare;
+ else {
+ __db_err(dbenv, "DB_DUPSORT requires DB_DUP");
+ goto einval;
+ }
+ F_CLR(dbinfo, DB_DUPSORT);
+ }
}
/* Fill in the default file mode. */
@@ -235,6 +247,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
default:
goto err;
}
+ dbp->byteswapped = F_ISSET(dbp, DB_AM_SWAP) ? 1 : 0;
/*
* If we have a file name, try and read the first page, figure out
@@ -289,7 +302,7 @@ open_retry: if (LF_ISSET(DB_CREATE)) {
* sizes, we limit the default pagesize to 16K.
*/
if (dbp->pgsize == 0) {
- if ((ret = __db_ioinfo(real_name,
+ if ((ret = __os_ioinfo(real_name,
fd, NULL, NULL, &iopsize)) != 0) {
__db_err(dbenv,
"%s: %s", real_name, strerror(ret));
@@ -299,6 +312,14 @@ open_retry: if (LF_ISSET(DB_CREATE)) {
iopsize = 512;
if (iopsize > 16 * 1024)
iopsize = 16 * 1024;
+
+ /*
+ * Sheer paranoia, but we don't want anything that's
+ * not a power-of-2, as we rely on that for alignment
+ * of various types on the pages.
+ */
+ DB_ROUNDOFF(iopsize, 512);
+
dbp->pgsize = iopsize;
F_SET(dbp, DB_AM_PGDEF);
}
@@ -308,11 +329,11 @@ open_retry: if (LF_ISSET(DB_CREATE)) {
* that the meta-data for all access methods fits in 512
* bytes, and that no database will be smaller than that.
*/
- if ((ret = __db_read(fd, mbuf, sizeof(mbuf), &nr)) != 0)
+ if ((ret = __os_read(fd, mbuf, sizeof(mbuf), &nr)) != 0)
goto err;
/* The fd is no longer needed. */
- (void)__db_close(fd);
+ (void)__os_close(fd);
fd = -1;
if (nr != sizeof(mbuf)) {
@@ -337,7 +358,7 @@ open_retry: if (LF_ISSET(DB_CREATE)) {
*/
if (retry_cnt++ < 3 &&
!LF_ISSET(DB_CREATE | DB_TRUNCATE)) {
- __db_sleep(1, 0);
+ __os_sleep(1, 0);
goto open_retry;
}
if (type == DB_UNKNOWN) {
@@ -396,7 +417,7 @@ retry: switch (((BTMETA *)mbuf)->magic) {
/* Copy the file's unique id. */
need_fileid = 0;
- memcpy(dbp->lock.fileid, btm->uid, DB_FILE_ID_LEN);
+ memcpy(dbp->fileid, btm->uid, DB_FILE_ID_LEN);
break;
case DB_HASHMAGIC:
if (type != DB_HASH && type != DB_UNKNOWN)
@@ -425,7 +446,7 @@ retry: switch (((BTMETA *)mbuf)->magic) {
/* Copy the file's unique id. */
need_fileid = 0;
- memcpy(dbp->lock.fileid, hashm->uid, DB_FILE_ID_LEN);
+ memcpy(dbp->fileid, hashm->uid, DB_FILE_ID_LEN);
break;
default:
if (swapped) {
@@ -489,11 +510,9 @@ empty: /*
F_SET(dbp, DB_AM_MLOCAL);
if (dbenv == NULL) {
- if ((dbp->mp_dbenv =
- (DB_ENV *)__db_calloc(sizeof(DB_ENV), 1)) == NULL) {
- ret = ENOMEM;
+ if ((ret = __os_calloc(1,
+ sizeof(DB_ENV), &dbp->mp_dbenv)) != 0)
goto err;
- }
envp = dbp->mp_dbenv;
restore = 0;
@@ -554,20 +573,20 @@ empty: /*
*/
if (need_fileid) {
if (fname == NULL) {
- memset(dbp->lock.fileid, 0, DB_FILE_ID_LEN);
+ memset(dbp->fileid, 0, DB_FILE_ID_LEN);
if (F_ISSET(dbp, DB_AM_LOCKING) &&
(ret = lock_id(dbenv->lk_info,
- (u_int32_t *)dbp->lock.fileid)) != 0)
+ (u_int32_t *)dbp->fileid)) != 0)
goto err;
} else
- if ((ret = __db_fileid(dbenv,
- real_name, 1, dbp->lock.fileid)) != 0)
+ if ((ret = __os_fileid(dbenv,
+ real_name, 1, dbp->fileid)) != 0)
goto err;
}
/* No further use for the real name. */
if (real_name != NULL)
- FREES(real_name);
+ __os_freestr(real_name);
real_name = NULL;
/*
@@ -595,7 +614,7 @@ empty: /*
memset(&finfo, 0, sizeof(finfo));
finfo.ftype = ftype;
finfo.pgcookie = &pgcookie;
- finfo.fileid = dbp->lock.fileid;
+ finfo.fileid = dbp->fileid;
finfo.lsn_offset = 0;
finfo.clear_len = DB_PAGE_CLEAR_LEN;
if ((ret = memp_fopen(dbp->mp, fname,
@@ -605,12 +624,21 @@ empty: /*
/*
* XXX
- * Truly spectacular layering violation. We need a per-thread mutex
- * that lives in shared memory (thanks, HP-UX!) and so we acquire a
- * pointer to the mpool one.
+ * We need a per-thread mutex that lives in shared memory -- HP-UX
+ * can't allocate mutexes in malloc'd memory. Allocate it from the
+ * shared memory region, since it's the only one that is guaranteed
+ * to exist.
*/
- if (F_ISSET(dbp, DB_AM_THREAD))
- dbp->mutexp = dbp->mpf->mutexp;
+ if (F_ISSET(dbp, DB_AM_THREAD)) {
+ if ((ret = __memp_reg_alloc(dbp->mp,
+ sizeof(db_mutex_t), NULL, &dbp->mutexp)) != 0)
+ goto err;
+ /*
+ * Since we only get here if DB_THREAD was specified, we know
+ * we have spinlocks and no file offset argument is needed.
+ */
+ (void)__db_mutex_init(dbp->mutexp, 0);
+ }
/* Get a log file id. */
if (F_ISSET(dbp, DB_AM_LOGGING) &&
@@ -618,18 +646,6 @@ empty: /*
dbp, fname, type, &dbp->log_fileid)) != 0)
goto err;
- /*
- * Get a locker id for this DB, and build the lock cookie: the first
- * db_pgno_t bytes are the page number, the next N bytes are the file
- * id.
- */
- if (F_ISSET(dbp, DB_AM_LOCKING)) {
- if ((ret = lock_id(dbenv->lk_info, &dbp->locker)) != 0)
- goto err;
- dbp->lock_dbt.size = sizeof(dbp->lock);
- dbp->lock_dbt.data = &dbp->lock;
- }
-
/* Call the real open function. */
switch (type) {
case DB_BTREE:
@@ -639,7 +655,7 @@ empty: /*
if (dbinfo != NULL && (ret = __db_fcchk(dbenv,
"db_open", dbinfo->flags, DB_DUP, DB_RECNUM)) != 0)
goto err;
- if ((ret = __bam_open(dbp, type, dbinfo)) != 0)
+ if ((ret = __bam_open(dbp, dbinfo)) != 0)
goto err;
break;
case DB_HASH:
@@ -655,24 +671,20 @@ empty: /*
if (dbinfo != NULL && (ret = __db_fchk(dbenv,
"db_open", dbinfo->flags, DB_INFO_FLAGS)) != 0)
goto err;
- if ((ret = __ram_open(dbp, type, dbinfo)) != 0)
+ if ((ret = __ram_open(dbp, dbinfo)) != 0)
goto err;
break;
default:
abort();
}
- /* Call a local close routine. */
- dbp->close = db_close;
- dbp->fd = db_fd;
-
*dbpp = dbp;
return (0);
einval: ret = EINVAL;
err: /* Close the file descriptor. */
if (fd != -1)
- (void)__db_close(fd);
+ (void)__os_close(fd);
/* Discard the log file id. */
if (dbp->log_fileid != 0)
@@ -688,90 +700,60 @@ err: /* Close the file descriptor. */
/* If we allocated a DB_ENV, discard it. */
if (dbp->mp_dbenv != NULL)
- FREE(dbp->mp_dbenv, sizeof(DB_ENV));
+ __os_free(dbp->mp_dbenv, sizeof(DB_ENV));
if (real_name != NULL)
- FREES(real_name);
+ __os_freestr(real_name);
if (dbp != NULL)
- FREE(dbp, sizeof(DB));
+ __os_free(dbp, sizeof(DB));
return (ret);
}
-#ifdef _LIBC
-# undef db_open
-weak_alias (__nss_db_open, db_open)
-#endif
-
/*
- * db_close --
+ * __db_close --
* Close a DB tree.
+ *
+ * PUBLIC: int __db_close __P((DB *, u_int32_t));
*/
-static int
-db_close(dbp, flags)
+int
+__db_close(dbp, flags)
DB *dbp;
u_int32_t flags;
{
DBC *dbc;
- DB *tdbp;
int ret, t_ret;
+ DB_PANIC_CHECK(dbp);
+
/* Validate arguments. */
- if ((ret = __db_fchk(dbp->dbenv, "db_close", flags, DB_NOSYNC)) != 0)
+ if ((ret = __db_closechk(dbp, flags)) != 0)
return (ret);
/* Sync the underlying file. */
- if (!LF_ISSET(DB_NOSYNC) &&
+ if (flags != DB_NOSYNC &&
(t_ret = dbp->sync(dbp, 0)) != 0 && ret == 0)
ret = t_ret;
/*
- * Call the underlying access method close routine for all the
- * cursors and handles.
+ * Go through the active cursors and call the cursor recycle routine,
+ * which resolves pending operations and moves the cursors onto the
+ * free list. Then, walk the free list and call the cursor destroy
+ * routine.
*/
- for (tdbp = LIST_FIRST(&dbp->handleq);
- tdbp != NULL; tdbp = LIST_NEXT(tdbp, links)) {
- while ((dbc = TAILQ_FIRST(&tdbp->curs_queue)) != NULL)
- switch (tdbp->type) {
- case DB_BTREE:
- if ((t_ret =
- __bam_c_iclose(tdbp, dbc)) != 0 && ret == 0)
- ret = t_ret;
- break;
- case DB_HASH:
- if ((t_ret =
- __ham_c_iclose(tdbp, dbc)) != 0 && ret == 0)
- ret = t_ret;
- break;
- case DB_RECNO:
- if ((t_ret =
- __ram_c_iclose(tdbp, dbc)) != 0 && ret == 0)
- ret = t_ret;
- break;
- default:
- abort();
- }
-
- switch (tdbp->type) {
- case DB_BTREE:
- if ((t_ret = __bam_close(tdbp)) != 0 && ret == 0)
- ret = t_ret;
- break;
- case DB_HASH:
- if ((t_ret = __ham_close(tdbp)) != 0 && ret == 0)
- ret = t_ret;
- break;
- case DB_RECNO:
- if ((t_ret = __ram_close(tdbp)) != 0 && ret == 0)
- ret = t_ret;
- break;
- default:
- abort();
- }
- }
+ while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
+ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+ while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
+ if ((t_ret = __db_c_destroy(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+
+ /* Call the access specific close function. */
+ if ((t_ret = dbp->am_close(dbp)) != 0 && ret == 0)
+ ret = t_ret;
/* Sync the memory pool. */
- if (!LF_ISSET(DB_NOSYNC) && (t_ret = memp_fsync(dbp->mpf)) != 0 &&
+ if (flags != DB_NOSYNC && (t_ret = memp_fsync(dbp->mpf)) != 0 &&
t_ret != DB_INCOMPLETE && ret == 0)
ret = t_ret;
@@ -788,91 +770,12 @@ db_close(dbp, flags)
if (F_ISSET(dbp, DB_AM_LOGGING))
(void)log_unregister(dbp->dbenv->lg_info, dbp->log_fileid);
- /* Discard the lock cookie for all handles. */
- for (tdbp = LIST_FIRST(&dbp->handleq);
- tdbp != NULL; tdbp = LIST_NEXT(tdbp, links))
- if (F_ISSET(tdbp, DB_AM_LOCKING)) {
-#ifdef DEBUG
- DB_LOCKREQ request;
-
- /*
- * If we're running tests, display any locks currently
- * held. It's possible that some applications may hold
- * locks for long periods, e.g., conference room locks,
- * but the DB tests should never close holding locks.
- */
- request.op = DB_LOCK_DUMP;
- if ((t_ret = lock_vec(tdbp->dbenv->lk_info,
- tdbp->locker, 0, &request, 1, NULL)) != 0 &&
- ret == 0)
- ret = EAGAIN;
-#endif
- }
-
/* If we allocated a DB_ENV, discard it. */
if (dbp->mp_dbenv != NULL)
- FREE(dbp->mp_dbenv, sizeof(DB_ENV));
+ __os_free(dbp->mp_dbenv, sizeof(DB_ENV));
- /* Free all of the DB's. */
- LIST_REMOVE(dbp, links);
- while ((tdbp = LIST_FIRST(&dbp->handleq)) != NULL) {
- LIST_REMOVE(tdbp, links);
- FREE(tdbp, sizeof(*tdbp));
- }
- FREE(dbp, sizeof(*dbp));
+ /* Free the DB. */
+ __os_free(dbp, sizeof(*dbp));
return (ret);
}
-
-/*
- * db_fd --
- * Return a file descriptor for flock'ing.
- */
-static int
-db_fd(dbp, fdp)
- DB *dbp;
- int *fdp;
-{
- /*
- * XXX
- * Truly spectacular layering violation.
- */
- return (__mp_xxx_fd(dbp->mpf, fdp));
-}
-
-/*
- * __db_pgerr --
- * Error when unable to retrieve a specified page.
- *
- * PUBLIC: int __db_pgerr __P((DB *, db_pgno_t));
- */
-int
-__db_pgerr(dbp, pgno)
- DB *dbp;
- db_pgno_t pgno;
-{
- /*
- * Three things are certain:
- * Death, taxes, and lost data.
- * Guess which has occurred.
- */
- __db_err(dbp->dbenv,
- "unable to create/retrieve page %lu", (u_long)pgno);
- return (__db_panic(dbp));
-}
-
-/*
- * __db_pgfmt --
- * Error when a page has the wrong format.
- *
- * PUBLIC: int __db_pgfmt __P((DB *, db_pgno_t));
- */
-int
-__db_pgfmt(dbp, pgno)
- DB *dbp;
- db_pgno_t pgno;
-{
- __db_err(dbp->dbenv,
- "page %lu: illegal page type or format", (u_long)pgno);
- return (__db_panic(dbp));
-}
diff --git a/db2/db/db.src b/db2/db/db.src
index 91d8b390a1..26557e10ac 100644
--- a/db2/db/db.src
+++ b/db2/db/db.src
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*
- * @(#)db.src 10.6 (Sleepycat) 4/28/98
+ * @(#)db.src 10.8 (Sleepycat) 9/20/98
*/
PREFIX db
@@ -98,6 +98,7 @@ END
/*
* relink -- Handles relinking around a page.
*
+ * opcode: indicates if this is an addpage or delete page
* pgno: the page being changed.
* lsn the page's original lsn.
* prev: the previous page.
@@ -106,6 +107,7 @@ END
* lsn_next: the previous page's original lsn.
*/
BEGIN relink
+ARG opcode u_int32_t lu
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
@@ -148,12 +150,3 @@ DBT key DBT s
DBT data DBT s
ARG arg_flags u_int32_t lu
END
-
-/*
- * noop -- do nothing, but get an LSN.
- */
-BEGIN noop
-ARG fileid u_int32_t lu
-ARG pgno db_pgno_t lu
-POINTER prevlsn DB_LSN * lu
-END
diff --git a/db2/db/db_am.c b/db2/db/db_am.c
new file mode 100644
index 0000000000..e02ad57f53
--- /dev/null
+++ b/db2/db/db_am.c
@@ -0,0 +1,430 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_am.c 10.15 (Sleepycat) 12/30/98";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "btree.h"
+#include "hash.h"
+#include "db_am.h"
+#include "db_ext.h"
+
+static int __db_c_close __P((DBC *));
+static int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t));
+static int __db_fd __P((DB *, int *));
+static int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+static int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+
+/*
+ * __db_init_wrapper --
+ *	Install the access-method-independent entry points in a DB handle.
+ *
+ *	The del and stat slots have no generic implementation; they are
+ *	cleared here and must be set by the access method.  Always returns 0.
+ *
+ * PUBLIC: int __db_init_wrapper __P((DB *));
+ */
+int
+__db_init_wrapper(dbp)
+	DB *dbp;
+{
+	/* !!! These two must be set by the access method. */
+	dbp->del = NULL;
+	dbp->stat = NULL;
+
+	/* Handle-level operations. */
+	dbp->close = __db_close;
+	dbp->fd = __db_fd;
+	dbp->sync = __db_sync;
+
+	/* Data operations, all built on the cursor interface. */
+	dbp->cursor = __db_cursor;
+	dbp->get = __db_get;
+	dbp->put = __db_put;
+	dbp->join = __db_join;
+
+	return (0);
+}
+
+/*
+ * __db_cursor --
+ * Allocate and return a cursor.
+ *
+ * Reuses a cursor from dbp's free queue when one is available; otherwise
+ * allocates a new DBC, sets up its locking information and runs the
+ * access method's cursor init routine. The cursor is then appended to
+ * dbp's active queue and stored through dbcp.
+ *
+ * dbp: handle the cursor belongs to
+ * txn: enclosing transaction, or NULL; selects the locker id used
+ * dbcp: where the cursor is stored on success
+ * flags: only consulted when DB_AM_CDB is set (DB_WRITELOCK, DB_RMW)
+ *
+ * Returns 0 on success; the error from __os_calloc, lock_id or the
+ * access method init routine; EINVAL for an unrecognized dbp->type; or
+ * EAGAIN if the DB_AM_CDB lock cannot be acquired.
+ */
+static int
+__db_cursor(dbp, txn, dbcp, flags)
+DB *dbp;
+DB_TXN *txn;
+DBC **dbcp;
+u_int32_t flags;
+{
+DBC *dbc, *adbc;
+int ret;
+db_lockmode_t mode;
+u_int32_t op;
+
+DB_PANIC_CHECK(dbp);
+
+/* Take one from the free list if it's available. */
+DB_THREAD_LOCK(dbp);
+if ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
+TAILQ_REMOVE(&dbp->free_queue, dbc, links);
+else {
+/*
+ * Nothing on the free queue: release DB_THREAD_LOCK while the new
+ * cursor is built; it is re-taken at the end of this branch, before
+ * the active queue is modified.
+ */
+DB_THREAD_UNLOCK(dbp);
+
+if ((ret = __os_calloc(1, sizeof(DBC), &dbc)) != 0)
+return (ret);
+
+dbc->dbp = dbp;
+dbc->c_close = __db_c_close;
+
+/* Set up locking information. */
+if (F_ISSET(dbp, DB_AM_LOCKING | DB_AM_CDB)) {
+/*
+ * If we are not threaded, then there is no need to
+ * create new locker ids. We know that no one else
+ * is running concurrently using this DB, so we can
+ * take a peek at any cursors on the active queue.
+ */
+if (!F_ISSET(dbp, DB_AM_THREAD) &&
+(adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
+dbc->lid = adbc->lid;
+else
+if ((ret = lock_id(dbp->dbenv->lk_info,
+&dbc->lid)) != 0)
+goto err;
+
+/*
+ * Build the lock object. With DB_AM_CDB it covers only the file
+ * id; otherwise it is the cursor's whole lock structure.
+ */
+memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
+if (F_ISSET(dbp, DB_AM_CDB)) {
+dbc->lock_dbt.size = DB_FILE_ID_LEN;
+dbc->lock_dbt.data = dbc->lock.fileid;
+} else {
+dbc->lock_dbt.size = sizeof(dbc->lock);
+dbc->lock_dbt.data = &dbc->lock;
+}
+}
+
+/* Btree and recno share one cursor init routine; hash has its own. */
+switch (dbp->type) {
+case DB_BTREE:
+case DB_RECNO:
+if ((ret = __bam_c_init(dbc)) != 0)
+goto err;
+break;
+case DB_HASH:
+if ((ret = __ham_c_init(dbc)) != 0)
+goto err;
+break;
+default:
+ret = EINVAL;
+goto err;
+}
+
+/* Both branches reach the queue insert below with the lock held. */
+DB_THREAD_LOCK(dbp);
+}
+
+/*
+ * Without a transaction the cursor locks under its own id, otherwise
+ * under the transaction's id.
+ */
+if ((dbc->txn = txn) == NULL)
+dbc->locker = dbc->lid;
+else
+dbc->locker = txn->txnid;
+
+TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
+DB_THREAD_UNLOCK(dbp);
+
+/*
+ * If this is the concurrent DB product, then we do all locking
+ * in the interface, which is right here.
+ */
+if (F_ISSET(dbp, DB_AM_CDB)) {
+/*
+ * DB_WRITELOCK requests DB_LOCK_WRITE, DB_RMW requests
+ * DB_LOCK_IWRITE, anything else DB_LOCK_READ.
+ */
+op = LF_ISSET(DB_OPFLAGS_MASK);
+mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE :
+(LF_ISSET(DB_RMW) ? DB_LOCK_IWRITE : DB_LOCK_READ);
+if ((ret = lock_get(dbp->dbenv->lk_info, dbc->locker, 0,
+&dbc->lock_dbt, mode, &dbc->mylock)) != 0) {
+/*
+ * The cursor is already on the active queue, so it is closed
+ * rather than freed. The lock_get error code is not
+ * propagated; the caller always sees EAGAIN.
+ */
+(void)__db_c_close(dbc);
+return (EAGAIN);
+}
+if (LF_ISSET(DB_RMW))
+F_SET(dbc, DBC_RMW);
+if (op == DB_WRITELOCK)
+F_SET(dbc, DBC_WRITER);
+}
+
+*dbcp = dbc;
+return (0);
+
+/*
+ * Reached only for a freshly allocated cursor that was never queued,
+ * and with DB_THREAD_LOCK released.
+ */
+err: __os_free(dbc, sizeof(*dbc));
+return (ret);
+}
+
+/*
+ * __db_c_close --
+ *	Close the cursor (recycle for later use).
+ *
+ *	The cursor is unlinked from the handle's active queue, the access
+ *	method's cursor close routine is run, any lock taken for DB_AM_CDB
+ *	is released, the cursor flags are cleared, and the cursor is moved
+ *	to the handle's free queue.
+ *
+ *	Returns 0 on success, otherwise the first error reported by the
+ *	access method close routine or by lock_put.  The cursor is moved to
+ *	the free queue regardless of those errors.
+ */
+static int
+__db_c_close(dbc)
+	DBC *dbc;
+{
+	DB *dbp;
+	int ret, t_ret;
+
+	dbp = dbc->dbp;
+
+	DB_PANIC_CHECK(dbp);
+
+	ret = 0;
+
+	/*
+	 * We cannot release the lock until after we've called the
+	 * access method specific routine, since btrees may have pending
+	 * deletes.
+	 */
+
+	/* Remove the cursor from the active queue. */
+	DB_THREAD_LOCK(dbp);
+	TAILQ_REMOVE(&dbp->active_queue, dbc, links);
+	DB_THREAD_UNLOCK(dbp);
+
+	/* Call the access specific cursor close routine. */
+	if ((t_ret = dbc->c_am_close(dbc)) != 0 && ret == 0)
+		ret = t_ret;
+
+	/*
+	 * Release the lock.  An error from the close routine above takes
+	 * precedence, so a lock_put result is only recorded when nothing
+	 * has failed yet.
+	 */
+	if (F_ISSET(dbc->dbp, DB_AM_CDB) && dbc->mylock != LOCK_INVALID) {
+		if ((t_ret = lock_put(
+		    dbc->dbp->dbenv->lk_info, dbc->mylock)) != 0 && ret == 0)
+			ret = t_ret;
+		dbc->mylock = LOCK_INVALID;
+	}
+
+	/* Clean up the cursor. */
+	dbc->flags = 0;
+
+#ifdef DEBUG
+	/*
+	 * Check for leftover locks, unless we're running with transactions.
+	 *
+	 * If we're running tests, display any locks currently held.  It's
+	 * possible that some applications may hold locks for long periods,
+	 * e.g., conference room locks, but the DB tests should never close
+	 * holding locks.
+	 */
+	if (F_ISSET(dbp, DB_AM_LOCKING) && dbc->lid == dbc->locker) {
+		DB_LOCKREQ request;
+
+		request.op = DB_LOCK_DUMP;
+		if ((t_ret = lock_vec(dbp->dbenv->lk_info,
+		    dbc->locker, 0, &request, 1, NULL)) != 0 && ret == 0)
+			ret = EAGAIN;
+	}
+#endif
+	/* Move the cursor to the free queue. */
+	DB_THREAD_LOCK(dbp);
+	TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links);
+	DB_THREAD_UNLOCK(dbp);
+
+	return (ret);
+}
+
+#ifdef DEBUG
+/*
+ * __db_cprint --
+ *	Display the current cursor list.
+ *
+ *	Writes one line to stderr for each cursor on dbp's active queue:
+ *	the cursor, handle and transaction pointers, the lid and locker
+ *	ids, and the cursor flags as decoded by __db_prflags.  Always
+ *	returns 0.
+ *
+ * PUBLIC: int __db_cprint __P((DB *));
+ */
+int
+__db_cprint(dbp)
+	DB *dbp;
+{
+	static const FN fn[] = {
+		{ DBC_RECOVER,	"recover" },
+		{ DBC_RMW,	"read-modify-write" },
+		{ 0 },
+	};
+	DBC *dbc;
+
+	DB_THREAD_LOCK(dbp);
+	for (dbc = TAILQ_FIRST(&dbp->active_queue);
+	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+		/*
+		 * Print pointers with %p: casting them to u_int would drop
+		 * the upper bits wherever pointers are wider than an
+		 * unsigned int.
+		 */
+		fprintf(stderr,
+		    "%p: dbp: %p txn: %p lid: %lu locker: %lu",
+		    (void *)dbc, (void *)dbc->dbp, (void *)dbc->txn,
+		    (u_long)dbc->lid, (u_long)dbc->locker);
+		__db_prflags(dbc->flags, fn, stderr);
+		fprintf(stderr, "\n");
+	}
+	DB_THREAD_UNLOCK(dbp);
+
+	return (0);
+}
+#endif /* DEBUG */
+
+/*
+ * __db_c_destroy --
+ *	Destroy the cursor.
+ *
+ *	Unlinks the cursor from its handle's free queue, runs the access
+ *	method's destroy routine if one is set, then frees the cursor's
+ *	rkey/rdata buffers and the cursor itself.
+ *
+ *	Returns 0, or the error reported by the access method's destroy
+ *	routine.  The cursor memory is released in either case and must
+ *	not be used after this call.
+ *
+ * PUBLIC: int __db_c_destroy __P((DBC *));
+ */
+int
+__db_c_destroy(dbc)
+	DBC *dbc;
+{
+	DB *dbp;
+	int ret;
+
+	dbp = dbc->dbp;
+
+	/* Remove the cursor from the free queue. */
+	DB_THREAD_LOCK(dbp);
+	TAILQ_REMOVE(&dbp->free_queue, dbc, links);
+	DB_THREAD_UNLOCK(dbp);
+
+	/* Call the access specific cursor destroy routine. */
+	ret = dbc->c_am_destroy == NULL ? 0 : dbc->c_am_destroy(dbc);
+
+	/* Free up allocated memory. */
+	if (dbc->rkey.data != NULL)
+		__os_free(dbc->rkey.data, dbc->rkey.ulen);
+	if (dbc->rdata.data != NULL)
+		__os_free(dbc->rdata.data, dbc->rdata.ulen);
+	__os_free(dbc, sizeof(*dbc));
+
+	/* Report the destroy routine's result instead of discarding it. */
+	return (ret);
+}
+
+/*
+ * __db_fd --
+ * Return a file descriptor for flock'ing.
+ *
+ * dbp: database handle; its mpf field is handed to __mp_xxx_fd
+ * fdp: passed through to __mp_xxx_fd (presumably where the descriptor
+ * is stored -- confirm against __mp_xxx_fd)
+ *
+ * Returns whatever __mp_xxx_fd returns.
+ */
+static int
+__db_fd(dbp, fdp)
+DB *dbp;
+int *fdp;
+{
+DB_PANIC_CHECK(dbp);
+
+/*
+ * XXX
+ * Truly spectacular layering violation.
+ */
+return (__mp_xxx_fd(dbp->mpf, fdp));
+}
+
+/*
+ * __db_get --
+ *	Return a key/data pair.
+ *
+ *	Implemented on top of the cursor interface: the arguments are
+ *	checked, a cursor is opened on dbp under txn, a single c_get is
+ *	issued through it, and the cursor is closed again.
+ *
+ *	Returns the c_get result; if that is 0, returns any error from
+ *	closing the cursor.  Argument-check and cursor-open errors are
+ *	returned directly.
+ */
+static int
+__db_get(dbp, txn, key, data, flags)
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *key, *data;
+	u_int32_t flags;
+{
+	DBC *dbc;
+	u_int32_t cflags;
+	int ret, t_ret;
+
+	DB_PANIC_CHECK(dbp);
+
+	/* Validate the arguments, then get a cursor to do the work. */
+	if ((ret = __db_getchk(dbp, key, data, flags)) != 0 ||
+	    (ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
+		return (ret);
+
+	DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags);
+
+	/*
+	 * With no flags, or DB_RMW alone, add DB_SET to the cursor get;
+	 * any other flag value is handed to the cursor unchanged.
+	 */
+	cflags = flags;
+	if (flags == 0 || flags == DB_RMW)
+		cflags |= DB_SET;
+	ret = dbc->c_get(dbc, key, data, cflags);
+
+	/* Always close the cursor; the get result takes precedence. */
+	t_ret = __db_c_close(dbc);
+	if (ret == 0 && t_ret != 0)
+		ret = t_ret;
+
+	return (ret);
+}
+
+/*
+ * __db_put --
+ *	Store a key/data pair.
+ *
+ *	Implemented on top of the cursor interface: the arguments are
+ *	checked, a cursor is opened with DB_WRITELOCK, the pair is stored
+ *	with c_put(DB_KEYLAST), and the cursor is closed again.
+ *
+ *	dbp:	database handle
+ *	txn:	enclosing transaction, or NULL
+ *	key:	key to store
+ *	data:	data to store under key
+ *	flags:	validated by __db_putchk; with DB_NOOVERWRITE the key is
+ *		looked up first and nothing is stored if it already exists
+ *
+ *	Returns 0 on success, DB_KEYEXIST if DB_NOOVERWRITE was given and
+ *	the key is present, or the first error from argument checking,
+ *	cursor creation, the lookup, the store, or closing the cursor.
+ */
+static int
+__db_put(dbp, txn, key, data, flags)
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *key, *data;
+	u_int32_t flags;
+{
+	DBC *dbc;
+	DBT tdata;
+	int ret, t_ret;
+
+	DB_PANIC_CHECK(dbp);
+
+	if ((ret = __db_putchk(dbp, key, data,
+	    flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
+		return (ret);
+
+	if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
+		return (ret);
+
+	DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags);
+
+	if (flags == DB_NOOVERWRITE) {
+		/*
+		 * Set DB_DBT_USERMEM, this might be a threaded application and
+		 * the flags checking will catch us.  We don't want the actual
+		 * data, so request a partial of length 0.
+		 */
+		memset(&tdata, 0, sizeof(tdata));
+		F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
+
+		/*
+		 * Only a "no such key" answer means it is safe to store.  Any
+		 * other failure of the lookup is a real error and must stop
+		 * the put rather than be mistaken for an absent key.
+		 */
+		ret = dbc->c_get(dbc, key, &tdata, DB_SET | DB_RMW);
+		if (ret == 0)
+			ret = DB_KEYEXIST;
+		else if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY)
+			ret = 0;
+	}
+	if (ret == 0)
+		ret = dbc->c_put(dbc, key, data, DB_KEYLAST);
+
+	/* Always close the cursor; an earlier error takes precedence. */
+	if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
+		ret = t_ret;
+
+	return (ret);
+}
+
+/*
+ * __db_sync --
+ *	Flush the database cache.
+ *
+ *	Returns the __db_syncchk error if the arguments are rejected, 0
+ *	without doing anything for in-memory or read-only handles, and
+ *	otherwise the memp_fsync result with DB_INCOMPLETE mapped to 0.
+ *
+ * PUBLIC: int __db_sync __P((DB *, u_int32_t));
+ */
+int
+__db_sync(dbp, flags)
+	DB *dbp;
+	u_int32_t flags;
+{
+	int ret;
+
+	DB_PANIC_CHECK(dbp);
+
+	/* Validate the arguments. */
+	ret = __db_syncchk(dbp, flags);
+	if (ret != 0)
+		return (ret);
+
+	/* A file that could not have been modified has nothing to flush. */
+	if (F_ISSET(dbp, DB_AM_INMEM | DB_AM_RDONLY))
+		return (0);
+
+	/*
+	 * Write dirty pages from the cache to the backing file; a
+	 * DB_INCOMPLETE result is not reported as an error.
+	 */
+	ret = memp_fsync(dbp->mpf);
+	return (ret == DB_INCOMPLETE ? 0 : ret);
+}
diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c
index 5203e0a94c..e3dba23c8b 100644
--- a/db2/db/db_auto.c
+++ b/db2/db/db_auto.c
@@ -10,7 +10,6 @@
#endif
#include "db_int.h"
-#include "shqueue.h"
#include "db_page.h"
#include "db_dispatch.h"
#include "db_am.h"
@@ -46,8 +45,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
rectype = DB_db_addrem;
txn_num = txnid == NULL ? 0 : txnid->txnid;
if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
+ ZERO_LSN(null_lsn);
lsnp = &null_lsn;
} else
lsnp = &txnid->last_lsn;
@@ -60,8 +58,8 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
+ sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size)
+ sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size)
+ sizeof(*pagelsn);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
+ return (ret);
bp = logrec.data;
memcpy(bp, &rectype, sizeof(rectype));
@@ -112,7 +110,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
if (txnid != NULL)
txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
+ __os_free(logrec.data, 0);
return (ret);
}
@@ -174,7 +172,7 @@ __db_addrem_print(notused1, dbtp, lsnp, notused2, notused3)
printf("\tpagelsn: [%lu][%lu]\n",
(u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
printf("\n");
- __db_free(argp);
+ __os_free(argp, 0);
return (0);
}
@@ -188,11 +186,12 @@ __db_addrem_read(recbuf, argpp)
{
__db_addrem_args *argp;
u_int8_t *bp;
+ int ret;
- argp = (__db_addrem_args *)__db_malloc(sizeof(__db_addrem_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
+ ret = __os_malloc(sizeof(__db_addrem_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
argp->txnid = (DB_TXN *)&argp[1];
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
@@ -253,8 +252,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
rectype = DB_db_split;
txn_num = txnid == NULL ? 0 : txnid->txnid;
if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
+ ZERO_LSN(null_lsn);
lsnp = &null_lsn;
} else
lsnp = &txnid->last_lsn;
@@ -264,8 +262,8 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
+ sizeof(pgno)
+ sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size)
+ sizeof(*pagelsn);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
+ return (ret);
bp = logrec.data;
memcpy(bp, &rectype, sizeof(rectype));
@@ -302,7 +300,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
if (txnid != NULL)
txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
+ __os_free(logrec.data, 0);
return (ret);
}
@@ -353,7 +351,7 @@ __db_split_print(notused1, dbtp, lsnp, notused2, notused3)
printf("\tpagelsn: [%lu][%lu]\n",
(u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
printf("\n");
- __db_free(argp);
+ __os_free(argp, 0);
return (0);
}
@@ -367,11 +365,12 @@ __db_split_read(recbuf, argpp)
{
__db_split_args *argp;
u_int8_t *bp;
+ int ret;
- argp = (__db_split_args *)__db_malloc(sizeof(__db_split_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
+ ret = __os_malloc(sizeof(__db_split_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
argp->txnid = (DB_TXN *)&argp[1];
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
@@ -430,8 +429,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
rectype = DB_db_big;
txn_num = txnid == NULL ? 0 : txnid->txnid;
if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
+ ZERO_LSN(null_lsn);
lsnp = &null_lsn;
} else
lsnp = &txnid->last_lsn;
@@ -445,8 +443,8 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
+ sizeof(*pagelsn)
+ sizeof(*prevlsn)
+ sizeof(*nextlsn);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
+ return (ret);
bp = logrec.data;
memcpy(bp, &rectype, sizeof(rectype));
@@ -497,7 +495,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
if (txnid != NULL)
txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
+ __os_free(logrec.data, 0);
return (ret);
}
@@ -554,7 +552,7 @@ __db_big_print(notused1, dbtp, lsnp, notused2, notused3)
printf("\tnextlsn: [%lu][%lu]\n",
(u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
printf("\n");
- __db_free(argp);
+ __os_free(argp, 0);
return (0);
}
@@ -568,11 +566,12 @@ __db_big_read(recbuf, argpp)
{
__db_big_args *argp;
u_int8_t *bp;
+ int ret;
- argp = (__db_big_args *)__db_malloc(sizeof(__db_big_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
+ ret = __os_malloc(sizeof(__db_big_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
argp->txnid = (DB_TXN *)&argp[1];
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
@@ -630,8 +629,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
rectype = DB_db_ovref;
txn_num = txnid == NULL ? 0 : txnid->txnid;
if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
+ ZERO_LSN(null_lsn);
lsnp = &null_lsn;
} else
lsnp = &txnid->last_lsn;
@@ -640,8 +638,8 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
+ sizeof(pgno)
+ sizeof(adjust)
+ sizeof(*lsn);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
+ return (ret);
bp = logrec.data;
memcpy(bp, &rectype, sizeof(rectype));
@@ -668,7 +666,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
if (txnid != NULL)
txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
+ __os_free(logrec.data, 0);
return (ret);
}
@@ -710,7 +708,7 @@ __db_ovref_print(notused1, dbtp, lsnp, notused2, notused3)
printf("\tlsn: [%lu][%lu]\n",
(u_long)argp->lsn.file, (u_long)argp->lsn.offset);
printf("\n");
- __db_free(argp);
+ __os_free(argp, 0);
return (0);
}
@@ -724,11 +722,12 @@ __db_ovref_read(recbuf, argpp)
{
__db_ovref_args *argp;
u_int8_t *bp;
+ int ret;
- argp = (__db_ovref_args *)__db_malloc(sizeof(__db_ovref_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
+ ret = __os_malloc(sizeof(__db_ovref_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
argp->txnid = (DB_TXN *)&argp[1];
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
@@ -752,16 +751,17 @@ __db_ovref_read(recbuf, argpp)
/*
* PUBLIC: int __db_relink_log
* PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
- * PUBLIC: DB_LSN *, db_pgno_t, DB_LSN *));
+ * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DB_LSN *,
+ * PUBLIC: db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *));
*/
int __db_relink_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, prev, lsn_prev, next,
- lsn_next)
+ opcode, fileid, pgno, lsn, prev, lsn_prev,
+ next, lsn_next)
DB_LOG *logp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
+ u_int32_t opcode;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN * lsn;
@@ -779,12 +779,12 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
rectype = DB_db_relink;
txn_num = txnid == NULL ? 0 : txnid->txnid;
if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
+ ZERO_LSN(null_lsn);
lsnp = &null_lsn;
} else
lsnp = &txnid->last_lsn;
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(opcode)
+ sizeof(fileid)
+ sizeof(pgno)
+ sizeof(*lsn)
@@ -792,8 +792,8 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
+ sizeof(*lsn_prev)
+ sizeof(next)
+ sizeof(*lsn_next);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
+ return (ret);
bp = logrec.data;
memcpy(bp, &rectype, sizeof(rectype));
@@ -802,6 +802,8 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
bp += sizeof(txn_num);
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
+ memcpy(bp, &opcode, sizeof(opcode));
+ bp += sizeof(opcode);
memcpy(bp, &fileid, sizeof(fileid));
bp += sizeof(fileid);
memcpy(bp, &pgno, sizeof(pgno));
@@ -832,7 +834,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
if (txnid != NULL)
txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
+ __os_free(logrec.data, 0);
return (ret);
}
@@ -868,6 +870,7 @@ __db_relink_print(notused1, dbtp, lsnp, notused2, notused3)
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
+ printf("\topcode: %lu\n", (u_long)argp->opcode);
printf("\tfileid: %lu\n", (u_long)argp->fileid);
printf("\tpgno: %lu\n", (u_long)argp->pgno);
printf("\tlsn: [%lu][%lu]\n",
@@ -879,7 +882,7 @@ __db_relink_print(notused1, dbtp, lsnp, notused2, notused3)
printf("\tlsn_next: [%lu][%lu]\n",
(u_long)argp->lsn_next.file, (u_long)argp->lsn_next.offset);
printf("\n");
- __db_free(argp);
+ __os_free(argp, 0);
return (0);
}
@@ -893,11 +896,12 @@ __db_relink_read(recbuf, argpp)
{
__db_relink_args *argp;
u_int8_t *bp;
+ int ret;
- argp = (__db_relink_args *)__db_malloc(sizeof(__db_relink_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
+ ret = __os_malloc(sizeof(__db_relink_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
argp->txnid = (DB_TXN *)&argp[1];
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
@@ -906,6 +910,8 @@ __db_relink_read(recbuf, argpp)
bp += sizeof(argp->txnid->txnid);
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
+ memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+ bp += sizeof(argp->opcode);
memcpy(&argp->fileid, bp, sizeof(argp->fileid));
bp += sizeof(argp->fileid);
memcpy(&argp->pgno, bp, sizeof(argp->pgno));
@@ -951,8 +957,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
rectype = DB_db_addpage;
txn_num = txnid == NULL ? 0 : txnid->txnid;
if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
+ ZERO_LSN(null_lsn);
lsnp = &null_lsn;
} else
lsnp = &txnid->last_lsn;
@@ -962,8 +967,8 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
+ sizeof(*lsn)
+ sizeof(nextpgno)
+ sizeof(*nextlsn);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
+ return (ret);
bp = logrec.data;
memcpy(bp, &rectype, sizeof(rectype));
@@ -995,7 +1000,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
if (txnid != NULL)
txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
+ __os_free(logrec.data, 0);
return (ret);
}
@@ -1039,7 +1044,7 @@ __db_addpage_print(notused1, dbtp, lsnp, notused2, notused3)
printf("\tnextlsn: [%lu][%lu]\n",
(u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
printf("\n");
- __db_free(argp);
+ __os_free(argp, 0);
return (0);
}
@@ -1053,11 +1058,12 @@ __db_addpage_read(recbuf, argpp)
{
__db_addpage_args *argp;
u_int8_t *bp;
+ int ret;
- argp = (__db_addpage_args *)__db_malloc(sizeof(__db_addpage_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
+ ret = __os_malloc(sizeof(__db_addpage_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
argp->txnid = (DB_TXN *)&argp[1];
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
@@ -1108,8 +1114,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
rectype = DB_db_debug;
txn_num = txnid == NULL ? 0 : txnid->txnid;
if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
+ ZERO_LSN(null_lsn);
lsnp = &null_lsn;
} else
lsnp = &txnid->last_lsn;
@@ -1119,8 +1124,8 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
+ sizeof(u_int32_t) + (key == NULL ? 0 : key->size)
+ sizeof(u_int32_t) + (data == NULL ? 0 : data->size)
+ sizeof(arg_flags);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0)
+ return (ret);
bp = logrec.data;
memcpy(bp, &rectype, sizeof(rectype));
@@ -1170,7 +1175,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
if (txnid != NULL)
txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
+ __os_free(logrec.data, 0);
return (ret);
}
@@ -1236,7 +1241,7 @@ __db_debug_print(notused1, dbtp, lsnp, notused2, notused3)
printf("\n");
printf("\targ_flags: %lu\n", (u_long)argp->arg_flags);
printf("\n");
- __db_free(argp);
+ __os_free(argp, 0);
return (0);
}
@@ -1250,11 +1255,12 @@ __db_debug_read(recbuf, argpp)
{
__db_debug_args *argp;
u_int8_t *bp;
+ int ret;
- argp = (__db_debug_args *)__db_malloc(sizeof(__db_debug_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
+ ret = __os_malloc(sizeof(__db_debug_args) +
+ sizeof(DB_TXN), NULL, &argp);
+ if (ret != 0)
+ return (ret);
argp->txnid = (DB_TXN *)&argp[1];
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
@@ -1284,143 +1290,6 @@ __db_debug_read(recbuf, argpp)
}
/*
- * PUBLIC: int __db_noop_log
- * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *));
- */
-int __db_noop_log(logp, txnid, ret_lsnp, flags,
- fileid, pgno, prevlsn)
- DB_LOG *logp;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t fileid;
- db_pgno_t pgno;
- DB_LSN * prevlsn;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_db_noop;
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- null_lsn.file = 0;
- null_lsn.offset = 0;
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(*prevlsn);
- if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
- return (ENOMEM);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (prevlsn != NULL)
- memcpy(bp, prevlsn, sizeof(*prevlsn));
- else
- memset(bp, 0, sizeof(*prevlsn));
- bp += sizeof(*prevlsn);
-#ifdef DIAGNOSTIC
- if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
- fprintf(stderr, "Error in log record length");
-#endif
- ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __db_free(logrec.data);
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_noop_print
- * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__db_noop_print(notused1, dbtp, lsnp, notused2, notused3)
- DB_LOG *notused1;
- DBT *dbtp;
- DB_LSN *lsnp;
- int notused2;
- void *notused3;
-{
- __db_noop_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused1 = NULL;
- notused2 = 0;
- notused3 = NULL;
-
- if ((ret = __db_noop_read(dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]db_noop: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %lu\n", (u_long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tprevlsn: [%lu][%lu]\n",
- (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset);
- printf("\n");
- __db_free(argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_noop_read __P((void *, __db_noop_args **));
- */
-int
-__db_noop_read(recbuf, argpp)
- void *recbuf;
- __db_noop_args **argpp;
-{
- __db_noop_args *argp;
- u_int8_t *bp;
-
- argp = (__db_noop_args *)__db_malloc(sizeof(__db_noop_args) +
- sizeof(DB_TXN));
- if (argp == NULL)
- return (ENOMEM);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn));
- bp += sizeof(argp->prevlsn);
- *argpp = argp;
- return (0);
-}
-
-/*
* PUBLIC: int __db_init_print __P((DB_ENV *));
*/
int
@@ -1450,9 +1319,6 @@ __db_init_print(dbenv)
if ((ret = __db_add_recovery(dbenv,
__db_debug_print, DB_db_debug)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __db_noop_print, DB_db_noop)) != 0)
- return (ret);
return (0);
}
@@ -1486,9 +1352,6 @@ __db_init_recover(dbenv)
if ((ret = __db_add_recovery(dbenv,
__db_debug_recover, DB_db_debug)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __db_noop_recover, DB_db_noop)) != 0)
- return (ret);
return (0);
}
diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c
index 8645948614..616d08c3ff 100644
--- a/db2/db/db_dispatch.c
+++ b/db2/db/db_dispatch.c
@@ -43,13 +43,14 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_dispatch.c 10.14 (Sleepycat) 5/3/98";
+static const char sccsid[] = "@(#)db_dispatch.c 10.20 (Sleepycat) 10/10/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
+#include <shqueue.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
@@ -61,6 +62,7 @@ static const char sccsid[] = "@(#)db_dispatch.c 10.14 (Sleepycat) 5/3/98";
#include "db_am.h"
#include "common_ext.h"
#include "log_auto.h"
+#include "txn.h"
#include "txn_auto.h"
/*
@@ -148,27 +150,16 @@ __db_add_recovery(dbenv, func, ndx)
u_int32_t ndx;
{
u_int32_t i;
+ int ret;
- /* Check if function is already registered. */
- if (dispatch_table && ndx < dispatch_size &&
- dispatch_table[ndx] != 0 && dispatch_table[ndx] != func)
- return (DB_REGISTERED);
+ COMPQUIET(dbenv, NULL); /* !!!: not currently used. */
/* Check if we have to grow the table. */
if (ndx >= dispatch_size) {
- if (dispatch_table == NULL)
- dispatch_table = (int (**)
- __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
- __db_malloc(DB_user_BEGIN * sizeof(dispatch_table[0]));
- else
- dispatch_table = (int (**)
- __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
- __db_realloc(dispatch_table, (DB_user_BEGIN +
- dispatch_size) * sizeof(dispatch_table[0]));
- if (dispatch_table == NULL) {
- __db_err(dbenv, "%s", strerror(ENOMEM));
- return (ENOMEM);
- }
+ if ((ret = __os_realloc(&dispatch_table,
+ (DB_user_BEGIN + dispatch_size) *
+ sizeof(dispatch_table[0]))) != 0)
+ return (ret);
for (i = dispatch_size,
dispatch_size += DB_user_BEGIN; i < dispatch_size; ++i)
dispatch_table[i] = NULL;
@@ -189,9 +180,10 @@ __db_txnlist_init(retp)
void *retp;
{
DB_TXNHEAD *headp;
+ int ret;
- if ((headp = (DB_TXNHEAD *)__db_malloc(sizeof(DB_TXNHEAD))) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(sizeof(DB_TXNHEAD), NULL, &headp)) != 0)
+ return (ret);
LIST_INIT(&headp->head);
headp->maxid = 0;
@@ -214,9 +206,10 @@ __db_txnlist_add(listp, txnid)
{
DB_TXNHEAD *hp;
DB_TXNLIST *elp;
+ int ret;
- if ((elp = (DB_TXNLIST *)__db_malloc(sizeof(DB_TXNLIST))) == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(sizeof(DB_TXNLIST), NULL, &elp)) != 0)
+ return (ret);
elp->txnid = txnid;
hp = (DB_TXNHEAD *)listp;
@@ -269,9 +262,9 @@ __db_txnlist_end(listp)
hp = (DB_TXNHEAD *)listp;
while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) {
LIST_REMOVE(p, links);
- __db_free(p);
+ __os_free(p, 0);
}
- __db_free(listp);
+ __os_free(listp, sizeof(DB_TXNHEAD));
}
/*
diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c
index 6379fc1729..2673bbcd61 100644
--- a/db2/db/db_dup.c
+++ b/db2/db/db_dup.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_dup.c 10.18 (Sleepycat) 5/31/98";
+static const char sccsid[] = "@(#)db_dup.c 10.35 (Sleepycat) 12/2/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -23,25 +23,25 @@ static const char sccsid[] = "@(#)db_dup.c 10.18 (Sleepycat) 5/31/98";
#include "btree.h"
#include "db_am.h"
-static int __db_addpage __P((DB *,
- PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
-static int __db_dsplit __P((DB *,
- PAGE **, db_indx_t *, u_int32_t, int (*)(DB *, u_int32_t, PAGE **)));
+static int __db_addpage __P((DBC *,
+ PAGE **, db_indx_t *, int (*)(DBC *, u_int32_t, PAGE **)));
+static int __db_dsplit __P((DBC *,
+ PAGE **, db_indx_t *, u_int32_t, int (*)(DBC *, u_int32_t, PAGE **)));
/*
* __db_dput --
* Put a duplicate item onto a duplicate page at the given index.
*
- * PUBLIC: int __db_dput __P((DB *,
- * PUBLIC: DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
+ * PUBLIC: int __db_dput __P((DBC *, DBT *,
+ * PUBLIC: PAGE **, db_indx_t *, int (*)(DBC *, u_int32_t, PAGE **)));
*/
int
-__db_dput(dbp, dbt, pp, indxp, newfunc)
- DB *dbp;
+__db_dput(dbc, dbt, pp, indxp, newfunc)
+ DBC *dbc;
DBT *dbt;
PAGE **pp;
db_indx_t *indxp;
- int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+ int (*newfunc) __P((DBC *, u_int32_t, PAGE **));
{
BOVERFLOW bo;
DBT *data_dbtp, hdr_dbt, *hdr_dbtp;
@@ -54,10 +54,12 @@ __db_dput(dbp, dbt, pp, indxp, newfunc)
* We need some access method independent threshold for when we put
* a duplicate item onto an overflow page.
*/
- if (dbt->size > 0.25 * dbp->pgsize) {
- if ((ret = __db_poff(dbp, dbt, &pgno, newfunc)) != 0)
+ if (dbt->size > 0.25 * dbc->dbp->pgsize) {
+ if ((ret = __db_poff(dbc, dbt, &pgno, newfunc)) != 0)
return (ret);
+ UMRW(bo.unused1);
B_TSET(bo.type, B_OVERFLOW, 0);
+ UMRW(bo.unused2);
bo.tlen = dbt->size;
bo.pgno = pgno;
hdr_dbt.data = &bo;
@@ -75,11 +77,14 @@ __db_dput(dbp, dbt, pp, indxp, newfunc)
pagep = *pp;
if (size > P_FREESPACE(pagep)) {
if (*indxp == NUM_ENT(*pp) && NEXT_PGNO(*pp) == PGNO_INVALID)
- ret = __db_addpage(dbp, pp, indxp, newfunc);
+ ret = __db_addpage(dbc, pp, indxp, newfunc);
else
- ret = __db_dsplit(dbp, pp, indxp, isize, newfunc);
+ ret = __db_dsplit(dbc, pp, indxp, isize, newfunc);
if (ret != 0)
- /* XXX: Pages not returned to free list. */
+ /*
+ * XXX
+ * Pages not returned to free list.
+ */
return (ret);
pagep = *pp;
}
@@ -88,11 +93,11 @@ __db_dput(dbp, dbt, pp, indxp, newfunc)
* Now, pagep references the page on which to insert and indx is the
* location to insert.
*/
- if ((ret = __db_pitem(dbp,
+ if ((ret = __db_pitem(dbc,
pagep, (u_int32_t)*indxp, isize, hdr_dbtp, data_dbtp)) != 0)
return (ret);
- (void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+ (void)memp_fset(dbc->dbp->mpf, pagep, DB_MPOOL_DIRTY);
return (0);
}
@@ -100,15 +105,15 @@ __db_dput(dbp, dbt, pp, indxp, newfunc)
* __db_drem --
* Remove a duplicate at the given index on the given page.
*
- * PUBLIC: int __db_drem __P((DB *,
- * PUBLIC: PAGE **, u_int32_t, int (*)(DB *, PAGE *)));
+ * PUBLIC: int __db_drem __P((DBC *,
+ * PUBLIC: PAGE **, u_int32_t, int (*)(DBC *, PAGE *)));
*/
int
-__db_drem(dbp, pp, indx, freefunc)
- DB *dbp;
+__db_drem(dbc, pp, indx, freefunc)
+ DBC *dbc;
PAGE **pp;
u_int32_t indx;
- int (*freefunc) __P((DB *, PAGE *));
+ int (*freefunc) __P((DBC *, PAGE *));
{
PAGE *pagep;
int ret;
@@ -117,12 +122,12 @@ __db_drem(dbp, pp, indx, freefunc)
/* Check if we are freeing a big item. */
if (B_TYPE(GET_BKEYDATA(pagep, indx)->type) == B_OVERFLOW) {
- if ((ret = __db_doff(dbp,
+ if ((ret = __db_doff(dbc,
GET_BOVERFLOW(pagep, indx)->pgno, freefunc)) != 0)
return (ret);
- ret = __db_ditem(dbp, pagep, indx, BOVERFLOW_SIZE);
+ ret = __db_ditem(dbc, pagep, indx, BOVERFLOW_SIZE);
} else
- ret = __db_ditem(dbp, pagep, indx,
+ ret = __db_ditem(dbc, pagep, indx,
BKEYDATA_SIZE(GET_BKEYDATA(pagep, indx)->len));
if (ret != 0)
return (ret);
@@ -137,12 +142,12 @@ __db_drem(dbp, pp, indx, freefunc)
* !!!
* __db_relink will set the dirty bit for us.
*/
- if ((ret = __db_relink(dbp, pagep, pp, 0)) != 0)
+ if ((ret = __db_relink(dbc, DB_REM_PAGE, pagep, pp, 0)) != 0)
return (ret);
- if ((ret = freefunc(dbp, pagep)) != 0)
+ if ((ret = freefunc(dbc, pagep)) != 0)
return (ret);
} else
- (void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+ (void)memp_fset(dbc->dbp->mpf, pagep, DB_MPOOL_DIRTY);
return (0);
}
@@ -151,32 +156,41 @@ __db_drem(dbp, pp, indx, freefunc)
* __db_dend --
* Find the last page in a set of offpage duplicates.
*
- * PUBLIC: int __db_dend __P((DB *, db_pgno_t, PAGE **));
+ * PUBLIC: int __db_dend __P((DBC *, db_pgno_t, PAGE **));
*/
int
-__db_dend(dbp, pgno, pagep)
- DB *dbp;
+__db_dend(dbc, pgno, pp)
+ DBC *dbc;
db_pgno_t pgno;
- PAGE **pagep;
+ PAGE **pp;
{
+ DB *dbp;
PAGE *h;
int ret;
+ dbp = dbc->dbp;
+
/*
* This implements DB_KEYLAST. The last page is returned in pp; pgno
* should be the page number of the first page of the duplicate chain.
+ *
+ * *pp may be non-NULL -- if given a valid page use it.
*/
+ if (*pp != NULL)
+ goto started;
for (;;) {
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0) {
(void)__db_pgerr(dbp, pgno);
return (ret);
}
+started: h = *pp;
+
if ((pgno = NEXT_PGNO(h)) == PGNO_INVALID)
break;
- (void)memp_fput(dbp->mpf, h, 0);
- }
- *pagep = h;
+ if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ return (ret);
+ }
return (0);
}
@@ -191,41 +205,44 @@ __db_dend(dbp, pgno, pagep)
* the page on which the insert should happen, not yet put.
*/
static int
-__db_dsplit(dbp, hp, indxp, size, newfunc)
- DB *dbp;
+__db_dsplit(dbc, hp, indxp, size, newfunc)
+ DBC *dbc;
PAGE **hp;
db_indx_t *indxp;
u_int32_t size;
- int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+ int (*newfunc) __P((DBC *, u_int32_t, PAGE **));
{
PAGE *h, *np, *tp;
BKEYDATA *bk;
DBT page_dbt;
+ DB *dbp;
+ size_t pgsize;
db_indx_t halfbytes, i, indx, lastsum, nindex, oindex, s, sum;
- int did_indx, ret;
+ int did_indx, ret, t_ret;
h = *hp;
indx = *indxp;
+ ret = 0;
+ dbp = dbc->dbp;
+ pgsize = dbp->pgsize;
/* Create a temporary page to do compaction onto. */
- if ((tp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL)
- return (ENOMEM);
-#ifdef DIAGNOSTIC
- memset(tp, 0xff, dbp->pgsize);
-#endif
+ if ((ret = __os_malloc(pgsize, NULL, &tp)) != 0)
+ return (ret);
+
/* Create new page for the split. */
- if ((ret = newfunc(dbp, P_DUPLICATE, &np)) != 0) {
- FREE(tp, dbp->pgsize);
+ if ((ret = newfunc(dbc, P_DUPLICATE, &np)) != 0) {
+ __os_free(tp, pgsize);
return (ret);
}
- P_INIT(np, dbp->pgsize, PGNO(np), PGNO(h), NEXT_PGNO(h), 0,
+ P_INIT(np, pgsize, PGNO(np), PGNO(h), NEXT_PGNO(h), 0,
P_DUPLICATE);
- P_INIT(tp, dbp->pgsize, PGNO(h), PREV_PGNO(h), PGNO(np), 0,
+ P_INIT(tp, pgsize, PGNO(h), PREV_PGNO(h), PGNO(np), 0,
P_DUPLICATE);
/* Figure out the split point */
- halfbytes = (dbp->pgsize - HOFFSET(h)) / 2;
+ halfbytes = (pgsize - HOFFSET(h)) / 2;
did_indx = 0;
for (sum = 0, lastsum = 0, i = 0; i < NUM_ENT(h); i++) {
if (i == indx) {
@@ -237,7 +254,6 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
(db_indx_t)(sum - halfbytes)) {
*hp = np;
*indxp = 0;
- i--;
} else
*indxp = i;
break;
@@ -252,29 +268,28 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
if (lastsum < halfbytes && sum >= halfbytes) {
/* We've crossed the halfway point. */
- if ((db_indx_t)(halfbytes - lastsum) <
- (db_indx_t)(sum - halfbytes))
- i--;
+ if ((db_indx_t)(sum - halfbytes) <
+ (db_indx_t)(halfbytes - lastsum))
+ i++;
break;
}
}
-
/*
* Check if we have set the return values of the index pointer and
* page pointer.
*/
if (!did_indx) {
*hp = np;
- *indxp = indx - i - 1;
+ *indxp = indx - i;
}
- if (DB_LOGGING(dbp)) {
+ if (DB_LOGGING(dbc)) {
page_dbt.size = dbp->pgsize;
page_dbt.data = h;
if ((ret = __db_split_log(dbp->dbenv->lg_info,
- dbp->txn, &LSN(h), 0, DB_SPLITOLD, dbp->log_fileid,
+ dbc->txn, &LSN(h), 0, DB_SPLITOLD, dbp->log_fileid,
PGNO(h), &page_dbt, &LSN(h))) != 0) {
- FREE(tp, dbp->pgsize);
+ __os_free(tp, pgsize);
return (ret);
}
LSN(tp) = LSN(h);
@@ -283,12 +298,12 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
/*
* If it's a btree, adjust the cursors.
*
- * i is the index of the last element to stay on the page.
+ * i is the index of the first element to move onto the new page.
*/
- if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
- __bam_ca_split(dbp, PGNO(h), PGNO(h), PGNO(np), i + 1, 0);
+ if (dbp->type == DB_BTREE)
+ __bam_ca_split(dbp, PGNO(h), PGNO(h), PGNO(np), i, 0);
- for (nindex = 0, oindex = i + 1; oindex < NUM_ENT(h); oindex++) {
+ for (nindex = 0, oindex = i; oindex < NUM_ENT(h); oindex++) {
bk = GET_BKEYDATA(h, oindex);
if (B_TYPE(bk->type) == B_KEYDATA)
s = BKEYDATA_SIZE(bk->len);
@@ -304,7 +319,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
* Now do data compaction by copying the remaining stuff onto the
* temporary page and then copying it back to the real page.
*/
- for (nindex = 0, oindex = 0; oindex <= i; oindex++) {
+ for (nindex = 0, oindex = 0; oindex < i; oindex++) {
bk = GET_BKEYDATA(h, oindex);
if (B_TYPE(bk->type) == B_KEYDATA)
s = BKEYDATA_SIZE(bk->len);
@@ -324,59 +339,73 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
*/
memcpy(h, tp, LOFFSET(tp));
memcpy((u_int8_t *)h + HOFFSET(tp),
- (u_int8_t *)tp + HOFFSET(tp), dbp->pgsize - HOFFSET(tp));
- FREE(tp, dbp->pgsize);
+ (u_int8_t *)tp + HOFFSET(tp), pgsize - HOFFSET(tp));
+ __os_free(tp, pgsize);
- if (DB_LOGGING(dbp)) {
- page_dbt.size = dbp->pgsize;
+ if (DB_LOGGING(dbc)) {
+ /*
+ * XXX
+ * If either of these fails, are we leaving pages pinned?
+ * Yes, but it seems like this happens in error case.
+ */
+ page_dbt.size = pgsize;
page_dbt.data = h;
if ((ret = __db_split_log(dbp->dbenv->lg_info,
- dbp->txn, &LSN(h), 0, DB_SPLITNEW, dbp->log_fileid,
+ dbc->txn, &LSN(h), 0, DB_SPLITNEW, dbp->log_fileid,
PGNO(h), &page_dbt, &LSN(h))) != 0)
return (ret);
- page_dbt.size = dbp->pgsize;
+ page_dbt.size = pgsize;
page_dbt.data = np;
if ((ret = __db_split_log(dbp->dbenv->lg_info,
- dbp->txn, &LSN(np), 0, DB_SPLITNEW, dbp->log_fileid,
+ dbc->txn, &LSN(np), 0, DB_SPLITNEW, dbp->log_fileid,
PGNO(np), &page_dbt, &LSN(np))) != 0)
return (ret);
}
/*
+ * Finally, if there was a next page after the page being
+ * split, fix its prev pointer.
+ */
+ if (np->next_pgno != PGNO_INVALID)
+ ret = __db_relink(dbc, DB_ADD_PAGE, np, NULL, 1);
+
+ /*
* Figure out if the location we're interested in is on the new
* page, and if so, reset the callers' pointer. Push the other
* page back to the store.
*/
if (*hp == h)
- ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
+ t_ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
else
- ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+ t_ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
- return (ret);
+ return (ret != 0 ? ret : t_ret);
}
/*
* __db_ditem --
* Remove an item from a page.
*
- * PUBLIC: int __db_ditem __P((DB *, PAGE *, u_int32_t, u_int32_t));
+ * PUBLIC: int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t));
*/
int
-__db_ditem(dbp, pagep, indx, nbytes)
- DB *dbp;
+__db_ditem(dbc, pagep, indx, nbytes)
+ DBC *dbc;
PAGE *pagep;
u_int32_t indx, nbytes;
{
+ DB *dbp;
DBT ldbt;
db_indx_t cnt, offset;
int ret;
u_int8_t *from;
- if (DB_LOGGING(dbp)) {
+ dbp = dbc->dbp;
+ if (DB_LOGGING(dbc)) {
ldbt.data = P_ENTRY(pagep, indx);
ldbt.size = nbytes;
- if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
+ if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbc->txn,
&LSN(pagep), 0, DB_REM_DUP, dbp->log_fileid, PGNO(pagep),
(u_int32_t)indx, nbytes, &ldbt, NULL, &LSN(pagep))) != 0)
return (ret);
@@ -413,7 +442,7 @@ __db_ditem(dbp, pagep, indx, nbytes)
sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
/* If it's a btree, adjust the cursors. */
- if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+ if (dbp->type == DB_BTREE)
__bam_ca_di(dbp, PGNO(pagep), indx, -1);
return (0);
@@ -424,16 +453,17 @@ __db_ditem(dbp, pagep, indx, nbytes)
* Put an item on a page.
*
* PUBLIC: int __db_pitem
- * PUBLIC: __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
+ * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
*/
int
-__db_pitem(dbp, pagep, indx, nbytes, hdr, data)
- DB *dbp;
+__db_pitem(dbc, pagep, indx, nbytes, hdr, data)
+ DBC *dbc;
PAGE *pagep;
u_int32_t indx;
u_int32_t nbytes;
DBT *hdr, *data;
{
+ DB *dbp;
BKEYDATA bk;
DBT thdr;
int ret;
@@ -456,8 +486,9 @@ __db_pitem(dbp, pagep, indx, nbytes, hdr, data)
* the passed in header sizes must be adjusted for the structure's
* placeholder for the trailing variable-length data field.
*/
- if (DB_LOGGING(dbp))
- if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
+ dbp = dbc->dbp;
+ if (DB_LOGGING(dbc))
+ if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbc->txn,
&LSN(pagep), 0, DB_ADD_DUP, dbp->log_fileid, PGNO(pagep),
(u_int32_t)indx, nbytes, hdr, data, &LSN(pagep))) != 0)
return (ret);
@@ -485,7 +516,7 @@ __db_pitem(dbp, pagep, indx, nbytes, hdr, data)
memcpy(p + hdr->size, data->data, data->size);
/* If it's a btree, adjust the cursors. */
- if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+ if (dbp->type == DB_BTREE)
__bam_ca_di(dbp, PGNO(pagep), indx, 1);
return (0);
@@ -495,14 +526,16 @@ __db_pitem(dbp, pagep, indx, nbytes, hdr, data)
* __db_relink --
* Relink around a deleted page.
*
- * PUBLIC: int __db_relink __P((DB *, PAGE *, PAGE **, int));
+ * PUBLIC: int __db_relink __P((DBC *, u_int32_t, PAGE *, PAGE **, int));
*/
int
-__db_relink(dbp, pagep, new_next, needlock)
- DB *dbp;
+__db_relink(dbc, add_rem, pagep, new_next, needlock)
+ DBC *dbc;
+ u_int32_t add_rem;
PAGE *pagep, **new_next;
int needlock;
{
+ DB *dbp;
PAGE *np, *pp;
DB_LOCK npl, ppl;
DB_LSN *nlsnp, *plsnp;
@@ -512,10 +545,15 @@ __db_relink(dbp, pagep, new_next, needlock)
np = pp = NULL;
npl = ppl = LOCK_INVALID;
nlsnp = plsnp = NULL;
+ dbp = dbc->dbp;
- /* Retrieve and lock the two pages. */
+ /*
+ * Retrieve and lock the one/two pages. For a remove, we may need
+ * two pages (the before and after). For an add, we only need one
+ * because the split took care of the prev.
+ */
if (pagep->next_pgno != PGNO_INVALID) {
- if (needlock && (ret = __bam_lget(dbp,
+ if (needlock && (ret = __bam_lget(dbc,
0, pagep->next_pgno, DB_LOCK_WRITE, &npl)) != 0)
goto err;
if ((ret = memp_fget(dbp->mpf,
@@ -525,8 +563,8 @@ __db_relink(dbp, pagep, new_next, needlock)
}
nlsnp = &np->lsn;
}
- if (pagep->prev_pgno != PGNO_INVALID) {
- if (needlock && (ret = __bam_lget(dbp,
+ if (add_rem == DB_REM_PAGE && pagep->prev_pgno != PGNO_INVALID) {
+ if (needlock && (ret = __bam_lget(dbc,
0, pagep->prev_pgno, DB_LOCK_WRITE, &ppl)) != 0)
goto err;
if ((ret = memp_fget(dbp->mpf,
@@ -538,9 +576,10 @@ __db_relink(dbp, pagep, new_next, needlock)
}
/* Log the change. */
- if (DB_LOGGING(dbp)) {
- if ((ret = __db_relink_log(dbp->dbenv->lg_info, dbp->txn,
- &pagep->lsn, 0, dbp->log_fileid, pagep->pgno, &pagep->lsn,
+ if (DB_LOGGING(dbc)) {
+ if ((ret = __db_relink_log(dbp->dbenv->lg_info, dbc->txn,
+ &pagep->lsn, 0, add_rem, dbp->log_fileid,
+ pagep->pgno, &pagep->lsn,
pagep->prev_pgno, plsnp, pagep->next_pgno, nlsnp)) != 0)
goto err;
if (np != NULL)
@@ -558,7 +597,10 @@ __db_relink(dbp, pagep, new_next, needlock)
* set to NULL.
*/
if (np != NULL) {
- np->prev_pgno = pagep->prev_pgno;
+ if (add_rem == DB_ADD_PAGE)
+ np->prev_pgno = pagep->pgno;
+ else
+ np->prev_pgno = pagep->prev_pgno;
if (new_next == NULL)
ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
else {
@@ -568,7 +610,7 @@ __db_relink(dbp, pagep, new_next, needlock)
if (ret != 0)
goto err;
if (needlock)
- (void)__bam_lput(dbp, npl);
+ (void)__bam_lput(dbc, npl);
} else if (new_next != NULL)
*new_next = NULL;
@@ -577,18 +619,18 @@ __db_relink(dbp, pagep, new_next, needlock)
if ((ret = memp_fput(dbp->mpf, pp, DB_MPOOL_DIRTY)) != 0)
goto err;
if (needlock)
- (void)__bam_lput(dbp, ppl);
+ (void)__bam_lput(dbc, ppl);
}
return (0);
err: if (np != NULL)
(void)memp_fput(dbp->mpf, np, 0);
if (needlock && npl != LOCK_INVALID)
- (void)__bam_lput(dbp, npl);
+ (void)__bam_lput(dbc, npl);
if (pp != NULL)
(void)memp_fput(dbp->mpf, pp, 0);
if (needlock && ppl != LOCK_INVALID)
- (void)__bam_lput(dbp, ppl);
+ (void)__bam_lput(dbc, ppl);
return (ret);
}
@@ -596,34 +638,37 @@ err: if (np != NULL)
* __db_ddup --
* Delete an offpage chain of duplicates.
*
- * PUBLIC: int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+ * PUBLIC: int __db_ddup __P((DBC *, db_pgno_t, int (*)(DBC *, PAGE *)));
*/
int
-__db_ddup(dbp, pgno, freefunc)
- DB *dbp;
+__db_ddup(dbc, pgno, freefunc)
+ DBC *dbc;
db_pgno_t pgno;
- int (*freefunc) __P((DB *, PAGE *));
+ int (*freefunc) __P((DBC *, PAGE *));
{
+ DB *dbp;
PAGE *pagep;
DBT tmp_dbt;
int ret;
+ dbp = dbc->dbp;
do {
if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) {
(void)__db_pgerr(dbp, pgno);
return (ret);
}
- if (DB_LOGGING(dbp)) {
+ if (DB_LOGGING(dbc)) {
tmp_dbt.data = pagep;
tmp_dbt.size = dbp->pgsize;
- if ((ret = __db_split_log(dbp->dbenv->lg_info, dbp->txn,
- &LSN(pagep), 0, DB_SPLITOLD, dbp->log_fileid,
- PGNO(pagep), &tmp_dbt, &LSN(pagep))) != 0)
+ if ((ret = __db_split_log(dbp->dbenv->lg_info,
+ dbc->txn, &LSN(pagep), 0, DB_SPLITOLD,
+ dbp->log_fileid, PGNO(pagep), &tmp_dbt,
+ &LSN(pagep))) != 0)
return (ret);
}
pgno = pagep->next_pgno;
- if ((ret = freefunc(dbp, pagep)) != 0)
+ if ((ret = freefunc(dbc, pagep)) != 0)
return (ret);
} while (pgno != PGNO_INVALID);
@@ -636,21 +681,23 @@ __db_ddup(dbp, pgno, freefunc)
* current page.
*/
static int
-__db_addpage(dbp, hp, indxp, newfunc)
- DB *dbp;
+__db_addpage(dbc, hp, indxp, newfunc)
+ DBC *dbc;
PAGE **hp;
db_indx_t *indxp;
- int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+ int (*newfunc) __P((DBC *, u_int32_t, PAGE **));
{
+ DB *dbp;
PAGE *newpage;
int ret;
- if ((ret = newfunc(dbp, P_DUPLICATE, &newpage)) != 0)
+ dbp = dbc->dbp;
+ if ((ret = newfunc(dbc, P_DUPLICATE, &newpage)) != 0)
return (ret);
- if (DB_LOGGING(dbp)) {
+ if (DB_LOGGING(dbc)) {
if ((ret = __db_addpage_log(dbp->dbenv->lg_info,
- dbp->txn, &LSN(*hp), 0, dbp->log_fileid,
+ dbc->txn, &LSN(*hp), 0, dbp->log_fileid,
PGNO(*hp), &LSN(*hp), PGNO(newpage), &LSN(newpage))) != 0) {
return (ret);
}
@@ -666,3 +713,235 @@ __db_addpage(dbp, hp, indxp, newfunc)
*indxp = 0;
return (0);
}
+
+/*
+ * __db_dsearch --
+ * Search a set of duplicates for the proper position for a new duplicate.
+ *
+ * + pgno is the page number of the page on which to begin searching.
+ * Since we can continue duplicate searches, it might not be the first
+ * page.
+ *
+ * + If we are continuing a search, then *pp may be non-NULL in which
+ * case we do not have to retrieve the page.
+ *
+ * + If we are continuing a search, then *indxp contains the first
+ * index on pgno at which we should begin the search.
+ *
+ * NOTE: if there is no comparison function, then continuing is
+ * meaningless, and *pp should always be NULL and *indxp will be
+ * ignored.
+ *
+ * 3 return values:
+ *
+ * + pp is the returned page pointer of where this element should go.
+ * + indxp is the returned index on that page
+ * + cmpp is the returned final comparison result.
+ *
+ * PUBLIC: int __db_dsearch __P((DBC *,
+ * PUBLIC: int, DBT *, db_pgno_t, db_indx_t *, PAGE **, int *));
+ */
+int
+__db_dsearch(dbc, is_insert, dbt, pgno, indxp, pp, cmpp)
+ DBC *dbc;
+ int is_insert, *cmpp;
+ DBT *dbt;
+ db_pgno_t pgno;
+ db_indx_t *indxp;
+ PAGE **pp;
+{
+ DB *dbp;
+ PAGE *h;
+ db_indx_t base, indx, lim, save_indx;
+ db_pgno_t save_pgno;
+ int ret;
+
+ dbp = dbc->dbp;
+
+ if (dbp->dup_compare == NULL) {
+ /*
+ * We may have been given a valid page, but we may not be
+ * able to use it. The problem is that the application is
+ * doing a join and we're trying to continue the search,
+ * but since the items aren't sorted, we can't. Discard
+ * the page if it's not the one we're going to start with
+ * anyway.
+ */
+ if (*pp != NULL && (*pp)->pgno != pgno) {
+ if ((ret = memp_fput(dbp->mpf, *pp, 0)) != 0)
+ return (ret);
+ *pp = NULL;
+ }
+
+ /*
+ * If no duplicate function is specified, just go to the end
+ * of the duplicate set.
+ */
+ if (is_insert) {
+ if ((ret = __db_dend(dbc, pgno, pp)) != 0)
+ return (ret);
+ *indxp = NUM_ENT(*pp);
+ return (0);
+ }
+
+ /*
+ * We are looking for a specific duplicate, so do a linear
+ * search.
+ */
+ if (*pp != NULL)
+ goto nocmp_started;
+ for (;;) {
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0)
+ goto pg_err;
+nocmp_started: h = *pp;
+
+ for (*indxp = 0; *indxp < NUM_ENT(h); ++*indxp) {
+ if ((*cmpp = __bam_cmp(dbp,
+ dbt, h, *indxp, __bam_defcmp)) != 0)
+ continue;
+ /*
+ * The duplicate may have already been deleted,
+ * if it's a btree page, in which case we skip
+ * it.
+ */
+ if (dbp->type == DB_BTREE &&
+ B_DISSET(GET_BKEYDATA(h, *indxp)->type))
+ continue;
+
+ return (0);
+ }
+
+ if ((pgno = h->next_pgno) == PGNO_INVALID)
+ break;
+
+ if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ return (ret);
+ }
+ *cmpp = 1; /* We didn't succeed... */
+ return (0);
+ }
+
+ /*
+ * We have a comparison routine, i.e., the duplicates are sorted.
+ * Walk through the chain of duplicates, checking the last entry
+ * on each page to decide if it's the page we want to search.
+ *
+ * *pp may be non-NULL -- if we were given a valid page (e.g., are
+ * in mid-search), then use the provided page.
+ */
+ if (*pp != NULL)
+ goto cmp_started;
+ for (;;) {
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0)
+ goto pg_err;
+cmp_started: h = *pp;
+
+ if ((pgno = h->next_pgno) == PGNO_INVALID || __bam_cmp(dbp,
+ dbt, h, h->entries - 1, dbp->dup_compare) <= 0)
+ break;
+ /*
+ * Even when continuing a search, make sure we don't skip
+ * entries on a new page
+ */
+ *indxp = 0;
+
+ if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ return (ret);
+ }
+
+ /* Next, do a binary search on the page. */
+ base = F_ISSET(dbc, DBC_CONTINUE) ? *indxp : 0;
+ for (lim = NUM_ENT(h) - base; lim != 0; lim >>= 1) {
+ indx = base + (lim >> 1);
+ if ((*cmpp = __bam_cmp(dbp,
+ dbt, h, indx, dbp->dup_compare)) == 0) {
+ *indxp = indx;
+
+ if (dbp->type != DB_BTREE ||
+ !B_DISSET(GET_BKEYDATA(h, *indxp)->type))
+ return (0);
+ goto check_delete;
+ }
+ if (*cmpp > 0) {
+ base = indx + 1;
+ lim--;
+ }
+ }
+
+ /*
+ * Base references the smallest index larger than the supplied DBT's
+ * data item, potentially both 0 and NUM_ENT.
+ */
+ *indxp = base;
+ return (0);
+
+check_delete:
+ /*
+ * The duplicate may have already been deleted, if it's a btree page,
+ * in which case we wander around, hoping to find an entry that hasn't
+ * been deleted. First, wander in a forwardly direction.
+ */
+ save_pgno = (*pp)->pgno;
+ save_indx = *indxp;
+ for (++*indxp;;) {
+ for (; *indxp < NUM_ENT(h); ++*indxp) {
+ if ((*cmpp = __bam_cmp(dbp,
+ dbt, h, *indxp, dbp->dup_compare)) != 0)
+ goto check_delete_rev;
+
+ if (!B_DISSET(GET_BKEYDATA(h, *indxp)->type))
+ return (0);
+ }
+ if ((pgno = h->next_pgno) == PGNO_INVALID)
+ break;
+
+ if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ return (ret);
+
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0)
+ goto pg_err;
+ h = *pp;
+
+ *indxp = 0;
+ }
+
+check_delete_rev:
+ /* Go back to where we started, and wander in a backwardly direction. */
+ if (h->pgno != save_pgno) {
+ if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ return (ret);
+ if ((ret = memp_fget(dbp->mpf, &save_pgno, 0, pp)) != 0)
+ goto pg_err;
+ h = *pp;
+ }
+
+ for (;;) {
+ while (*indxp > 0) {
+ --*indxp;
+ if ((*cmpp = __bam_cmp(dbp,
+ dbt, h, *indxp, dbp->dup_compare)) != 0)
+ goto check_delete_fail;
+
+ if (!B_DISSET(GET_BKEYDATA(h, *indxp)->type))
+ return (0);
+ }
+ if ((pgno = h->prev_pgno) == PGNO_INVALID)
+ break;
+
+ if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ return (ret);
+
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0)
+ goto pg_err;
+ h = *pp;
+
+ *indxp = NUM_ENT(h);
+ }
+
+check_delete_fail:
+ *cmpp = 1; /* We didn't succeed... */
+ return (0);
+
+pg_err: __db_pgerr(dbp, pgno);
+ return (ret);
+}
diff --git a/db2/db/db_iface.c b/db2/db/db_iface.c
new file mode 100644
index 0000000000..4ebf3ba019
--- /dev/null
+++ b/db2/db/db_iface.c
@@ -0,0 +1,488 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_iface.c 10.40 (Sleepycat) 12/19/98";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_auto.h"
+#include "db_ext.h"
+#include "common_ext.h"
+
+static int __db_keyempty __P((const DB_ENV *));
+static int __db_rdonly __P((const DB_ENV *, const char *));
+static int __dbt_ferr __P((const DB *, const char *, const DBT *, int));
+
+/*
+ * __db_cdelchk --
+ * Common cursor delete argument checking routine.
+ *
+ * PUBLIC: int __db_cdelchk __P((const DB *, u_int32_t, int, int));
+ */
+int
+__db_cdelchk(dbp, flags, isrdonly, isvalid)
+ const DB *dbp;
+ u_int32_t flags;
+ int isrdonly, isvalid;
+{
+ /* Check for changes to a read-only tree. */
+ if (isrdonly)
+ return (__db_rdonly(dbp->dbenv, "c_del"));
+
+ /* Check for invalid function flags. */
+ switch (flags) {
+ case 0:
+ break;
+ default:
+ return (__db_ferr(dbp->dbenv, "DBcursor->c_del", 0));
+ }
+
+ /*
+ * The cursor must be initialized, return EINVAL for an invalid cursor,
+ * otherwise 0.
+ */
+ return (isvalid ? 0 : EINVAL);
+}
+
+/*
+ * __db_cgetchk --
+ * Common cursor get argument checking routine.
+ *
+ * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int));
+ */
+int
+__db_cgetchk(dbp, key, data, flags, isvalid)
+ const DB *dbp;
+ DBT *key, *data;
+ u_int32_t flags;
+ int isvalid;
+{
+ int key_einval, key_flags, ret;
+
+ key_einval = key_flags = 0;
+
+ /* Check for invalid function flags. */
+ LF_CLR(DB_RMW);
+ switch (flags) {
+ case DB_NEXT_DUP:
+ if (dbp->type == DB_RECNO)
+ goto err;
+ /* FALLTHROUGH */
+ case DB_CURRENT:
+ case DB_FIRST:
+ case DB_LAST:
+ case DB_NEXT:
+ case DB_PREV:
+ key_flags = 1;
+ break;
+ case DB_GET_BOTH:
+ case DB_SET_RANGE:
+ key_einval = key_flags = 1;
+ break;
+ case DB_SET:
+ key_einval = 1;
+ break;
+ case DB_GET_RECNO:
+ if (!F_ISSET(dbp, DB_BT_RECNUM))
+ goto err;
+ break;
+ case DB_SET_RECNO:
+ if (!F_ISSET(dbp, DB_BT_RECNUM))
+ goto err;
+ key_einval = key_flags = 1;
+ break;
+ default:
+err: return (__db_ferr(dbp->dbenv, "DBcursor->c_get", 0));
+ }
+
+ /* Check for invalid key/data flags. */
+ if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
+ return (ret);
+ if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
+ return (ret);
+
+ /* Check for missing keys. */
+ if (key_einval && (key->data == NULL || key->size == 0))
+ return (__db_keyempty(dbp->dbenv));
+
+ /*
+ * The cursor must be initialized for DB_CURRENT, return EINVAL for an
+ * invalid cursor, otherwise 0.
+ */
+ return (isvalid || flags != DB_CURRENT ? 0 : EINVAL);
+}
+
+/*
+ * __db_cputchk --
+ * Common cursor put argument checking routine.
+ *
+ * PUBLIC: int __db_cputchk __P((const DB *,
+ * PUBLIC: const DBT *, DBT *, u_int32_t, int, int));
+ */
+int
+__db_cputchk(dbp, key, data, flags, isrdonly, isvalid)
+ const DB *dbp;
+ const DBT *key;
+ DBT *data;
+ u_int32_t flags;
+ int isrdonly, isvalid;
+{
+ int key_einval, key_flags, ret;
+
+ key_einval = key_flags = 0;
+
+ /* Check for changes to a read-only tree. */
+ if (isrdonly)
+ return (__db_rdonly(dbp->dbenv, "c_put"));
+
+ /* Check for invalid function flags. */
+ switch (flags) {
+ case DB_AFTER:
+ case DB_BEFORE:
+ if (dbp->dup_compare != NULL)
+ goto err;
+ if (dbp->type == DB_RECNO && !F_ISSET(dbp, DB_RE_RENUMBER))
+ goto err;
+ if (dbp->type != DB_RECNO && !F_ISSET(dbp, DB_AM_DUP))
+ goto err;
+ break;
+ case DB_CURRENT:
+ /*
+ * If there is a comparison function, doing a DB_CURRENT
+ * must not change the part of the data item that is used
+ * for the comparison.
+ */
+ break;
+ case DB_KEYFIRST:
+ case DB_KEYLAST:
+ if (dbp->type == DB_RECNO)
+ goto err;
+ key_einval = key_flags = 1;
+ break;
+ default:
+err: return (__db_ferr(dbp->dbenv, "DBcursor->c_put", 0));
+ }
+
+ /* Check for invalid key/data flags. */
+ if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
+ return (ret);
+ if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
+ return (ret);
+
+ /* Check for missing keys. */
+ if (key_einval && (key->data == NULL || key->size == 0))
+ return (__db_keyempty(dbp->dbenv));
+
+ /*
+ * The cursor must be initialized for anything other than DB_KEYFIRST
+ * and DB_KEYLAST, return EINVAL for an invalid cursor, otherwise 0.
+ */
+ return (isvalid ||
+ flags == DB_KEYFIRST || flags == DB_KEYLAST ? 0 : EINVAL);
+}
+
+/*
+ * __db_closechk --
+ * DB->close flag check.
+ *
+ * PUBLIC: int __db_closechk __P((const DB *, u_int32_t));
+ */
+int
+__db_closechk(dbp, flags)
+ const DB *dbp;
+ u_int32_t flags;
+{
+ /* Check for invalid function flags. */
+ if (flags != 0 && flags != DB_NOSYNC)
+ return (__db_ferr(dbp->dbenv, "DB->close", 0));
+
+ return (0);
+}
+
+/*
+ * __db_delchk --
+ * Common delete argument checking routine.
+ *
+ * PUBLIC: int __db_delchk __P((const DB *, DBT *, u_int32_t, int));
+ */
+int
+__db_delchk(dbp, key, flags, isrdonly)
+ const DB *dbp;
+ DBT *key;
+ u_int32_t flags;
+ int isrdonly;
+{
+ /* Check for changes to a read-only tree. */
+ if (isrdonly)
+ return (__db_rdonly(dbp->dbenv, "delete"));
+
+ /* Check for invalid function flags. */
+ switch (flags) {
+ case 0:
+ break;
+ default:
+ return (__db_ferr(dbp->dbenv, "DB->del", 0));
+ }
+
+ /* Check for missing keys. */
+ if (key->data == NULL || key->size == 0)
+ return (__db_keyempty(dbp->dbenv));
+
+ return (0);
+}
+
+/*
+ * __db_getchk --
+ * Common get argument checking routine.
+ *
+ * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t));
+ */
+int
+__db_getchk(dbp, key, data, flags)
+ const DB *dbp;
+ const DBT *key;
+ DBT *data;
+ u_int32_t flags;
+{
+ int ret;
+
+ /* Check for invalid function flags. */
+ LF_CLR(DB_RMW);
+ switch (flags) {
+ case 0:
+ case DB_GET_BOTH:
+ break;
+ case DB_SET_RECNO:
+ if (!F_ISSET(dbp, DB_BT_RECNUM))
+ goto err;
+ break;
+ default:
+err: return (__db_ferr(dbp->dbenv, "DB->get", 0));
+ }
+
+ /* Check for invalid key/data flags. */
+ if ((ret = __dbt_ferr(dbp, "key", key, flags == DB_SET_RECNO)) != 0)
+ return (ret);
+ if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0)
+ return (ret);
+
+ /* Check for missing keys. */
+ if (key->data == NULL || key->size == 0)
+ return (__db_keyempty(dbp->dbenv));
+
+ return (0);
+}
+
+/*
+ * __db_joinchk --
+ * Common join argument checking routine.
+ *
+ * PUBLIC: int __db_joinchk __P((const DB *, u_int32_t));
+ */
+int
+__db_joinchk(dbp, flags)
+ const DB *dbp;
+ u_int32_t flags;
+{
+ if (flags != 0)
+ return (__db_ferr(dbp->dbenv, "DB->join", 0));
+
+ return (0);
+}
+
+/*
+ * __db_putchk --
+ * Common put argument checking routine.
+ *
+ * PUBLIC: int __db_putchk
+ * PUBLIC: __P((const DB *, DBT *, const DBT *, u_int32_t, int, int));
+ */
+int
+__db_putchk(dbp, key, data, flags, isrdonly, isdup)
+ const DB *dbp;
+ DBT *key;
+ const DBT *data;
+ u_int32_t flags;
+ int isrdonly, isdup;
+{
+ int ret;
+
+ /* Check for changes to a read-only tree. */
+ if (isrdonly)
+ return (__db_rdonly(dbp->dbenv, "put"));
+
+ /* Check for invalid function flags. */
+ switch (flags) {
+ case 0:
+ case DB_NOOVERWRITE:
+ break;
+ case DB_APPEND:
+ if (dbp->type != DB_RECNO)
+ goto err;
+ break;
+ default:
+err: return (__db_ferr(dbp->dbenv, "DB->put", 0));
+ }
+
+ /* Check for invalid key/data flags. */
+ if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
+ return (ret);
+ if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
+ return (ret);
+
+ /* Check for missing keys. */
+ if (key->data == NULL || key->size == 0)
+ return (__db_keyempty(dbp->dbenv));
+
+ /* Check for partial puts in the presence of duplicates. */
+ if (isdup && F_ISSET(data, DB_DBT_PARTIAL)) {
+ __db_err(dbp->dbenv,
+"a partial put in the presence of duplicates requires a cursor operation");
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * __db_statchk --
+ * Common stat argument checking routine.
+ *
+ * PUBLIC: int __db_statchk __P((const DB *, u_int32_t));
+ */
+int
+__db_statchk(dbp, flags)
+ const DB *dbp;
+ u_int32_t flags;
+{
+ /* Check for invalid function flags. */
+ switch (flags) {
+ case 0:
+ break;
+ case DB_RECORDCOUNT:
+ if (dbp->type == DB_RECNO)
+ break;
+ if (dbp->type == DB_BTREE && F_ISSET(dbp, DB_BT_RECNUM))
+ break;
+ goto err;
+ default:
+err: return (__db_ferr(dbp->dbenv, "DB->stat", 0));
+ }
+
+ return (0);
+}
+
+/*
+ * __db_syncchk --
+ * Common sync argument checking routine.
+ *
+ * PUBLIC: int __db_syncchk __P((const DB *, u_int32_t));
+ */
+int
+__db_syncchk(dbp, flags)
+ const DB *dbp;
+ u_int32_t flags;
+{
+ /* Check for invalid function flags. */
+ switch (flags) {
+ case 0:
+ break;
+ default:
+ return (__db_ferr(dbp->dbenv, "DB->sync", 0));
+ }
+
+ return (0);
+}
+
+/*
+ * __dbt_ferr --
+ * Check a DBT for flag errors.
+ */
+static int
+__dbt_ferr(dbp, name, dbt, check_thread)
+ const DB *dbp;
+ const char *name;
+ const DBT *dbt;
+ int check_thread;
+{
+ int ret;
+
+ /*
+ * Check for invalid DBT flags. We allow any of the flags to be
+ * specified to any DB or DBcursor call so that applications can
+ * set DB_DBT_MALLOC when retrieving a data item from a secondary
+ * database and then specify that same DBT as a key to a primary
+ * database, without having to clear flags.
+ */
+ if ((ret = __db_fchk(dbp->dbenv, name, dbt->flags,
+ DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL)) != 0)
+ return (ret);
+ if ((ret = __db_fcchk(dbp->dbenv, name,
+ dbt->flags, DB_DBT_MALLOC, DB_DBT_USERMEM)) != 0)
+ return (ret);
+
+ if (check_thread && F_ISSET(dbp, DB_AM_THREAD) &&
+ !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_USERMEM)) {
+ __db_err(dbp->dbenv,
+ "missing flag thread flag for %s DBT", name);
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/*
+ * __db_eopnotsup --
+ * Common operation not supported message.
+ *
+ * PUBLIC: int __db_eopnotsup __P((const DB_ENV *));
+ */
+int
+__db_eopnotsup(dbenv)
+ const DB_ENV *dbenv;
+{
+ __db_err(dbenv, "operation not supported");
+#ifdef EOPNOTSUPP
+ return (EOPNOTSUPP);
+#else
+ return (EINVAL);
+#endif
+}
+
+/*
+ * __db_keyempty --
+ * Common missing or empty key value message.
+ */
+static int
+__db_keyempty(dbenv)
+ const DB_ENV *dbenv;
+{
+ __db_err(dbenv, "missing or empty key value specified");
+ return (EINVAL);
+}
+
+/*
+ * __db_rdonly --
+ * Common readonly message.
+ */
+static int
+__db_rdonly(dbenv, name)
+ const DB_ENV *dbenv;
+ const char *name;
+{
+ __db_err(dbenv, "%s: attempt to modify a read-only tree", name);
+ return (EACCES);
+}
diff --git a/db2/db/db_join.c b/db2/db/db_join.c
new file mode 100644
index 0000000000..a4051c20b0
--- /dev/null
+++ b/db2/db/db_join.c
@@ -0,0 +1,271 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_join.c 10.10 (Sleepycat) 10/9/98";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_join.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+static int __db_join_close __P((DBC *));
+static int __db_join_del __P((DBC *, u_int32_t));
+static int __db_join_get __P((DBC *, DBT *, DBT *, u_int32_t));
+static int __db_join_put __P((DBC *, DBT *, DBT *, u_int32_t));
+
+/*
+ * This is the duplicate-assisted join functionality. Right now we're
+ * going to write it such that we return one item at a time, although
+ * I think we may need to optimize it to return them all at once.
+ * It should be easier to get it working this way, and I believe that
+ * changing it should be fairly straightforward.
+ *
+ * XXX
+ * Right now we do not maintain the number of duplicates so we do
+ * not optimize the join. If the caller does, then best performance
+ * will be achieved by putting the cursor with the smallest cardinality
+ * first.
+ *
+ * The first cursor moves sequentially through the duplicate set while
+ * the others search explicitly for the duplicate in question.
+ *
+ */
+
+/*
+ * __db_join --
+ * This is the interface to the duplicate-assisted join functionality.
+ * In the same way that cursors mark a position in a database, a cursor
+ * can mark a position in a join. While most cursors are created by the
+ * cursor method of a DB, join cursors are created through an explicit
+ * call to DB->join.
+ *
+ * The curslist is an array of existing, initialized cursors and primary
+ * is the DB of the primary file. The data item that joins all the
+ * cursors in the curslist is used as the key into the primary and that
+ * key and data are returned. When no more items are left in the join
+ * set, the c_get operation on the join cursor will return DB_NOTFOUND.
+ *
+ * PUBLIC: int __db_join __P((DB *, DBC **, u_int32_t, DBC **));
+ */
+int
+__db_join(primary, curslist, flags, dbcp)
+ DB *primary;
+ DBC **curslist, **dbcp;
+ u_int32_t flags;
+{
+ DBC *dbc;
+ JOIN_CURSOR *jc;
+ int i, ret;
+
+ DB_PANIC_CHECK(primary);
+
+ if ((ret = __db_joinchk(primary, flags)) != 0)
+ return (ret);
+
+ if (curslist == NULL || curslist[0] == NULL)
+ return (EINVAL);
+
+ dbc = NULL;
+ jc = NULL;
+
+ if ((ret = __os_calloc(1, sizeof(DBC), &dbc)) != 0)
+ goto err;
+
+ if ((ret = __os_calloc(1, sizeof(JOIN_CURSOR), &jc)) != 0)
+ goto err;
+
+ if ((ret = __os_malloc(256, NULL, &jc->j_key.data)) != 0)
+ goto err;
+ jc->j_key.ulen = 256;
+ F_SET(&jc->j_key, DB_DBT_USERMEM);
+
+ for (jc->j_curslist = curslist;
+ *jc->j_curslist != NULL; jc->j_curslist++)
+ ;
+ if ((ret = __os_calloc((jc->j_curslist - curslist + 1),
+ sizeof(DBC *), &jc->j_curslist)) != 0)
+ goto err;
+ for (i = 0; curslist[i] != NULL; i++) {
+ if (i != 0)
+ F_SET(curslist[i], DBC_KEYSET);
+ jc->j_curslist[i] = curslist[i];
+ }
+
+ dbc->c_close = __db_join_close;
+ dbc->c_del = __db_join_del;
+ dbc->c_get = __db_join_get;
+ dbc->c_put = __db_join_put;
+ dbc->internal = jc;
+ dbc->dbp = primary;
+ jc->j_init = 1;
+ jc->j_primary = primary;
+
+ *dbcp = dbc;
+
+ return (0);
+
+err: if (jc != NULL) {
+ if (jc->j_curslist != NULL)
+ __os_free(jc->j_curslist,
+ (jc->j_curslist - curslist + 1) * sizeof(DBC *));
+ __os_free(jc, sizeof(JOIN_CURSOR));
+ }
+ if (dbc != NULL)
+ __os_free(dbc, sizeof(DBC));
+ return (ret);
+}
+
+static int
+__db_join_put(dbc, key, data, flags)
+ DBC *dbc;
+ DBT *key;
+ DBT *data;
+ u_int32_t flags;
+{
+ DB_PANIC_CHECK(dbc->dbp);
+
+ COMPQUIET(key, NULL);
+ COMPQUIET(data, NULL);
+ COMPQUIET(flags, 0);
+ return (EINVAL);
+}
+
+static int
+__db_join_del(dbc, flags)
+ DBC *dbc;
+ u_int32_t flags;
+{
+ DB_PANIC_CHECK(dbc->dbp);
+
+ COMPQUIET(flags, 0);
+ return (EINVAL);
+}
+
+static int
+__db_join_get(dbc, key, data, flags)
+ DBC *dbc;
+ DBT *key, *data;
+ u_int32_t flags;
+{
+ DB *dbp;
+ DBC **cpp;
+ JOIN_CURSOR *jc;
+ int ret;
+ u_int32_t operation;
+
+ dbp = dbc->dbp;
+
+ DB_PANIC_CHECK(dbp);
+
+ operation = LF_ISSET(DB_OPFLAGS_MASK);
+ if (operation != 0 && operation != DB_JOIN_ITEM)
+ return (__db_ferr(dbp->dbenv, "DBcursor->c_get", 0));
+
+ LF_CLR(DB_OPFLAGS_MASK);
+ if ((ret =
+ __db_fchk(dbp->dbenv, "DBcursor->c_get", flags, DB_RMW)) != 0)
+ return (ret);
+
+ jc = (JOIN_CURSOR *)dbc->internal;
+retry:
+ ret = jc->j_curslist[0]->c_get(jc->j_curslist[0],
+ &jc->j_key, key, jc->j_init ? DB_CURRENT : DB_NEXT_DUP);
+
+ if (ret == ENOMEM) {
+ jc->j_key.ulen <<= 1;
+ if ((ret = __os_realloc(&jc->j_key.data, jc->j_key.ulen)) != 0)
+ return (ret);
+ goto retry;
+ }
+ if (ret != 0)
+ return (ret);
+
+ jc->j_init = 0;
+ do {
+ /*
+ * We have the first element; now look for it in the
+ * other cursors.
+ */
+ for (cpp = jc->j_curslist + 1; *cpp != NULL; cpp++) {
+retry2: if ((ret = ((*cpp)->c_get)(*cpp,
+ &jc->j_key, key, DB_GET_BOTH)) == DB_NOTFOUND)
+ break;
+ if (ret == ENOMEM) {
+ jc->j_key.ulen <<= 1;
+ if ((ret = __os_realloc(&jc->j_key.data,
+ jc->j_key.ulen)) != 0)
+ return (ret);
+ goto retry2;
+ }
+ if (F_ISSET(*cpp, DBC_KEYSET)) {
+ F_CLR(*cpp, DBC_KEYSET);
+ F_SET(*cpp, DBC_CONTINUE);
+ }
+ }
+
+ /*
+ * If we got out of here with ret != 0, then we failed to
+ * find the duplicate in one of the files, so we go on to
+ * the next item in the outermost relation. If ret was
+ * equal to 0, then we've got something to return.
+ */
+ if (ret == 0)
+ break;
+ } while ((ret = jc->j_curslist[0]->c_get(jc->j_curslist[0],
+ &jc->j_key, key, DB_NEXT_DUP)) == 0);
+
+ /*
+ * If ret != 0 here, we've exhausted the first file. Otherwise,
+ * key and data are set and we need to do the lookup on the
+ * primary.
+ */
+ if (ret != 0)
+ return (ret);
+
+ if (operation == DB_JOIN_ITEM)
+ return (0);
+ else
+ return ((jc->j_primary->get)(jc->j_primary,
+ jc->j_curslist[0]->txn, key, data, 0));
+}
+
+static int
+__db_join_close(dbc)
+ DBC *dbc;
+{
+ JOIN_CURSOR *jc;
+ int i;
+
+ DB_PANIC_CHECK(dbc->dbp);
+
+ jc = (JOIN_CURSOR *)dbc->internal;
+
+ /*
+ * Clear the optimization flag in the cursors.
+ */
+ for (i = 0; jc->j_curslist[i] != NULL; i++)
+ F_CLR(jc->j_curslist[i], DBC_CONTINUE | DBC_KEYSET);
+
+ __os_free(jc->j_curslist, 0);
+ __os_free(jc->j_key.data, jc->j_key.ulen);
+ __os_free(jc, sizeof(JOIN_CURSOR));
+ __os_free(dbc, sizeof(DBC));
+
+ return (0);
+}
diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c
index d28740dcbe..0efcc9de7f 100644
--- a/db2/db/db_overflow.c
+++ b/db2/db/db_overflow.c
@@ -47,7 +47,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_overflow.c 10.11 (Sleepycat) 5/7/98";
+static const char sccsid[] = "@(#)db_overflow.c 10.21 (Sleepycat) 9/27/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -60,6 +60,7 @@ static const char sccsid[] = "@(#)db_overflow.c 10.11 (Sleepycat) 5/7/98";
#include "db_int.h"
#include "db_page.h"
#include "db_am.h"
+#include "common_ext.h"
/*
* Big key/data code.
@@ -106,29 +107,20 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
needed = tlen;
}
- /*
- * Allocate any necessary memory.
- *
- * XXX: Never allocate 0 bytes;
- */
+ /* Allocate any necessary memory. */
if (F_ISSET(dbt, DB_DBT_USERMEM)) {
if (needed > dbt->ulen) {
dbt->size = needed;
return (ENOMEM);
}
} else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
- dbt->data = dbp->db_malloc == NULL ?
- (void *)__db_malloc(needed + 1) :
- (void *)dbp->db_malloc(needed + 1);
- if (dbt->data == NULL)
- return (ENOMEM);
+ if ((ret =
+ __os_malloc(needed, dbp->db_malloc, &dbt->data)) != 0)
+ return (ret);
} else if (*bpsz == 0 || *bpsz < needed) {
- *bpp = (*bpp == NULL ?
- (void *)__db_malloc(needed + 1) :
- (void *)__db_realloc(*bpp, needed + 1));
- if (*bpp == NULL)
- return (ENOMEM);
- *bpsz = needed + 1;
+ if ((ret = __os_realloc(bpp, needed)) != 0)
+ return (ret);
+ *bpsz = needed;
dbt->data = *bpp;
} else
dbt->data = *bpp;
@@ -168,16 +160,17 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
* __db_poff --
* Put an offpage item.
*
- * PUBLIC: int __db_poff __P((DB *, const DBT *, db_pgno_t *,
- * PUBLIC: int (*)(DB *, u_int32_t, PAGE **)));
+ * PUBLIC: int __db_poff __P((DBC *, const DBT *, db_pgno_t *,
+ * PUBLIC: int (*)(DBC *, u_int32_t, PAGE **)));
*/
int
-__db_poff(dbp, dbt, pgnop, newfunc)
- DB *dbp;
+__db_poff(dbc, dbt, pgnop, newfunc)
+ DBC *dbc;
const DBT *dbt;
db_pgno_t *pgnop;
- int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+ int (*newfunc) __P((DBC *, u_int32_t, PAGE **));
{
+ DB *dbp;
PAGE *pagep, *lastp;
DB_LSN new_lsn, null_lsn;
DBT tmp_dbt;
@@ -191,6 +184,7 @@ __db_poff(dbp, dbt, pgnop, newfunc)
* number of bytes we get for pages we fill completely with a single
* item.
*/
+ dbp = dbc->dbp;
pagespace = P_MAXSPACE(dbp->pgsize);
lastp = NULL;
@@ -208,13 +202,13 @@ __db_poff(dbp, dbt, pgnop, newfunc)
* the item onto the page. If sz is less than pagespace, we
* have a partial record.
*/
- if ((ret = newfunc(dbp, P_OVERFLOW, &pagep)) != 0)
+ if ((ret = newfunc(dbc, P_OVERFLOW, &pagep)) != 0)
return (ret);
- if (DB_LOGGING(dbp)) {
+ if (DB_LOGGING(dbc)) {
tmp_dbt.data = p;
tmp_dbt.size = pagespace;
ZERO_LSN(null_lsn);
- if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
+ if ((ret = __db_big_log(dbp->dbenv->lg_info, dbc->txn,
&new_lsn, 0, DB_ADD_BIG, dbp->log_fileid,
PGNO(pagep), lastp ? PGNO(lastp) : PGNO_INVALID,
PGNO_INVALID, &tmp_dbt, &LSN(pagep),
@@ -256,24 +250,26 @@ __db_poff(dbp, dbt, pgnop, newfunc)
* __db_ovref --
* Increment/decrement the reference count on an overflow page.
*
- * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int32_t));
+ * PUBLIC: int __db_ovref __P((DBC *, db_pgno_t, int32_t));
*/
int
-__db_ovref(dbp, pgno, adjust)
- DB *dbp;
+__db_ovref(dbc, pgno, adjust)
+ DBC *dbc;
db_pgno_t pgno;
int32_t adjust;
{
+ DB *dbp;
PAGE *h;
int ret;
+ dbp = dbc->dbp;
if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
(void)__db_pgerr(dbp, pgno);
return (ret);
}
- if (DB_LOGGING(dbp))
- if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbp->txn,
+ if (DB_LOGGING(dbc))
+ if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbc->txn,
&LSN(h), 0, dbp->log_fileid, h->pgno, adjust,
&LSN(h))) != 0)
return (ret);
@@ -287,19 +283,21 @@ __db_ovref(dbp, pgno, adjust)
* __db_doff --
* Delete an offpage chain of overflow pages.
*
- * PUBLIC: int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+ * PUBLIC: int __db_doff __P((DBC *, db_pgno_t, int (*)(DBC *, PAGE *)));
*/
int
-__db_doff(dbp, pgno, freefunc)
- DB *dbp;
+__db_doff(dbc, pgno, freefunc)
+ DBC *dbc;
db_pgno_t pgno;
- int (*freefunc) __P((DB *, PAGE *));
+ int (*freefunc) __P((DBC *, PAGE *));
{
+ DB *dbp;
PAGE *pagep;
DB_LSN null_lsn;
DBT tmp_dbt;
int ret;
+ dbp = dbc->dbp;
do {
if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) {
(void)__db_pgerr(dbp, pgno);
@@ -312,21 +310,21 @@ __db_doff(dbp, pgno, freefunc)
*/
if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) {
(void)memp_fput(dbp->mpf, pagep, 0);
- return (__db_ovref(dbp, pgno, -1));
+ return (__db_ovref(dbc, pgno, -1));
}
- if (DB_LOGGING(dbp)) {
+ if (DB_LOGGING(dbc)) {
tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD;
tmp_dbt.size = OV_LEN(pagep);
ZERO_LSN(null_lsn);
- if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
+ if ((ret = __db_big_log(dbp->dbenv->lg_info, dbc->txn,
&LSN(pagep), 0, DB_REM_BIG, dbp->log_fileid,
PGNO(pagep), PREV_PGNO(pagep), NEXT_PGNO(pagep),
&tmp_dbt, &LSN(pagep), &null_lsn, &null_lsn)) != 0)
return (ret);
}
pgno = pagep->next_pgno;
- if ((ret = freefunc(dbp, pagep)) != 0)
+ if ((ret = freefunc(dbc, pagep)) != 0)
return (ret);
} while (pgno != PGNO_INVALID);
@@ -339,44 +337,71 @@ __db_doff(dbp, pgno, freefunc)
*
* Given a starting page number and a key, return <0, 0, >0 to indicate if the
* key on the page is less than, equal to or greater than the key specified.
+ * We optimize this by doing chunk at a time comparison unless the user has
+ * specified a comparison function. In this case, we need to materialize
+ * the entire object and call their comparison routine.
*
- * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t));
+ * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t, u_int32_t,
+ * PUBLIC: int (*)(const DBT *, const DBT *), int *));
*/
int
-__db_moff(dbp, dbt, pgno)
+__db_moff(dbp, dbt, pgno, tlen, cmpfunc, cmpp)
DB *dbp;
const DBT *dbt;
db_pgno_t pgno;
+ u_int32_t tlen;
+ int (*cmpfunc) __P((const DBT *, const DBT *)), *cmpp;
{
PAGE *pagep;
- u_int32_t cmp_bytes, key_left;
+ DBT local_dbt;
+ void *buf;
+ u_int32_t bufsize, cmp_bytes, key_left;
u_int8_t *p1, *p2;
int ret;
+ /*
+ * If there is a user-specified comparison function, build a
+ * contiguous copy of the key, and call it.
+ */
+ if (cmpfunc != NULL) {
+ memset(&local_dbt, 0, sizeof(local_dbt));
+ buf = NULL;
+ bufsize = 0;
+
+ if ((ret = __db_goff(dbp,
+ &local_dbt, tlen, pgno, &buf, &bufsize)) != 0)
+ return (ret);
+ *cmpp = cmpfunc(&local_dbt, dbt);
+ __os_free(buf, bufsize);
+ return (0);
+ }
+
/* While there are both keys to compare. */
- for (ret = 0, p1 = dbt->data,
+ for (*cmpp = 0, p1 = dbt->data,
key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) {
- if (memp_fget(dbp->mpf, &pgno, 0, &pagep) != 0) {
- (void)__db_pgerr(dbp, pgno);
- return (0); /* No system error return. */
- }
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0)
+ return (ret);
cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
key_left -= cmp_bytes;
for (p2 =
(u_int8_t *)pagep + P_OVERHEAD; cmp_bytes-- > 0; ++p1, ++p2)
if (*p1 != *p2) {
- ret = (long)*p1 - (long)*p2;
+ *cmpp = (long)*p1 - (long)*p2;
break;
}
pgno = NEXT_PGNO(pagep);
- (void)memp_fput(dbp->mpf, pagep, 0);
- if (ret != 0)
+ if ((ret = memp_fput(dbp->mpf, pagep, 0)) != 0)
return (ret);
+ if (*cmpp != 0)
+ return (0);
}
if (key_left > 0) /* DBT is longer than page key. */
- return (-1);
- if (pgno != PGNO_INVALID) /* DBT is shorter than page key. */
- return (1);
+ *cmpp = -1;
+ else if (pgno != PGNO_INVALID) /* DBT is shorter than page key. */
+ *cmpp = 1;
+ else
+ *cmpp = 0;
+
return (0);
}
diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c
index a294cdd135..7f4364c6e1 100644
--- a/db2/db/db_pr.c
+++ b/db2/db/db_pr.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_pr.c 10.29 (Sleepycat) 5/23/98";
+static const char sccsid[] = "@(#)db_pr.c 10.40 (Sleepycat) 11/22/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -126,11 +126,10 @@ __db_prdb(dbp)
{ DB_AM_MLOCAL, "local mpool" },
{ DB_AM_PGDEF, "default page size" },
{ DB_AM_RDONLY, "read-only" },
- { DB_AM_RECOVER, "recover" },
{ DB_AM_SWAP, "needswap" },
{ DB_AM_THREAD, "thread" },
- { DB_BT_RECNUM, "btree:records" },
- { DB_HS_DIRTYMETA, "hash:dirty-meta" },
+ { DB_BT_RECNUM, "btree:recnum" },
+ { DB_DBM_ERROR, "dbm/ndbm error" },
{ DB_RE_DELIMITER, "recno:delimiter" },
{ DB_RE_FIXEDLEN, "recno:fixed-length" },
{ DB_RE_PAD, "recno:pad" },
@@ -178,42 +177,55 @@ __db_prbtree(dbp)
static const FN mfn[] = {
{ BTM_DUP, "duplicates" },
{ BTM_RECNO, "recno" },
- { BTM_RECNUM, "btree:records" },
+ { BTM_RECNUM, "btree:recnum" },
{ BTM_FIXEDLEN, "recno:fixed-length" },
{ BTM_RENUMBER, "recno:renumber" },
{ 0 },
};
+ DBC *dbc;
BTMETA *mp;
BTREE *t;
- EPG *epg;
FILE *fp;
PAGE *h;
RECNO *rp;
db_pgno_t i;
- int ret;
+ int cnt, ret;
+ const char *sep;
t = dbp->internal;
fp = __db_prinit(NULL);
+ if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
+ return (ret);
(void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE);
i = PGNO_METADATA;
- if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0)
+ if ((ret = memp_fget(dbp->mpf, &i, 0, (PAGE **)&mp)) != 0) {
+ (void)dbc->c_close(dbc);
return (ret);
+ }
+ fprintf(fp, "lsn.file: %lu lsn.offset: %lu\n",
+ (u_long)LSN(mp).file, (u_long)LSN(mp).offset);
(void)fprintf(fp, "magic %#lx\n", (u_long)mp->magic);
(void)fprintf(fp, "version %#lx\n", (u_long)mp->version);
(void)fprintf(fp, "pagesize %lu\n", (u_long)mp->pagesize);
(void)fprintf(fp, "maxkey: %lu minkey: %lu\n",
(u_long)mp->maxkey, (u_long)mp->minkey);
- (void)fprintf(fp, "free %lu", (u_long)mp->free);
- for (i = mp->free; i != PGNO_INVALID;) {
- if ((ret = __bam_pget(dbp, &h, &i, 0)) != 0)
+ (void)fprintf(fp, "free list: %lu", (u_long)mp->free);
+ for (i = mp->free, cnt = 0, sep = ", "; i != PGNO_INVALID;) {
+ if ((ret = memp_fget(dbp->mpf, &i, 0, &h)) != 0)
return (ret);
i = h->next_pgno;
(void)memp_fput(dbp->mpf, h, 0);
- (void)fprintf(fp, ", %lu", (u_long)i);
+ (void)fprintf(fp, "%s%lu", sep, (u_long)i);
+ if (++cnt % 10 == 0) {
+ (void)fprintf(fp, "\n");
+ cnt = 0;
+ sep = "";
+ } else
+ sep = ", ";
}
(void)fprintf(fp, "\n");
@@ -227,7 +239,7 @@ __db_prbtree(dbp)
(u_long)t->bt_maxkey, (u_long)t->bt_minkey);
(void)fprintf(fp, "bt_compare: %#lx bt_prefix: %#lx\n",
(u_long)t->bt_compare, (u_long)t->bt_prefix);
- if ((rp = t->bt_recno) != NULL) {
+ if ((rp = t->recno) != NULL) {
(void)fprintf(fp,
"re_delim: %#lx re_pad: %#lx re_len: %lu re_source: %s\n",
(u_long)rp->re_delim, (u_long)rp->re_pad,
@@ -238,13 +250,9 @@ __db_prbtree(dbp)
(u_long)rp->re_cmap, (u_long)rp->re_smap,
(u_long)rp->re_emap, (u_long)rp->re_msize);
}
- (void)fprintf(fp, "stack:");
- for (epg = t->bt_stack; epg < t->bt_sp; ++epg)
- (void)fprintf(fp, " %lu", (u_long)epg->page->pgno);
- (void)fprintf(fp, "\n");
(void)fprintf(fp, "ovflsize: %lu\n", (u_long)t->bt_ovflsize);
(void)fflush(fp);
- return (0);
+ return (dbc->c_close(dbc));
}
/*
@@ -258,51 +266,50 @@ __db_prhash(dbp)
DB *dbp;
{
FILE *fp;
- HTAB *t;
+ DBC *dbc;
+ HASH_CURSOR *hcp;
int i, put_page, ret;
db_pgno_t pgno;
- t = dbp->internal;
-
fp = __db_prinit(NULL);
+ if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
+ return (ret);
+ hcp = (HASH_CURSOR *)dbc->internal;
- fprintf(fp, "\thash_accesses %lu\n", (u_long)t->hash_accesses);
- fprintf(fp, "\thash_collisions %lu\n", (u_long)t->hash_collisions);
- fprintf(fp, "\thash_expansions %lu\n", (u_long)t->hash_expansions);
- fprintf(fp, "\thash_overflows %lu\n", (u_long)t->hash_overflows);
- fprintf(fp, "\thash_bigpages %lu\n", (u_long)t->hash_bigpages);
- fprintf(fp, "\n");
-
- if (t->hdr == NULL) {
+ /*
+ * In this case, hcp->hdr will never be null, if we decide
+ * to pass dbc's to this routine instead, then it could be.
+ */
+ if (hcp->hdr == NULL) {
pgno = PGNO_METADATA;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &t->hdr)) != 0)
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, &hcp->hdr)) != 0)
return (ret);
put_page = 1;
} else
put_page = 0;
- fprintf(fp, "\tmagic %#lx\n", (u_long)t->hdr->magic);
- fprintf(fp, "\tversion %lu\n", (u_long)t->hdr->version);
- fprintf(fp, "\tpagesize %lu\n", (u_long)t->hdr->pagesize);
- fprintf(fp, "\tovfl_point %lu\n", (u_long)t->hdr->ovfl_point);
- fprintf(fp, "\tlast_freed %lu\n", (u_long)t->hdr->last_freed);
- fprintf(fp, "\tmax_bucket %lu\n", (u_long)t->hdr->max_bucket);
- fprintf(fp, "\thigh_mask %#lx\n", (u_long)t->hdr->high_mask);
- fprintf(fp, "\tlow_mask %#lx\n", (u_long)t->hdr->low_mask);
- fprintf(fp, "\tffactor %lu\n", (u_long)t->hdr->ffactor);
- fprintf(fp, "\tnelem %lu\n", (u_long)t->hdr->nelem);
- fprintf(fp, "\th_charkey %#lx\n", (u_long)t->hdr->h_charkey);
+ fprintf(fp, "\tmagic %#lx\n", (u_long)hcp->hdr->magic);
+ fprintf(fp, "\tversion %lu\n", (u_long)hcp->hdr->version);
+ fprintf(fp, "\tpagesize %lu\n", (u_long)hcp->hdr->pagesize);
+ fprintf(fp, "\tovfl_point %lu\n", (u_long)hcp->hdr->ovfl_point);
+ fprintf(fp, "\tlast_freed %lu\n", (u_long)hcp->hdr->last_freed);
+ fprintf(fp, "\tmax_bucket %lu\n", (u_long)hcp->hdr->max_bucket);
+ fprintf(fp, "\thigh_mask %#lx\n", (u_long)hcp->hdr->high_mask);
+ fprintf(fp, "\tlow_mask %#lx\n", (u_long)hcp->hdr->low_mask);
+ fprintf(fp, "\tffactor %lu\n", (u_long)hcp->hdr->ffactor);
+ fprintf(fp, "\tnelem %lu\n", (u_long)hcp->hdr->nelem);
+ fprintf(fp, "\th_charkey %#lx\n", (u_long)hcp->hdr->h_charkey);
for (i = 0; i < NCACHED; i++)
- fprintf(fp, "%lu ", (u_long)t->hdr->spares[i]);
+ fprintf(fp, "%lu ", (u_long)hcp->hdr->spares[i]);
fprintf(fp, "\n");
(void)fflush(fp);
if (put_page) {
- (void)memp_fput(dbp->mpf, (PAGE *)t->hdr, 0);
- t->hdr = NULL;
+ (void)memp_fput(dbp->mpf, (PAGE *)hcp->hdr, 0);
+ hcp->hdr = NULL;
}
- return (0);
+ return (dbc->c_close(dbc));
}
/*
@@ -318,22 +325,18 @@ __db_prtree(mpf, all)
{
PAGE *h;
db_pgno_t i;
- int ret, t_ret;
if (set_psize == PSIZE_BOUNDARY)
__db_psize(mpf);
- ret = 0;
for (i = PGNO_ROOT;; ++i) {
- if ((ret = memp_fget(mpf, &i, 0, &h)) != 0)
+ if (memp_fget(mpf, &i, 0, &h) != 0)
break;
- if (TYPE(h) != P_INVALID)
- if ((t_ret = __db_prpage(h, all)) != 0 && ret == 0)
- ret = t_ret;
+ (void)__db_prpage(h, all);
(void)memp_fput(mpf, h, 0);
}
(void)fflush(__db_prinit(NULL));
- return (ret);
+ return (0);
}
/*
@@ -425,8 +428,7 @@ __db_prpage(h, all)
(TYPE(h) == P_LRECNO && h->pgno == PGNO_ROOT))
fprintf(fp, " total records: %4lu", (u_long)RE_NREC(h));
fprintf(fp, "\n");
- if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO ||
- TYPE(h) == P_DUPLICATE || TYPE(h) == P_OVERFLOW)
+ if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO)
fprintf(fp, " prev: %4lu next: %4lu",
(u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h));
if (TYPE(h) == P_IBTREE || TYPE(h) == P_LBTREE)
diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c
index 1ef6f18e61..7f577b5855 100644
--- a/db2/db/db_rec.c
+++ b/db2/db/db_rec.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_rec.c 10.16 (Sleepycat) 4/28/98";
+static const char sccsid[] = "@(#)db_rec.c 10.19 (Sleepycat) 9/27/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -40,7 +40,8 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info)
void *info;
{
__db_addrem_args *argp;
- DB *file_dbp, *mdbp;
+ DB *file_dbp;
+ DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
u_int32_t change;
@@ -57,9 +58,7 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info)
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
- *lsnp = argp->prev_lsn;
- ret = 0;
- goto out;
+ goto done;
} else
if ((ret = memp_fget(mpf,
&argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
@@ -73,7 +72,7 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info)
(cmp_n == 0 && !redo && argp->opcode == DB_REM_DUP)) {
/* Need to redo an add, or undo a delete. */
- if ((ret = __db_pitem(file_dbp, pagep, argp->indx, argp->nbytes,
+ if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
argp->hdr.size == 0 ? NULL : &argp->hdr,
argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
goto out;
@@ -83,7 +82,7 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info)
} else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_DUP) ||
(cmp_p == 0 && redo && argp->opcode == DB_REM_DUP)) {
/* Need to undo an add, or redo a delete. */
- if ((ret = __db_ditem(file_dbp,
+ if ((ret = __db_ditem(dbc,
pagep, argp->indx, argp->nbytes)) != 0)
goto out;
change = DB_MPOOL_DIRTY;
@@ -96,8 +95,11 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info)
LSN(pagep) = argp->pagelsn;
}
- if ((ret = memp_fput(mpf, pagep, change)) == 0)
- *lsnp = argp->prev_lsn;
+ if ((ret = memp_fput(mpf, pagep, change)) != 0)
+ goto out;
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
out: REC_CLOSE;
}
@@ -114,7 +116,8 @@ __db_split_recover(logp, dbtp, lsnp, redo, info)
void *info;
{
__db_split_args *argp;
- DB *file_dbp, *mdbp;
+ DB *file_dbp;
+ DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
int change, cmp_n, cmp_p, ret;
@@ -130,9 +133,7 @@ __db_split_recover(logp, dbtp, lsnp, redo, info)
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
- *lsnp = argp->prev_lsn;
- ret = 0;
- goto out;
+ goto done;
} else
if ((ret = memp_fget(mpf,
&argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
@@ -169,8 +170,11 @@ __db_split_recover(logp, dbtp, lsnp, redo, info)
LSN(pagep) = argp->pagelsn;
change = DB_MPOOL_DIRTY;
}
- if ((ret = memp_fput(mpf, pagep, change)) == 0)
- *lsnp = argp->prev_lsn;
+ if ((ret = memp_fput(mpf, pagep, change)) != 0)
+ goto out;
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
out: REC_CLOSE;
}
@@ -187,7 +191,8 @@ __db_big_recover(logp, dbtp, lsnp, redo, info)
void *info;
{
__db_big_args *argp;
- DB *file_dbp, *mdbp;
+ DB *file_dbp;
+ DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
u_int32_t change;
@@ -209,7 +214,7 @@ __db_big_recover(logp, dbtp, lsnp, redo, info)
} else
if ((ret = memp_fget(mpf,
&argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
- goto out;
+ goto out;
}
/*
@@ -299,9 +304,7 @@ npage: if (argp->next_pgno != PGNO_INVALID) {
* so we would not have to undo anything. In
* this case, don't bother creating a page.
*/
- *lsnp = argp->prev_lsn;
- ret = 0;
- goto out;
+ goto done;
} else
if ((ret = memp_fget(mpf, &argp->next_pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
@@ -323,7 +326,8 @@ npage: if (argp->next_pgno != PGNO_INVALID) {
goto out;
}
- *lsnp = argp->prev_lsn;
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
out: REC_CLOSE;
}
@@ -343,7 +347,8 @@ __db_ovref_recover(logp, dbtp, lsnp, redo, info)
void *info;
{
__db_ovref_args *argp;
- DB *file_dbp, *mdbp;
+ DB *file_dbp;
+ DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
int modified, ret;
@@ -370,8 +375,11 @@ __db_ovref_recover(logp, dbtp, lsnp, redo, info)
pagep->lsn = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
- *lsnp = argp->prev_lsn;
+ if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ goto out;
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
out: REC_CLOSE;
}
@@ -392,17 +400,20 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info)
void *info;
{
__db_relink_args *argp;
- DB *file_dbp, *mdbp;
+ DB *file_dbp;
+ DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
- int modified, ret;
+ int cmp_n, cmp_p, modified, ret;
REC_PRINT(__db_relink_print);
REC_INTRO(__db_relink_read);
/*
- * There are three pages we need to check -- the page, and the
- * previous and next pages, if they existed.
+ * There are up to three pages we need to check -- the page, and the
+ * previous and next pages, if they existed. For a page add operation,
+ * the current page is the result of a split and is being recovered
+ * elsewhere, so all we need do is recover the next page.
*/
if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (redo) {
@@ -411,6 +422,9 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info)
}
goto next;
}
+ if (argp->opcode == DB_ADD_PAGE)
+ goto next;
+
modified = 0;
if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) {
/* Redo the relink. */
@@ -424,10 +438,8 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info)
pagep->lsn = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
- (void)__db_panic(file_dbp);
+ if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
- }
next: if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) {
if (redo) {
@@ -437,23 +449,27 @@ next: if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) {
goto prev;
}
modified = 0;
- if (log_compare(&LSN(pagep), &argp->lsn_next) == 0 && redo) {
- /* Redo the relink. */
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->lsn_next);
+ if ((argp->opcode == DB_REM_PAGE && cmp_p == 0 && redo) ||
+ (argp->opcode == DB_ADD_PAGE && cmp_n == 0 && !redo)) {
+ /* Redo the remove or undo the add. */
pagep->prev_pgno = argp->prev;
pagep->lsn = *lsnp;
modified = 1;
- } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
- /* Undo the relink. */
+ } else if ((argp->opcode == DB_REM_PAGE && cmp_n == 0 && !redo) ||
+ (argp->opcode == DB_ADD_PAGE && cmp_p == 0 && redo)) {
+ /* Undo the remove or redo the add. */
pagep->prev_pgno = argp->pgno;
pagep->lsn = argp->lsn_next;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
- (void)__db_panic(file_dbp);
+ if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
- }
+ if (argp->opcode == DB_ADD_PAGE)
+ goto done;
prev: if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) {
if (redo) {
@@ -476,10 +492,8 @@ prev: if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) {
pagep->lsn = argp->lsn_prev;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
- (void) __db_panic(file_dbp);
+ if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
- }
done: *lsnp = argp->prev_lsn;
ret = 0;
@@ -500,7 +514,8 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info)
void *info;
{
__db_addpage_args *argp;
- DB *file_dbp, *mdbp;
+ DB *file_dbp;
+ DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
u_int32_t change;
@@ -541,8 +556,7 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info)
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
- ret = 0;
- goto out;
+ goto done;
} else
if ((ret = memp_fget(mpf,
&argp->nextpgno, DB_MPOOL_CREATE, &pagep)) != 0)
@@ -563,11 +577,13 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info)
LSN(pagep) = argp->nextlsn;
change = DB_MPOOL_DIRTY;
}
- ret = memp_fput(mpf, pagep, change);
+ if ((ret = memp_fput(mpf, pagep, change)) != 0)
+ goto out;
+
+done: *lsnp = argp->prev_lsn;
+ ret = 0;
-out: if (ret == 0)
- *lsnp = argp->prev_lsn;
- REC_CLOSE;
+out: REC_CLOSE;
}
/*
@@ -598,46 +614,3 @@ __db_debug_recover(logp, dbtp, lsnp, redo, info)
REC_NOOP_CLOSE;
}
-
-/*
- * __db_noop_recover --
- * Recovery function for noop.
- *
- * PUBLIC: int __db_noop_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
- */
-int
-__db_noop_recover(logp, dbtp, lsnp, redo, info)
- DB_LOG *logp;
- DBT *dbtp;
- DB_LSN *lsnp;
- int redo;
- void *info;
-{
- __db_noop_args *argp;
- DB *file_dbp, *mdbp;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- u_int32_t change;
- int cmp_n, cmp_p, ret;
-
- REC_PRINT(__db_noop_print);
- REC_INTRO(__db_noop_read);
-
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0)
- goto out;
-
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
- change = 0;
- if (cmp_p == 0 && redo) {
- LSN(pagep) = *lsnp;
- change = DB_MPOOL_DIRTY;
- } else if (cmp_n == 0 && !redo) {
- LSN(pagep) = argp->prevlsn;
- change = DB_MPOOL_DIRTY;
- }
- *lsnp = argp->prev_lsn;
- ret = memp_fput(mpf, pagep, change);
-
-out: REC_CLOSE;
-}
diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c
index 9d9b599ad6..9f0d0ecf8d 100644
--- a/db2/db/db_ret.c
+++ b/db2/db/db_ret.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_ret.c 10.13 (Sleepycat) 5/7/98";
+static const char sccsid[] = "@(#)db_ret.c 10.16 (Sleepycat) 10/4/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -93,6 +93,8 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
u_int32_t *memsize;
void *(*db_malloc) __P((size_t));
{
+ int ret;
+
/* If returning a partial record, reset the length. */
if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
data = (u_int8_t *)data + dbt->doff;
@@ -120,9 +122,6 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
* guarantees consistency, i.e., the application can always free memory
* without concern as to how many bytes of the record were requested.
*
- * XXX
- * Never allocate 0 bytes, it's known to make malloc/realloc unhappy.
- *
* Use the memory specified by the application: DB_DBT_USERMEM.
*
* !!!
@@ -130,11 +129,8 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
* memory pointer is allowed to be NULL.
*/
if (F_ISSET(dbt, DB_DBT_MALLOC)) {
- dbt->data = db_malloc == NULL ?
- (void *)__db_malloc(len) :
- (void *)db_malloc(len + 1);
- if (dbt->data == NULL)
- return (ENOMEM);
+ if ((ret = __os_malloc(len, db_malloc, &dbt->data)) != 0)
+ return (ret);
} else if (F_ISSET(dbt, DB_DBT_USERMEM)) {
if (len != 0 && (dbt->data == NULL || dbt->ulen < len))
return (ENOMEM);
@@ -142,12 +138,9 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
return (EINVAL);
} else {
if (len != 0 && (*memsize == 0 || *memsize < len)) {
- *memp = *memp == NULL ?
- (void *)__db_malloc(len) :
- (void *)__db_realloc(*memp, len);
- if (*memp == NULL) {
+ if ((ret = __os_realloc(memp, len)) != 0) {
*memsize = 0;
- return (ENOMEM);
+ return (ret);
}
*memsize = len;
}
diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c
deleted file mode 100644
index 73e2a51286..0000000000
--- a/db2/db/db_thread.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)db_thread.c 8.15 (Sleepycat) 4/26/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "db_am.h"
-
-static int __db_getlockid __P((DB *, DB *));
-
-/*
- * __db_gethandle --
- * Called by db access method routines when the DB_THREAD flag is set.
- * This routine returns a handle, either an existing handle from the
- * chain of handles, or creating one if necessary.
- *
- * PUBLIC: int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **));
- */
-int
-__db_gethandle(dbp, am_func, dbpp)
- DB *dbp, **dbpp;
- int (*am_func) __P((DB *, DB *));
-{
- DB *ret_dbp;
- int ret, t_ret;
-
- if ((ret = __db_mutex_lock((db_mutex_t *)dbp->mutexp, -1)) != 0)
- return (ret);
-
- if ((ret_dbp = LIST_FIRST(&dbp->handleq)) != NULL)
- /* Simply take one off the list. */
- LIST_REMOVE(ret_dbp, links);
- else {
- /* Allocate a new handle. */
- if ((ret_dbp = (DB *)__db_malloc(sizeof(*dbp))) == NULL) {
- ret = ENOMEM;
- goto err;
- }
- memcpy(ret_dbp, dbp, sizeof(*dbp));
- ret_dbp->internal = NULL;
- TAILQ_INIT(&ret_dbp->curs_queue);
-
- /* Set the locker, the lock structure and the lock DBT. */
- if ((ret = __db_getlockid(dbp, ret_dbp)) != 0)
- goto err;
-
- /* Finally, call the access method specific dup function. */
- if ((ret = am_func(dbp, ret_dbp)) != 0)
- goto err;
- }
-
- *dbpp = ret_dbp;
-
- if (0) {
-err: if (ret_dbp != NULL)
- FREE(ret_dbp, sizeof(*ret_dbp));
- }
- if ((t_ret =
- __db_mutex_unlock((db_mutex_t *)dbp->mutexp, -1)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __db_puthandle --
- * Return a DB handle to the pool for later use.
- *
- * PUBLIC: int __db_puthandle __P((DB *));
- */
-int
-__db_puthandle(dbp)
- DB *dbp;
-{
- DB *master;
- int ret;
-
- master = dbp->master;
- if ((ret = __db_mutex_lock((db_mutex_t *)master->mutexp, -1)) != 0)
- return (ret);
-
- LIST_INSERT_HEAD(&master->handleq, dbp, links);
-
- return (__db_mutex_unlock((db_mutex_t *)master->mutexp, -1));
-}
-
-/*
- * __db_getlockid --
- * Create a new locker ID and copy the file lock information from
- * the old DB into the new one.
- */
-static int
-__db_getlockid(dbp, new_dbp)
- DB *dbp, *new_dbp;
-{
- int ret;
-
- if (F_ISSET(dbp, DB_AM_LOCKING)) {
- if ((ret = lock_id(dbp->dbenv->lk_info, &new_dbp->locker)) != 0)
- return (ret);
- memcpy(new_dbp->lock.fileid, dbp->lock.fileid, DB_FILE_ID_LEN);
- new_dbp->lock_dbt.size = sizeof(new_dbp->lock);
- new_dbp->lock_dbt.data = &new_dbp->lock;
- }
- return (0);
-}