aboutsummaryrefslogtreecommitdiff
path: root/db2/mp
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2000-01-02 06:01:06 +0000
committerUlrich Drepper <drepper@redhat.com>2000-01-02 06:01:06 +0000
commit8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3 (patch)
tree099a250d7366aef2ab028fdb24f0d692cd784b4a /db2/mp
parent9a6450d578556c11e7c173d2f28362345b8f1258 (diff)
downloadglibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar
glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar.gz
glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar.bz2
glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.zip
Update.
* Makeconfig (all-subdirs): Remove db and db2. * db/*: Removed. * db2/*: Removed.
Diffstat (limited to 'db2/mp')
-rw-r--r--db2/mp/mp_bh.c592
-rw-r--r--db2/mp/mp_fget.c352
-rw-r--r--db2/mp/mp_fopen.c560
-rw-r--r--db2/mp/mp_fput.c153
-rw-r--r--db2/mp/mp_fset.c83
-rw-r--r--db2/mp/mp_open.c221
-rw-r--r--db2/mp/mp_pr.c304
-rw-r--r--db2/mp/mp_region.c330
-rw-r--r--db2/mp/mp_sync.c549
9 files changed, 0 insertions, 3144 deletions
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
deleted file mode 100644
index 12c53417d9..0000000000
--- a/db2/mp/mp_bh.c
+++ /dev/null
@@ -1,592 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c 10.45 (Sleepycat) 11/25/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *));
-
-/*
- * __memp_bhwrite --
- * Write the page associated with a given bucket header.
- *
- * Looks through this process' DB_MPOOLFILE handles for one that refers
- * to mfp (upgrading a read-only handle if necessary), falls back to
- * opening the file with __memp_fopen(), and then hands the buffer to
- * __memp_pgwrite().
- *
- * dbmp: the process' memory pool handle.
- * mfp: the MPOOLFILE the page belongs to.
- * bhp: the buffer header of the page to write.
- * restartp, wrotep: optional (may be NULL) result flags. Both are
- * cleared on entry; after that they are only changed by
- * __memp_pgwrite().
- *
- * Returns 0 without writing anything (*wrotep is left at 0) when no
- * usable handle can be found or opened; otherwise returns the value
- * returned by __memp_pgwrite().
- *
- * PUBLIC: int __memp_bhwrite
- * PUBLIC: __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
- */
-int
-__memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
- DB_MPOOL *dbmp;
- MPOOLFILE *mfp;
- BH *bhp;
- int *restartp, *wrotep;
-{
- DB_MPOOLFILE *dbmfp;
- DB_MPREG *mpreg;
- int incremented, ret;
-
- if (restartp != NULL)
- *restartp = 0;
- if (wrotep != NULL)
- *wrotep = 0;
- incremented = 0;
-
- /*
- * Walk the process' DB_MPOOLFILE list and find a file descriptor for
- * the file. We also check that the descriptor is open for writing.
- * If we find a descriptor on the file that's not open for writing, we
- * try and upgrade it to make it writeable. If that fails, we're done.
- *
- * The walk, the upgrade and the reference count increment all happen
- * while the handle mutex is held.
- */
- LOCKHANDLE(dbmp, dbmp->mutexp);
- for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
- dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
- if (dbmfp->mfp == mfp) {
- if (F_ISSET(dbmfp, MP_READONLY) &&
- __memp_upgrade(dbmp, dbmfp, mfp)) {
- UNLOCKHANDLE(dbmp, dbmp->mutexp);
- return (0);
- }
-
- /*
- * Increment the reference count -- see the comment in
- * memp_fclose().
- */
- ++dbmfp->ref;
- incremented = 1;
- break;
- }
- UNLOCKHANDLE(dbmp, dbmp->mutexp);
- if (dbmfp != NULL) /* Loop ended via break: a handle was found. */
- goto found;
-
- /*
- * It's not a page from a file we've opened. If the file requires
- * input/output processing, see if this process has ever registered
- * information as to how to write this type of file. If not, there's
- * nothing we can do.
- */
- if (mfp->ftype != 0) {
- LOCKHANDLE(dbmp, dbmp->mutexp);
- for (mpreg = LIST_FIRST(&dbmp->dbregq);
- mpreg != NULL; mpreg = LIST_NEXT(mpreg, q))
- if (mpreg->ftype == mfp->ftype)
- break;
- UNLOCKHANDLE(dbmp, dbmp->mutexp);
- if (mpreg == NULL)
- return (0);
- }
-
- /*
- * Try and open the file, attaching to the underlying shared area.
- *
- * XXX
- * Don't try to attach to temporary files. There are two problems in
- * trying to do that. First, if we have different privileges than the
- * process that "owns" the temporary file, we might create the backing
- * disk file such that the owning process couldn't read/write its own
- * buffers, e.g., memp_trickle() running as root creating a file owned
- * as root, mode 600. Second, if the temporary file has already been
- * created, we don't have any way of finding out what its real name is,
- * and, even if we did, it was already unlinked (so that it won't be
- * left if the process dies horribly). This decision causes a problem,
- * however: if the temporary file consumes the entire buffer cache,
- * and the owner doesn't flush the buffers to disk, we could end up
- * with resource starvation, and the memp_trickle() thread couldn't do
- * anything about it. That's a pretty unlikely scenario, though.
- *
- * XXX
- * There's no negative cache, so we may repeatedly try and open files
- * that we have previously tried (and failed) to open.
- *
- * Ignore any error, assume it's a permissions problem.
- *
- * A handle obtained this way is not counted in "incremented", so the
- * reference count adjustment after the write is skipped for it and
- * the handle is not closed by this function.
- */
- if (F_ISSET(mfp, MP_TEMP))
- return (0);
-
- if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off),
- 0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
- return (0);
-
-found: ret = __memp_pgwrite(dbmfp, bhp, restartp, wrotep);
-
- if (incremented) { /* Drop the reference taken in the list walk. */
- LOCKHANDLE(dbmp, dbmp->mutexp);
- --dbmfp->ref;
- UNLOCKHANDLE(dbmp, dbmp->mutexp);
- }
-
- return (ret);
-}
-
-/*
- * __memp_pgread --
- * Read a page from a file.
- *
- * dbmfp: handle for the file the page lives in.
- * bhp: buffer header whose buf receives the page contents.
- * can_create: if non-zero, a missing or short page is treated as a newly
- * created page instead of an error.
- *
- * The buffer is flagged BH_LOCKED | BH_TRASH and its mutex is taken, then
- * the region lock is released while the read and any pgin processing
- * run. The region lock is reacquired before returning, and BH_LOCKED is
- * always cleared; BH_TRASH is cleared only when 0 is returned.
- *
- * Returns 0 on success. On a short read without can_create, returns the
- * __os_io() error, or EIO if __os_io() reported none. Otherwise returns
- * the result of the pgin call.
- *
- * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
- */
-int
-__memp_pgread(dbmfp, bhp, can_create)
- DB_MPOOLFILE *dbmfp;
- BH *bhp;
- int can_create;
-{
- DB_IO db_io;
- DB_MPOOL *dbmp;
- MPOOLFILE *mfp;
- size_t len, pagesize;
- ssize_t nr;
- int created, ret;
-
- dbmp = dbmfp->dbmp;
- mfp = dbmfp->mfp;
- pagesize = mfp->stat.st_pagesize;
-
- F_SET(bhp, BH_LOCKED | BH_TRASH);
- LOCKBUFFER(dbmp, bhp);
- UNLOCKREGION(dbmp);
-
- /*
- * Temporary files may not yet have been created. We don't create
- * them now, we create them when the pages have to be flushed.
- *
- * With no descriptor, nr stays 0, so the page is handled below as a
- * zero-length read.
- */
- nr = 0;
- if (dbmfp->fd == -1)
- ret = 0;
- else {
- /*
- * Ignore read errors if we have permission to create the page.
- * Assume that the page doesn't exist, and that we'll create it
- * when we write it out.
- */
- db_io.fd_io = dbmfp->fd;
- db_io.fd_lock = dbmp->reginfo.fd;
- db_io.mutexp =
- F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL;
- db_io.pagesize = db_io.bytes = pagesize;
- db_io.pgno = bhp->pgno;
- db_io.buf = bhp->buf;
-
- ret = __os_io(&db_io, DB_IO_READ, &nr);
- }
-
- created = 0;
- if (nr < (ssize_t)pagesize) {
- if (can_create)
- created = 1;
- else {
- /* If we had a short read, ret may be 0. */
- if (ret == 0)
- ret = EIO;
- __db_err(dbmp->dbenv,
- "%s: page %lu doesn't exist, create flag not set",
- __memp_fn(dbmfp), (u_long)bhp->pgno);
- goto err;
- }
- }
-
- /*
- * Clear any bytes we didn't read that need to be cleared. If we're
- * running in diagnostic mode, smash any bytes on the page that are
- * unknown quantities for the caller.
- *
- * A clear_len of 0 means the whole page is cleared.
- */
- if (nr != (ssize_t)pagesize) {
- len = mfp->clear_len == 0 ? pagesize : mfp->clear_len;
- if (nr < (ssize_t)len)
- memset(bhp->buf + nr, 0, len - nr);
-#ifdef DIAGNOSTIC
- if (nr > (ssize_t)len)
- len = nr;
- if (len < pagesize)
- memset(bhp->buf + len, 0xdb, pagesize - len);
-#endif
- }
-
- /*
- * Call any pgin function. This assignment replaces whatever value
- * __os_io() left in ret, which is how a read error is dropped when
- * the page is being created.
- */
- ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1);
-
- /* Unlock the buffer and reacquire the region lock. */
-err: UNLOCKBUFFER(dbmp, bhp);
- LOCKREGION(dbmp);
-
- /*
- * If no errors occurred, the data is now valid, clear the BH_TRASH
- * flag; regardless, clear the lock bit and let other threads proceed.
- */
- F_CLR(bhp, BH_LOCKED);
- if (ret == 0) {
- F_CLR(bhp, BH_TRASH);
-
- /* Update the statistics. */
- if (created) {
- ++dbmp->mp->stat.st_page_create;
- ++mfp->stat.st_page_create;
- } else {
- ++dbmp->mp->stat.st_page_in;
- ++mfp->stat.st_page_in;
- }
- }
-
- return (ret);
-}
-
-/*
- * __memp_pgwrite --
- * Write a page to a file.
- *
- * dbmfp: handle for the file the page is written to.
- * bhp: buffer header of the page to write.
- * restartp: optional (may be NULL); cleared on entry and set to 1 once
- * the region lock has been released.
- * wrotep: optional (may be NULL); cleared on entry and set to 1 when the
- * page was written or was found to be already clean.
- *
- * If the buffer is still dirty, the region lock is released
- * (UNLOCKREGION) for the log flush, pgout processing and I/O, and is
- * reacquired before returning on both the success and the error path.
- * The two early "not dirty" returns do not touch the region lock.
- * NOTE(review): this implies the caller holds the region lock on entry
- * -- confirm against the callers.
- *
- * Returns 0 on success or when the buffer was not dirty; otherwise the
- * error from log_flush(), __memp_pg(), __db_appname() or __os_io(), or
- * EIO on a short write.
- *
- * PUBLIC: int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
- */
-int
-__memp_pgwrite(dbmfp, bhp, restartp, wrotep)
- DB_MPOOLFILE *dbmfp;
- BH *bhp;
- int *restartp, *wrotep;
-{
- DB_ENV *dbenv;
- DB_IO db_io;
- DB_LOG *lg_info;
- DB_LSN lsn;
- DB_MPOOL *dbmp;
- MPOOL *mp;
- MPOOLFILE *mfp;
- ssize_t nw;
- int callpgin, ret, syncfail;
- const char *fail;
-
- dbmp = dbmfp->dbmp;
- dbenv = dbmp->dbenv;
- mp = dbmp->mp;
- mfp = dbmfp->mfp;
-
- if (restartp != NULL)
- *restartp = 0;
- if (wrotep != NULL)
- *wrotep = 0;
- callpgin = 0;
-
- /*
- * Check the dirty bit -- this buffer may have been written since we
- * decided to write it.
- */
- if (!F_ISSET(bhp, BH_DIRTY)) {
- if (wrotep != NULL)
- *wrotep = 1;
- return (0);
- }
-
- LOCKBUFFER(dbmp, bhp);
-
- /*
- * If there were two writers, we may have just been waiting while the
- * other writer completed I/O on this buffer. Check the dirty bit one
- * more time.
- */
- if (!F_ISSET(bhp, BH_DIRTY)) {
- UNLOCKBUFFER(dbmp, bhp);
-
- if (wrotep != NULL)
- *wrotep = 1;
- return (0);
- }
-
- F_SET(bhp, BH_LOCKED);
- UNLOCKREGION(dbmp);
-
- if (restartp != NULL)
- *restartp = 1;
-
- /*
- * Copy the LSN off the page if we're going to need it.
- *
- * NOTE(review): lsn stays uninitialized when neither condition
- * holds. It is read again in the BH_WRITE branch further down,
- * which tests the flag after the region lock has been released and
- * reacquired -- confirm BH_WRITE cannot become set in between.
- */
- lg_info = dbenv->lg_info;
- if (lg_info != NULL || F_ISSET(bhp, BH_WRITE))
- memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
-
- /* Ensure the appropriate log records are on disk. */
- if (lg_info != NULL && (ret = log_flush(lg_info, &lsn)) != 0)
- goto err;
-
- /*
- * Call any pgout function. We set the callpgin flag so that we flag
- * that the contents of the buffer will need to be passed through pgin
- * before they are reused.
- */
- if (mfp->ftype == 0)
- ret = 0;
- else {
- callpgin = 1;
- if ((ret = __memp_pg(dbmfp, bhp, 0)) != 0)
- goto err;
- }
-
- /*
- * Temporary files may not yet have been created.
- *
- * The descriptor is tested again under the handle mutex so that only
- * one thread creates the file.
- *
- * NOTE(review): if __db_appname() returns 0 but leaves dbmfp->fd at
- * -1, ret is 0 when control reaches err, so the function returns 0
- * even though nothing was written (*wrotep stays 0).
- */
- if (dbmfp->fd == -1) {
- LOCKHANDLE(dbmp, dbmfp->mutexp);
- if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv,
- DB_APP_TMP, NULL, NULL, DB_CREATE | DB_EXCL | DB_TEMPORARY,
- &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) {
- UNLOCKHANDLE(dbmp, dbmfp->mutexp);
- __db_err(dbenv,
- "unable to create temporary backing file");
- goto err;
- }
- UNLOCKHANDLE(dbmp, dbmfp->mutexp);
- }
-
- /* Write the page. */
- db_io.fd_io = dbmfp->fd;
- db_io.fd_lock = dbmp->reginfo.fd;
- db_io.mutexp = F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL;
- db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize;
- db_io.pgno = bhp->pgno;
- db_io.buf = bhp->buf;
- if ((ret = __os_io(&db_io, DB_IO_WRITE, &nw)) != 0) {
- __db_panic(dbenv, ret);
- fail = "write";
- goto syserr;
- }
- if (nw != (ssize_t)mfp->stat.st_pagesize) { /* Short write. */
- ret = EIO;
- fail = "write";
- goto syserr;
- }
-
- if (wrotep != NULL)
- *wrotep = 1;
-
- /* Unlock the buffer and reacquire the region lock. */
- UNLOCKBUFFER(dbmp, bhp);
- LOCKREGION(dbmp);
-
- /*
- * Clean up the flags based on a successful write.
- *
- * If we rewrote the page, it will need processing by the pgin
- * routine before reuse.
- */
- if (callpgin)
- F_SET(bhp, BH_CALLPGIN);
- F_CLR(bhp, BH_DIRTY | BH_LOCKED);
-
- /*
- * If we write a buffer for which a checkpoint is waiting, update
- * the count of pending buffers (both in the mpool as a whole and
- * for this file). If the count for this file goes to zero, flush
- * the writes.
- *
- * XXX:
- * Don't lock the region around the sync, fsync(2) has no atomicity
- * issues.
- *
- * XXX:
- * We ignore errors from the sync -- it makes no sense to return an
- * error to the calling process, so set a flag causing the checkpoint
- * to be retried later.
- */
- if (F_ISSET(bhp, BH_WRITE)) {
- if (mfp->lsn_cnt == 1) { /* Last pending buffer for this file. */
- UNLOCKREGION(dbmp);
- syncfail = __os_fsync(dbmfp->fd) != 0;
- LOCKREGION(dbmp);
- if (syncfail)
- F_SET(mp, MP_LSN_RETRY);
-
- }
-
- F_CLR(bhp, BH_WRITE);
-
- /*
- * If the buffer just written has a larger LSN than the current
- * max LSN written for this checkpoint, update the saved value.
- */
- if (log_compare(&lsn, &mp->lsn) > 0)
- mp->lsn = lsn;
-
- --mp->lsn_cnt;
- --mfp->lsn_cnt;
- }
-
- /* Update the page clean/dirty statistics. */
- ++mp->stat.st_page_clean;
- --mp->stat.st_page_dirty;
-
- /* Update I/O statistics. */
- ++mp->stat.st_page_out;
- ++mfp->stat.st_page_out;
-
- return (0);
-
-syserr: __db_err(dbenv, "%s: %s failed for page %lu",
- __memp_fn(dbmfp), fail, (u_long)bhp->pgno);
-
-err: /* Unlock the buffer and reacquire the region lock. */
- UNLOCKBUFFER(dbmp, bhp);
- LOCKREGION(dbmp);
-
- /*
- * Clean up the flags based on a failure.
- *
- * The page remains dirty but we remove our lock. If we rewrote the
- * page, it will need processing by the pgin routine before reuse.
- */
- if (callpgin)
- F_SET(bhp, BH_CALLPGIN);
- F_CLR(bhp, BH_LOCKED);
-
- return (ret);
-}
-
-/*
- * __memp_pg --
- *	Call the pgin/pgout routine.
- *
- * dbmfp: handle for the file the page belongs to.
- * bhp: buffer header of the page to convert (pgno and buf are passed to
- *	the callback).
- * is_pgin: non-zero to call the registered pgin routine, zero to call
- *	the registered pgout routine.
- *
- * The registration list is searched under the handle mutex for an entry
- * whose ftype matches the file's.  The mutex is released before the
- * callback runs, so the callback executes without it held.
- *
- * Returns 0 if no matching registration exists, if the matching entry
- * has no routine for the requested direction, or if the routine
- * succeeds; otherwise returns the routine's non-zero result.
- *
- * PUBLIC: int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
- */
-int
-__memp_pg(dbmfp, bhp, is_pgin)
-	DB_MPOOLFILE *dbmfp;
-	BH *bhp;
-	int is_pgin;
-{
-	DBT dbt, *dbtp;
-	DB_MPOOL *dbmp;
-	DB_MPREG *mpreg;
-	MPOOLFILE *mfp;
-	int ftype, ret;
-
-	dbmp = dbmfp->dbmp;
-	mfp = dbmfp->mfp;
-
-	LOCKHANDLE(dbmp, dbmp->mutexp);
-
-	ftype = mfp->ftype;
-	for (mpreg = LIST_FIRST(&dbmp->dbregq);
-	    mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) {
-		if (ftype != mpreg->ftype)
-			continue;
-
-		/* Build the optional cookie handed to the callback. */
-		if (mfp->pgcookie_len == 0)
-			dbtp = NULL;
-		else {
-			dbt.size = mfp->pgcookie_len;
-			dbt.data = R_ADDR(dbmp, mfp->pgcookie_off);
-			dbtp = &dbt;
-		}
-
-		/* Found our entry: drop the handle mutex before calling out. */
-		UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-		if (is_pgin) {
-			if (mpreg->pgin != NULL && (ret =
-			    mpreg->pgin(bhp->pgno, bhp->buf, dbtp)) != 0)
-				goto err;
-		} else
-			if (mpreg->pgout != NULL && (ret =
-			    mpreg->pgout(bhp->pgno, bhp->buf, dbtp)) != 0)
-				goto err;
-		break;
-	}
-
-	/* The mutex is still held only if the search found nothing. */
-	if (mpreg == NULL)
-		UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	return (0);
-
-	/*
-	 * Both jumps to err happen after the handle mutex was released
-	 * above, so it must not be unlocked a second time here.
-	 */
-err:	__db_err(dbmp->dbenv, "%s: %s failed for page %lu",
-	    __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
-	return (ret);
-}
-
-/*
- * __memp_bhfree --
- *	Unlink a buffer header from the pool and optionally release it.
- *
- * dbmp: the process' memory pool handle.
- * mfp: the file the buffer belongs to (used to locate its hash bucket).
- * bhp: the buffer header to unlink.
- * free_mem: if non-zero, the header and its data are returned to the
- *	shared allocator and the clean-page statistic is decremented.
- *
- * PUBLIC: void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
- */
-void
-__memp_bhfree(dbmp, mfp, bhp, free_mem)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-	BH *bhp;
-	int free_mem;
-{
-	size_t bucket;
-
-	/* Unlink from the hash bucket queue the page hashes to. */
-	bucket = BUCKET(dbmp->mp, R_OFFSET(dbmp, mfp), bhp->pgno);
-	SH_TAILQ_REMOVE(&dbmp->htab[bucket], bhp, hq, __bh);
-
-	/* Unlink from the LRU queue. */
-	SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh);
-
-	/* The caller is about to reuse the header: leave the memory alone. */
-	if (!free_mem)
-		return;
-
-	/* Otherwise release the buffer header and data for real. */
-	__db_shalloc_free(dbmp->addr, bhp);
-	--dbmp->mp->stat.st_page_clean;
-}
-
-/*
- * __memp_upgrade --
- *	Replace a handle's read-only descriptor with a read/write one.
- *
- * Returns 0 if the handle is (or already was) upgraded, 1 if the
- * read/write open failed now or on an earlier attempt, or the error
- * returned by __db_appname().
- */
-static int
-__memp_upgrade(dbmp, dbmfp, mfp)
-	DB_MPOOL *dbmp;
-	DB_MPOOLFILE *dbmfp;
-	MPOOLFILE *mfp;
-{
-	char *real_path;
-	int newfd, ret;
-
-	/*
-	 * !!!
-	 * The caller is expected to hold the handle lock.
-	 */
-
-	/* A previous call already settled the outcome for this handle. */
-	if (F_ISSET(dbmfp, MP_UPGRADE))
-		return (0);
-	if (F_ISSET(dbmfp, MP_UPGRADE_FAIL))
-		return (1);
-
-	/*
-	 * Resolve the file's real name.  A valid pathname must exist, as
-	 * that is the only way a descriptor of any kind could have been
-	 * obtained for it.
-	 */
-	ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
-	    NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &real_path);
-	if (ret != 0)
-		return (ret);
-
-	/* Try the read/write open and record the result on the handle. */
-	if (__db_open(real_path, 0, 0, 0, &newfd) == 0) {
-		/* Swap in the new descriptor and remember the upgrade. */
-		(void)__os_close(dbmfp->fd);
-		dbmfp->fd = newfd;
-		F_SET(dbmfp, MP_UPGRADE);
-		ret = 0;
-	} else {
-		/* Remember the failure so the open is not retried. */
-		F_SET(dbmfp, MP_UPGRADE_FAIL);
-		ret = 1;
-	}
-
-	__os_freestr(real_path);
-	return (ret);
-}
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
deleted file mode 100644
index f159dc2d3e..0000000000
--- a/db2/mp/mp_fget.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c 10.53 (Sleepycat) 11/16/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_fget --
- *	Get a page from the file.
- *
- * dbmfp: handle for the file.
- * pgnoaddr: in/out page number.  It is read as the requested page, and
- *	is overwritten with the file's last page number when DB_MPOOL_LAST
- *	or DB_MPOOL_NEW is given.
- * flags: 0, or exactly one of DB_MPOOL_CREATE, DB_MPOOL_LAST and
- *	DB_MPOOL_NEW.
- * addrp: treated as a (void **); receives the address of the page.  On
- *	failures that occur after flag validation it is set to NULL.
- *
- * Returns 0 on success (and increments the handle's pin count),
- * otherwise a non-zero error: a flag-check error, EINVAL for a missing
- * mmap'd page or a reference count overflow, or the error from the
- * allocation, pgin or page-read helpers.
- */
-int
-memp_fget(dbmfp, pgnoaddr, flags, addrp)
-	DB_MPOOLFILE *dbmfp;
-	db_pgno_t *pgnoaddr;
-	u_int32_t flags;
-	void *addrp;
-{
-	BH *bhp;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t bucket, mf_offset;
-	u_int32_t st_hsearch;
-	int b_incr, first, ret;
-
-	dbmp = dbmfp->dbmp;
-	mp = dbmp->mp;
-	mfp = dbmfp->mfp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/*
-	 * Validate arguments.
-	 *
-	 * !!!
-	 * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonly
-	 * files here, and create non-existent pages in readonly files if the
-	 * flags are set, later.  The reason is that the hash access method
-	 * wants to get empty pages that don't really exist in readonly files.
-	 * The only alternative is for hash to write the last "bucket" all the
-	 * time, which we don't want to do because one of our big goals in life
-	 * is to keep database files small.  It's sleazy as hell, but we catch
-	 * any attempt to actually write the file in memp_fput().
-	 */
-#define OKFLAGS (DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
-	if (flags != 0) {
-		if ((ret =
-		    __db_fchk(dbmp->dbenv, "memp_fget", flags, OKFLAGS)) != 0)
-			return (ret);
-
-		/* The flags are mutually exclusive. */
-		switch (flags) {
-		case DB_MPOOL_CREATE:
-		case DB_MPOOL_LAST:
-		case DB_MPOOL_NEW:
-		case 0:
-			break;
-		default:
-			return (__db_ferr(dbmp->dbenv, "memp_fget", 1));
-		}
-	}
-
-#ifdef DIAGNOSTIC
-	/*
-	 * XXX
-	 * We want to switch threads as often as possible.  Yield every time
-	 * we get a new page to ensure contention.
-	 */
-	if (DB_GLOBAL(db_pageyield))
-		__os_yield(1);
-#endif
-
-	/* Initialize remaining local variables. */
-	mf_offset = R_OFFSET(dbmp, mfp);
-	bhp = NULL;
-	st_hsearch = 0;
-	b_incr = ret = 0;
-
-	/* Determine the hash bucket where this page will live. */
-	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
-
-	LOCKREGION(dbmp);
-
-	/*
-	 * Check for the last or last + 1 page requests.
-	 *
-	 * Examine and update the file's last_pgno value.  We don't care if
-	 * the last_pgno value immediately changes due to another thread --
-	 * at this instant in time, the value is correct.  We do increment the
-	 * current last_pgno value if the thread is asking for a new page,
-	 * however, to ensure that two threads creating pages don't get the
-	 * same one.
-	 */
-	if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
-		if (LF_ISSET(DB_MPOOL_NEW))
-			++mfp->last_pgno;
-		*pgnoaddr = mfp->last_pgno;
-		bucket = BUCKET(mp, mf_offset, mfp->last_pgno);
-
-		if (LF_ISSET(DB_MPOOL_NEW))
-			goto alloc;
-	}
-
-	/*
-	 * If mmap'ing the file and the page is not past the end of the file,
-	 * just return a pointer.
-	 *
-	 * The page may be past the end of the file, so check the page number
-	 * argument against the original length of the file.  If we previously
-	 * returned pages past the original end of the file, last_pgno will
-	 * have been updated to match the "new" end of the file, and checking
-	 * against it would return pointers past the end of the mmap'd region.
-	 *
-	 * If another process has opened the file for writing since we mmap'd
-	 * it, we will start playing the game by their rules, i.e. everything
-	 * goes through the cache.  All pages previously returned will be safe,
-	 * as long as the correct locking protocol was observed.
-	 *
-	 * XXX
-	 * We don't discard the map because we don't know when all of the
-	 * pages will have been discarded from the process' address space.
-	 * It would be possible to do so by reference counting the open
-	 * pages from the mmap, but it's unclear to me that it's worth it.
-	 */
-	if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP)) {
-		if (*pgnoaddr > mfp->orig_last_pgno) {
-			/*
-			 * !!!
-			 * See the comment above about non-existent pages and
-			 * the hash access method.
-			 */
-			if (!LF_ISSET(DB_MPOOL_CREATE)) {
-				__db_err(dbmp->dbenv,
-				    "%s: page %lu doesn't exist",
-				    __memp_fn(dbmfp), (u_long)*pgnoaddr);
-				ret = EINVAL;
-				goto err;
-			}
-		} else {
-			*(void **)addrp =
-			    R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
-			++mp->stat.st_map;
-			++mfp->stat.st_map;
-			goto done;
-		}
-	}
-
-	/* Search the hash chain for the page. */
-	for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
-		++st_hsearch;
-		if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
-			continue;
-
-		/* Refuse to overflow the reference count. */
-		if (bhp->ref == UINT16_T_MAX) {
-			__db_err(dbmp->dbenv,
-			    "%s: page %lu: reference count overflow",
-			    __memp_fn(dbmfp), (u_long)bhp->pgno);
-			ret = EINVAL;
-			goto err;
-		}
-
-		/*
-		 * Increment the reference count.  We may discard the region
-		 * lock as we evaluate and/or read the buffer, so we need to
-		 * ensure that it doesn't move and that its contents remain
-		 * unchanged.
-		 */
-		++bhp->ref;
-		b_incr = 1;
-
-		/*
-		 * Any buffer we find might be trouble.
-		 *
-		 * BH_LOCKED --
-		 * I/O is in progress.  Because we've incremented the buffer
-		 * reference count, we know the buffer can't move.  Unlock
-		 * the region lock, wait for the I/O to complete, and reacquire
-		 * the region.
-		 */
-		for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) {
-			UNLOCKREGION(dbmp);
-
-			/*
-			 * Explicitly yield the processor if it's not the first
-			 * pass through this loop -- if we don't, we might end
-			 * up running to the end of our CPU quantum as we will
-			 * simply be swapping between the two locks.
-			 */
-			if (!first)
-				__os_yield(1);
-
-			LOCKBUFFER(dbmp, bhp);
-			/* Wait for I/O to finish... */
-			UNLOCKBUFFER(dbmp, bhp);
-			LOCKREGION(dbmp);
-		}
-
-		/*
-		 * BH_TRASH --
-		 * The contents of the buffer are garbage.  Shouldn't happen,
-		 * and this read is likely to fail, but might as well try.
-		 */
-		if (F_ISSET(bhp, BH_TRASH))
-			goto reread;
-
-		/*
-		 * BH_CALLPGIN --
-		 * The buffer was converted so it could be written, and the
-		 * contents need to be converted again.
-		 */
-		if (F_ISSET(bhp, BH_CALLPGIN)) {
-			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
-				goto err;
-			F_CLR(bhp, BH_CALLPGIN);
-		}
-
-		++mp->stat.st_cache_hit;
-		++mfp->stat.st_cache_hit;
-		*(void **)addrp = bhp->buf;
-		goto done;
-	}
-
-alloc:	/* Allocate new buffer header and data space. */
-	if ((ret = __memp_alloc(dbmp, sizeof(BH) -
-	    sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)
-		goto err;
-
-#ifdef DIAGNOSTIC
-	if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) {
-		__db_err(dbmp->dbenv,
-		    "Internal error: BH data NOT size_t aligned.");
-		ret = EINVAL;
-		goto err;
-	}
-#endif
-	/* Initialize the BH fields; the single reference is ours. */
-	memset(bhp, 0, sizeof(BH));
-	LOCKINIT(dbmp, &bhp->mutex);
-	bhp->ref = 1;
-	bhp->pgno = *pgnoaddr;
-	bhp->mf_offset = mf_offset;
-
-	/*
-	 * Prepend the bucket header to the head of the appropriate MPOOL
-	 * bucket hash list.  Append the bucket header to the tail of the
-	 * MPOOL LRU chain.
-	 */
-	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);
-	SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-
-	/*
-	 * If we created the page, zero it out and continue.
-	 *
-	 * !!!
-	 * Note: DB_MPOOL_NEW specifically doesn't call the pgin function.
-	 * If DB_MPOOL_CREATE is used, then the application's pgin function
-	 * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
-	 * it can detect all of its page creates, and not bother.
-	 *
-	 * Otherwise, read the page into memory, optionally creating it if
-	 * DB_MPOOL_CREATE is set.
-	 */
-	if (LF_ISSET(DB_MPOOL_NEW)) {
-		if (mfp->clear_len == 0)
-			memset(bhp->buf, 0, mfp->stat.st_pagesize);
-		else {
-			memset(bhp->buf, 0, mfp->clear_len);
-#ifdef DIAGNOSTIC
-			memset(bhp->buf + mfp->clear_len, 0xdb,
-			    mfp->stat.st_pagesize - mfp->clear_len);
-#endif
-		}
-
-		++mp->stat.st_page_create;
-		++mfp->stat.st_page_create;
-	} else {
-		/*
-		 * It's possible for the read function to fail, which means
-		 * that we fail as well.  Note, the __memp_pgread() function
-		 * discards the region lock, so the buffer must be pinned
-		 * down so that it cannot move and its contents are unchanged.
-		 */
-reread:		if ((ret = __memp_pgread(dbmfp,
-		    bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) {
-			/*
-			 * !!!
-			 * Discard the buffer unless another thread is waiting
-			 * on our I/O to complete.  Regardless, the header has
-			 * the BH_TRASH flag set.
-			 *
-			 * Once the buffer has been freed, the error path must
-			 * not touch its reference count.  If it survives
-			 * because others hold references, the error path must
-			 * drop ours -- whether it was taken in the hash chain
-			 * search or set when the buffer was allocated.
-			 */
-			if (bhp->ref == 1) {
-				__memp_bhfree(dbmp, mfp, bhp, 1);
-				b_incr = 0;
-			} else
-				b_incr = 1;
-			goto err;
-		}
-
-		++mp->stat.st_cache_miss;
-		++mfp->stat.st_cache_miss;
-	}
-
-	/*
-	 * If we're returning a page after our current notion of the last-page,
-	 * update our information.  Note, there's no way to un-instantiate this
-	 * page, it's going to exist whether it's returned to us dirty or not.
-	 */
-	if (bhp->pgno > mfp->last_pgno)
-		mfp->last_pgno = bhp->pgno;
-
-	++mp->stat.st_page_clean;
-	*(void **)addrp = bhp->buf;
-
-done:	/* Update the chain search statistics. */
-	if (st_hsearch) {
-		++mp->stat.st_hash_searches;
-		if (st_hsearch > mp->stat.st_hash_longest)
-			mp->stat.st_hash_longest = st_hsearch;
-		mp->stat.st_hash_examined += st_hsearch;
-	}
-
-	++dbmfp->pinref;
-
-	UNLOCKREGION(dbmp);
-
-	return (0);
-
-err:	/* Discard our reference, if the buffer still exists. */
-	if (b_incr)
-		--bhp->ref;
-	UNLOCKREGION(dbmp);
-
-	*(void **)addrp = NULL;
-	return (ret);
-}
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
deleted file mode 100644
index dd02662fd8..0000000000
--- a/db2/mp/mp_fopen.c
+++ /dev/null
@@ -1,560 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c 10.60 (Sleepycat) 1/1/99";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *,
- const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
-
-/*
- * memp_fopen --
- *	Public entry point: validate the caller's arguments, then open a
- *	backing file for the memory pool through __memp_fopen().
- *
- *	Returns the __db_fchk() error if flags contains anything other than
- *	DB_CREATE, DB_NOMMAP or DB_RDONLY, and EINVAL if pagesize is zero or
- *	finfop->clear_len is larger than pagesize.  Otherwise returns whatever
- *	__memp_fopen() returns.
- */
-int
-memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
-	DB_MPOOL *dbmp;
-	const char *path;
-	u_int32_t flags;
-	int mode;
-	size_t pagesize;
-	DB_MPOOL_FINFO *finfop;
-	DB_MPOOLFILE **retp;
-{
-	int rc;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Only the three documented flags are accepted. */
-	rc = __db_fchk(dbmp->dbenv,
-	    "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY);
-	if (rc != 0)
-		return (rc);
-
-	/* A page size is mandatory. */
-	if (pagesize == 0) {
-		__db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
-		return (EINVAL);
-	}
-
-	/* The cleared prefix of a page cannot be longer than the page. */
-	if (finfop != NULL && finfop->clear_len > pagesize)
-		return (EINVAL);
-
-	/* No MPOOLFILE to join (NULL) and needlock is set (1). */
-	return (__memp_fopen(dbmp,
-	    NULL, path, flags, mode, pagesize, 1, finfop, retp));
-}
-
-/*
- * __memp_fopen --
- *	Open a backing file for the memory pool; internal version.
- *
- *	If mfp is non-NULL the new handle joins that MPOOLFILE and its
- *	reference count is incremented; otherwise __memp_mf_open() finds or
- *	creates one.  The region lock is taken around that step only if
- *	needlock is non-zero.  On success the new per-process handle is
- *	appended to dbmp->dbmfq, stored in *retp, and 0 is returned.  On
- *	failure the resolved path, the file descriptor and the handle are
- *	released and a non-zero error is returned.
- *
- *	NOTE(review): if the per-handle mutex allocation fails after the
- *	MPOOLFILE reference was taken, that reference is not dropped here.
- *
- * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
- * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
- */
-int
-__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-	const char *path;
-	u_int32_t flags;
-	int mode, needlock;
-	size_t pagesize;
-	DB_MPOOL_FINFO *finfop;
-	DB_MPOOLFILE **retp;
-{
-	DB_ENV *dbenv;
-	DB_MPOOLFILE *dbmfp;
-	DB_MPOOL_FINFO finfo;
-	db_pgno_t last_pgno;
-	size_t maxmap;
-	u_int32_t mbytes, bytes;
-	int ret;
-	u_int8_t idbuf[DB_FILE_ID_LEN];
-	char *rpath;
-
-	dbenv = dbmp->dbenv;
-	ret = 0;
-	rpath = NULL;
-
-	/*
-	 * The file size is only queried for named files.  Start from zero so
-	 * that the mmap size test below never reads indeterminate values when
-	 * no path was supplied.
-	 */
-	mbytes = bytes = 0;
-
-	/*
-	 * If mfp is provided, we take the DB_MPOOL_FINFO information from
-	 * the mfp.  We don't bother initializing everything, because some
-	 * of them are expensive to acquire.  If no mfp is provided and the
-	 * finfop argument is NULL, we default the values.
-	 */
-	if (finfop == NULL) {
-		memset(&finfo, 0, sizeof(finfo));
-		if (mfp != NULL) {
-			finfo.ftype = mfp->ftype;
-			finfo.pgcookie = NULL;
-			finfo.fileid = NULL;
-			finfo.lsn_offset = mfp->lsn_off;
-			finfo.clear_len = mfp->clear_len;
-		} else {
-			finfo.ftype = 0;
-			finfo.pgcookie = NULL;
-			finfo.fileid = NULL;
-			finfo.lsn_offset = -1;
-			finfo.clear_len = 0;
-		}
-		finfop = &finfo;
-	}
-
-	/* Allocate and initialize the per-process structure. */
-	if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)
-		return (ret);
-	dbmfp->dbmp = dbmp;
-	dbmfp->fd = -1;
-	dbmfp->ref = 1;
-	if (LF_ISSET(DB_RDONLY))
-		F_SET(dbmfp, MP_READONLY);
-
-	if (path == NULL) {
-		if (LF_ISSET(DB_RDONLY)) {
-			__db_err(dbenv,
-			    "memp_fopen: temporary files can't be readonly");
-			ret = EINVAL;
-			goto err;
-		}
-		last_pgno = 0;
-	} else {
-		/* Get the real name for this file and open it. */
-		if ((ret = __db_appname(dbenv,
-		    DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
-			goto err;
-		if ((ret = __db_open(rpath,
-		    LF_ISSET(DB_CREATE | DB_RDONLY),
-		    DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
-			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
-			goto err;
-		}
-
-		/*
-		 * Don't permit files that aren't a multiple of the pagesize,
-		 * and find the number of the last page in the file, all the
-		 * time being careful not to overflow 32 bits.
-		 *
-		 * !!!
-		 * We can't use off_t's here, or in any code in the mainline
-		 * library for that matter.  (We have to use them in the os
-		 * stubs, of course, as there are system calls that take them
-		 * as arguments.)  The reason is that some customers build in
-		 * environments where an off_t is 32-bits, but still run where
-		 * offsets are 64-bits, and they pay us a lot of money.
-		 */
-		if ((ret = __os_ioinfo(rpath,
-		    dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
-			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
-			goto err;
-		}
-
-		/* Page sizes have to be a power-of-two, ignore mbytes. */
-		if (bytes % pagesize != 0) {
-			__db_err(dbenv,
-			    "%s: file size not a multiple of the pagesize",
-			    rpath);
-			ret = EINVAL;
-			goto err;
-		}
-
-		last_pgno = mbytes * (MEGABYTE / pagesize);
-		last_pgno += bytes / pagesize;
-
-		/* Correction: page numbers are zero-based, not 1-based. */
-		if (last_pgno != 0)
-			--last_pgno;
-
-		/*
-		 * Get the file id if we weren't given one.  Generated file id's
-		 * don't use timestamps, otherwise there'd be no chance of any
-		 * other process joining the party.
-		 */
-		if (finfop->fileid == NULL) {
-			if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
-				goto err;
-			finfop->fileid = idbuf;
-		}
-	}
-
-	/*
-	 * If we weren't provided an underlying shared object to join with,
-	 * find/allocate the shared file objects.  Also allocate space for
-	 * the per-process thread lock.
-	 */
-	if (needlock)
-		LOCKREGION(dbmp);
-
-	if (mfp == NULL)
-		ret = __memp_mf_open(dbmp,
-		    path, pagesize, last_pgno, finfop, &mfp);
-	else {
-		++mfp->ref;
-		ret = 0;
-	}
-	if (ret == 0 &&
-	    F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
-	    __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
-		LOCKINIT(dbmp, dbmfp->mutexp);
-
-	if (needlock)
-		UNLOCKREGION(dbmp);
-	if (ret != 0)
-		goto err;
-
-	dbmfp->mfp = mfp;
-
-	/*
-	 * If a file:
-	 *	+ is read-only
-	 *	+ isn't temporary
-	 *	+ doesn't require any pgin/pgout support
-	 *	+ the DB_NOMMAP flag wasn't set
-	 *	+ and is less than mp_mmapsize bytes in size
-	 *
-	 * we can mmap it instead of reading/writing buffers.  Don't do error
-	 * checking based on the mmap call failure.  We want to do normal I/O
-	 * on the file if the reason we failed was because the file was on an
-	 * NFS mounted partition, and we can fail in buffer I/O just as easily
-	 * as here.
-	 *
-	 * XXX
-	 * We'd like to test to see if the file is too big to mmap.  Since we
-	 * don't know what size or type off_t's or size_t's are, or the largest
-	 * unsigned integral type is, or what random insanity the local C
-	 * compiler will perpetrate, doing the comparison in a portable way is
-	 * flatly impossible.  Hope that mmap fails if the file is too large.
-	 */
-#define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */
-	if (F_ISSET(mfp, MP_CAN_MMAP)) {
-		if (!F_ISSET(dbmfp, MP_READONLY))
-			F_CLR(mfp, MP_CAN_MMAP);
-		if (path == NULL)
-			F_CLR(mfp, MP_CAN_MMAP);
-		if (finfop->ftype != 0)
-			F_CLR(mfp, MP_CAN_MMAP);
-		if (LF_ISSET(DB_NOMMAP))
-			F_CLR(mfp, MP_CAN_MMAP);
-		maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ?
-		    DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
-		if (mbytes > maxmap / MEGABYTE ||
-		    (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
-			F_CLR(mfp, MP_CAN_MMAP);
-	}
-	dbmfp->addr = NULL;
-	if (F_ISSET(mfp, MP_CAN_MMAP)) {
-		dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
-		if (__db_mapfile(rpath,
-		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
-			/* Mapping is best-effort: fall back to buffer I/O. */
-			dbmfp->addr = NULL;
-			F_CLR(mfp, MP_CAN_MMAP);
-		}
-	}
-	if (rpath != NULL)
-		__os_freestr(rpath);
-
-	/* Publish the handle on the per-process list. */
-	LOCKHANDLE(dbmp, dbmp->mutexp);
-	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
-	UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	*retp = dbmfp;
-	return (0);
-
-err:	/*
-	 * Note that we do not have to free the thread mutex, because we
-	 * never get to here after we have successfully allocated it.
-	 *
-	 * dbmfp is always valid here: every jump to err happens after its
-	 * allocation succeeded, so no NULL test is needed.
-	 */
-	if (rpath != NULL)
-		__os_freestr(rpath);
-	if (dbmfp->fd != -1)
-		(void)__os_close(dbmfp->fd);
-	__os_free(dbmfp, sizeof(DB_MPOOLFILE));
-	return (ret);
-}
-
-/*
- * __memp_mf_open --
- *	Find or create the shared MPOOLFILE for a file.
- *
- *	For a named file (path != NULL) the list of MPOOLFILEs is searched
- *	for a non-temporary entry with the same file id.  A match must agree
- *	on clear length, file type and page size (EINVAL otherwise); its
- *	reference count is incremented and it is returned in *retp.  If there
- *	is no match, or the file is temporary, a new MPOOLFILE is allocated,
- *	initialized and inserted at the head of the list.
- *
- *	Returns 0 on success.  On an allocation failure everything allocated
- *	here is freed, *retp is left untouched and the error is returned.
- */
-static int
-__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
-	DB_MPOOL *dbmp;
-	const char *path;
-	size_t pagesize;
-	db_pgno_t last_pgno;
-	DB_MPOOL_FINFO *finfop;
-	MPOOLFILE **retp;
-{
-	MPOOLFILE *mfp;
-	int ret;
-	void *p;
-
-#define ISTEMPORARY (path == NULL)
-
-	/*
-	 * Walk the list of MPOOLFILE's, looking for a matching file.
-	 * Temporary files can't match previous files.
-	 */
-	if (!ISTEMPORARY) {
-		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-			if (F_ISSET(mfp, MP_TEMP))
-				continue;
-			if (memcmp(finfop->fileid,
-			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN) != 0)
-				continue;
-
-			/* Same file: the caller's parameters must agree. */
-			if (finfop->clear_len != mfp->clear_len ||
-			    finfop->ftype != mfp->ftype ||
-			    pagesize != mfp->stat.st_pagesize) {
-				__db_err(dbmp->dbenv,
-				    "%s: ftype, clear length or pagesize changed",
-				    path);
-				return (EINVAL);
-			}
-
-			/* Found it: increment the reference count. */
-			++mfp->ref;
-			*retp = mfp;
-			return (0);
-		}
-	}
-
-	/* Allocate a new MPOOLFILE. */
-	if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
-		return (ret);
-
-	/* Initialize the structure. */
-	memset(mfp, 0, sizeof(MPOOLFILE));
-	mfp->ref = 1;
-	mfp->ftype = finfop->ftype;
-	mfp->lsn_off = finfop->lsn_offset;
-	mfp->clear_len = finfop->clear_len;
-
-	/*
-	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
-	 * we have to know the last page in the file.  Figure it out and save
-	 * it away.
-	 */
-	mfp->stat.st_pagesize = pagesize;
-	mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
-
-	if (ISTEMPORARY)
-		F_SET(mfp, MP_TEMP);
-	else {
-		/* Copy the file path into shared memory. */
-		if ((ret = __memp_alloc(dbmp,
-		    strlen(path) + 1, &mfp->path_off, &p)) != 0)
-			goto err;
-		memcpy(p, path, strlen(path) + 1);
-
-		/* Copy the file identification string into shared memory. */
-		if ((ret = __memp_alloc(dbmp,
-		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
-			goto err;
-		memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
-
-		F_SET(mfp, MP_CAN_MMAP);
-	}
-
-	/* Copy the page cookie into shared memory. */
-	if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
-		mfp->pgcookie_len = 0;
-		mfp->pgcookie_off = 0;
-	} else {
-		if ((ret = __memp_alloc(dbmp,
-		    finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
-			goto err;
-		memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
-		mfp->pgcookie_len = finfop->pgcookie->size;
-	}
-
-	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
-	SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
-
-	*retp = mfp;
-	return (0);
-
-err:	/*
-	 * The page cookie is the last allocation, so it can never be held
-	 * when we get here; only the path and the file id may need freeing.
-	 * Report the failure to the caller instead of returning success with
-	 * a pointer to freed memory.
-	 */
-	if (mfp->path_off != 0)
-		__db_shalloc_free(dbmp->addr,
-		    R_ADDR(dbmp, mfp->path_off));
-	if (mfp->fileid_off != 0)
-		__db_shalloc_free(dbmp->addr,
-		    R_ADDR(dbmp, mfp->fileid_off));
-	__db_shalloc_free(dbmp->addr, mfp);
-	return (ret);
-}
-
-/*
- * memp_fclose --
- *	Close a backing file for the memory pool.
- *
- *	Waits until the caller holds the only reference to the handle,
- *	unlinks it from the per-process list, releases the underlying
- *	MPOOLFILE reference, unmaps and closes the file, and frees the
- *	handle.  Returns 0, or the first error reported by the unmap or the
- *	close.
- */
-int
-memp_fclose(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_MPOOL *dbmp;
-	int ret, t_ret;
-
-	dbmp = dbmfp->dbmp;
-	ret = 0;
-
-	MP_PANIC_CHECK(dbmp);
-
-	for (;;) {
-		LOCKHANDLE(dbmp, dbmp->mutexp);
-
-		/*
-		 * We have to reference count DB_MPOOLFILE structures as other
-		 * threads may be using them.  The problem only happens if the
-		 * application makes a bad design choice.  Here's the path:
-		 *
-		 * Thread A opens a database.
-		 * Thread B uses thread A's DB_MPOOLFILE to write a buffer
-		 *    in order to free up memory in the mpool cache.
-		 * Thread A closes the database while thread B is using the
-		 *    DB_MPOOLFILE structure.
-		 *
-		 * By opening all databases before creating the threads, and
-		 * closing them after the threads have exited, applications
-		 * get better performance and avoid the problem path entirely.
-		 *
-		 * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer
-		 * is a short-term lock, even in worst case, since we better be
-		 * the only thread of control using the DB_MPOOLFILE structure
-		 * to read pages *into* the cache.  Wait until we're the only
-		 * reference holder and remove the DB_MPOOLFILE structure from
-		 * the list, so nobody else can even find it.
-		 */
-		if (dbmfp->ref == 1) {
-			TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
-			break;
-		}
-		UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-		(void)__os_sleep(1, 0);
-	}
-	/* The loop exits with the handle lock still held. */
-	UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	/* Complain if pinned blocks never returned. */
-	if (dbmfp->pinref != 0)
-		__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
-		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);
-
-	/* Close the underlying MPOOLFILE. */
-	(void)__memp_mf_close(dbmp, dbmfp);
-
-	/* Discard any mmap information. */
-	if (dbmfp->addr != NULL &&
-	    (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
-		__db_err(dbmp->dbenv,
-		    "%s: %s", __memp_fn(dbmfp), strerror(ret));
-
-	/* Close the file; temporary files may not yet have been created. */
-	if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {
-		__db_err(dbmp->dbenv,
-		    "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
-		/* Keep the first error; don't lose a close failure. */
-		if (ret == 0)
-			ret = t_ret;
-	}
-
-	/* Free memory. */
-	if (dbmfp->mutexp != NULL) {
-		LOCKREGION(dbmp);
-		__db_shalloc_free(dbmp->addr, dbmfp->mutexp);
-		UNLOCKREGION(dbmp);
-	}
-
-	/* Discard the DB_MPOOLFILE structure. */
-	__os_free(dbmfp, sizeof(DB_MPOOLFILE));
-
-	return (ret);
-}
-
-/*
- * __memp_mf_close --
- *	Drop one reference to the MPOOLFILE behind dbmfp.  When it is the
- *	last reference, move the file's buffers to the free list, unlink the
- *	MPOOLFILE from the pool and free its shared memory.  Runs under the
- *	region lock and always returns 0.
- */
-static int
-__memp_mf_close(dbmp, dbmfp)
-	DB_MPOOL *dbmp;
-	DB_MPOOLFILE *dbmfp;
-{
-	BH *bhp, *next_bhp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t file_off;
-
-	mp = dbmp->mp;
-	mfp = dbmfp->mfp;
-
-	LOCKREGION(dbmp);
-
-	/* Other handles still share the file: only decrement. */
-	if (mfp->ref > 1) {
-		--mfp->ref;
-		UNLOCKREGION(dbmp);
-		return (0);
-	}
-
-	/*
-	 * Buffers owned by the file go to the free list rather than back
-	 * into the region allocator: the whole pool may be about to be
-	 * discarded, and reintegrating the memory is comparatively
-	 * expensive.  The successor is fetched first because a matching
-	 * buffer is moved to another queue inside the loop body.
-	 */
-	file_off = R_OFFSET(dbmp, mfp);
-	bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
-	while (bhp != NULL) {
-		next_bhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-#ifdef DEBUG_NO_DIRTY
-		/* Complain if we find any blocks that were left dirty. */
-		if (F_ISSET(bhp, BH_DIRTY))
-			__db_err(dbmp->dbenv,
-			    "%s: close: pgno %lu left dirty; ref %lu",
-			    __memp_fn(dbmfp),
-			    (u_long)bhp->pgno, (u_long)bhp->ref);
-#endif
-
-		if (bhp->mf_offset == file_off) {
-			/* Keep the clean/dirty page counters consistent. */
-			if (F_ISSET(bhp, BH_DIRTY)) {
-				++mp->stat.st_page_clean;
-				--mp->stat.st_page_dirty;
-			}
-			__memp_bhfree(dbmp, mfp, bhp, 0);
-			SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
-		}
-
-		bhp = next_bhp;
-	}
-
-	/* Unlink the MPOOLFILE from the pool's list. */
-	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
-
-	/* Release the optional pieces, then the structure itself. */
-	if (mfp->path_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
-	if (mfp->fileid_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
-	if (mfp->pgcookie_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
-	__db_shalloc_free(dbmp->addr, mfp);
-
-	UNLOCKREGION(dbmp);
-	return (0);
-}
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
deleted file mode 100644
index c551f97380..0000000000
--- a/db2/mp/mp_fput.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c 10.24 (Sleepycat) 9/27/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_fput --
- *	Mpool file put function: return a page obtained from the pool.
- *
- *	flags may be 0 or a combination of DB_MPOOL_CLEAN, DB_MPOOL_DIRTY
- *	and DB_MPOOL_DISCARD; CLEAN and DIRTY are checked against each other
- *	by __db_fcchk().  Returns 0 on success, the flag-check error for bad
- *	flags, EACCES if DB_MPOOL_DIRTY is given for a read-only handle, and
- *	EINVAL if the buffer was not pinned.
- */
-int
-memp_fput(dbmfp, pgaddr, flags)
-	DB_MPOOLFILE *dbmfp;
-	void *pgaddr;
-	u_int32_t flags;
-{
-	BH *bhp;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	int wrote, ret;
-
-	dbmp = dbmfp->dbmp;
-	mp = dbmp->mp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Validate arguments. */
-	if (flags) {
-		if ((ret = __db_fchk(dbmp->dbenv, "memp_fput", flags,
-		    DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD)) != 0)
-			return (ret);
-		if ((ret = __db_fcchk(dbmp->dbenv, "memp_fput",
-		    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
-			return (ret);
-
-		if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
-			__db_err(dbmp->dbenv,
-			    "%s: dirty flag set for readonly file page",
-			    __memp_fn(dbmfp));
-			return (EACCES);
-		}
-	}
-
-	LOCKREGION(dbmp);
-
-	/* Decrement the pinned reference count. */
-	if (dbmfp->pinref == 0)
-		__db_err(dbmp->dbenv,
-		    "%s: put: more blocks returned than retrieved",
-		    __memp_fn(dbmfp));
-	else
-		--dbmfp->pinref;
-
-	/*
-	 * If we're mapping the file, there's nothing to do.  Because we can
-	 * stop mapping the file at any time, we have to check on each buffer
-	 * to see if the address we gave the application was part of the map
-	 * region.
-	 *
-	 * The mapping covers [addr, addr + len); the address one past its
-	 * end is not part of it, hence the strict upper comparison.
-	 */
-	if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
-	    (u_int8_t *)pgaddr < (u_int8_t *)dbmfp->addr + dbmfp->len) {
-		UNLOCKREGION(dbmp);
-		return (0);
-	}
-
-	/* Convert the page address to a buffer header. */
-	bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
-
-	/* Set/clear the page bits, keeping the page counters in step. */
-	if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) {
-		++mp->stat.st_page_clean;
-		--mp->stat.st_page_dirty;
-		F_CLR(bhp, BH_DIRTY);
-	}
-	if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
-		--mp->stat.st_page_clean;
-		++mp->stat.st_page_dirty;
-		F_SET(bhp, BH_DIRTY);
-	}
-	if (LF_ISSET(DB_MPOOL_DISCARD))
-		F_SET(bhp, BH_DISCARD);
-
-	/*
-	 * Check for a reference count going to zero.  This can happen if the
-	 * application returns a page twice.
-	 */
-	if (bhp->ref == 0) {
-		__db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned",
-		    __memp_fn(dbmfp), (u_long)bhp->pgno);
-		UNLOCKREGION(dbmp);
-		return (EINVAL);
-	}
-
-	/*
-	 * If more than one reference to the page, we're done.  Ignore the
-	 * discard flags (for now) and leave it at its position in the LRU
-	 * chain.  The rest gets done at last reference close.
-	 */
-	if (--bhp->ref > 0) {
-		UNLOCKREGION(dbmp);
-		return (0);
-	}
-
-	/*
-	 * If this buffer is scheduled for writing because of a checkpoint, we
-	 * need to write it (if we marked it dirty), or update the checkpoint
-	 * counters (if we didn't mark it dirty).  If we try to write it and
-	 * can't, that's not necessarily an error, but set a flag so that the
-	 * next time the memp_sync function runs we try writing it there, as
-	 * the checkpoint application better be able to write all of the files.
-	 */
-	if (F_ISSET(bhp, BH_WRITE)) {
-		if (F_ISSET(bhp, BH_DIRTY)) {
-			if (__memp_bhwrite(dbmp,
-			    dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote)
-				F_SET(mp, MP_LSN_RETRY);
-		} else {
-			F_CLR(bhp, BH_WRITE);
-
-			--dbmfp->mfp->lsn_cnt;
-			--mp->lsn_cnt;
-		}
-	}
-
-	/*
-	 * Move the buffer to the head/tail of the LRU chain: discarded
-	 * buffers go to the head, everything else to the tail.
-	 */
-	SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
-	if (F_ISSET(bhp, BH_DISCARD))
-		SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
-	else
-		SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-
-	UNLOCKREGION(dbmp);
-	return (0);
-}
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
deleted file mode 100644
index 1940d3b198..0000000000
--- a/db2/mp/mp_fset.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c 10.16 (Sleepycat) 9/27/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_fset --
- *	Mpool page set-flag routine: change the clean/dirty/discard state
- *	of a page without returning it to the pool.
- *
- *	flags must be non-zero and limited to DB_MPOOL_DIRTY, DB_MPOOL_CLEAN
- *	and DB_MPOOL_DISCARD; CLEAN and DIRTY are checked against each other
- *	by __db_fcchk().  Returns 0 on success, the flag-check error for bad
- *	flags, and EACCES if DB_MPOOL_DIRTY is requested on a read-only
- *	handle.
- */
-int
-memp_fset(dbmfp, pgaddr, flags)
-	DB_MPOOLFILE *dbmfp;
-	void *pgaddr;
-	u_int32_t flags;
-{
-	BH *hdr;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	int rc;
-
-	dbmp = dbmfp->dbmp;
-	mp = dbmp->mp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* At least one flag is required. */
-	if (flags == 0)
-		return (__db_ferr(dbmp->dbenv, "memp_fset", 1));
-
-	/* Only known flags, and CLEAN/DIRTY validated as a pair. */
-	rc = __db_fchk(dbmp->dbenv, "memp_fset", flags,
-	    DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD);
-	if (rc != 0)
-		return (rc);
-	rc = __db_fcchk(dbmp->dbenv, "memp_fset",
-	    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY);
-	if (rc != 0)
-		return (rc);
-
-	/* Pages of a read-only file may not be dirtied. */
-	if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
-		__db_err(dbmp->dbenv,
-		    "%s: dirty flag set for readonly file page",
-		    __memp_fn(dbmfp));
-		return (EACCES);
-	}
-
-	/* Step back from the page data to the buffer header holding it. */
-	hdr = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
-
-	LOCKREGION(dbmp);
-
-	/* Dirty -> clean transition. */
-	if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(hdr, BH_DIRTY)) {
-		++mp->stat.st_page_clean;
-		--mp->stat.st_page_dirty;
-		F_CLR(hdr, BH_DIRTY);
-	}
-
-	/* Clean -> dirty transition. */
-	if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(hdr, BH_DIRTY)) {
-		--mp->stat.st_page_clean;
-		++mp->stat.st_page_dirty;
-		F_SET(hdr, BH_DIRTY);
-	}
-
-	if (LF_ISSET(DB_MPOOL_DISCARD))
-		F_SET(hdr, BH_DISCARD);
-
-	UNLOCKREGION(dbmp);
-	return (0);
-}
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
deleted file mode 100644
index 4c90fc438f..0000000000
--- a/db2/mp/mp_open.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_open.c 10.27 (Sleepycat) 10/1/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_open --
- *	Initialize and/or join a memory pool.
- *
- *	Allocates the per-process DB_MPOOL, maps the region through
- *	__memp_ropen() and, when DB_THREAD is set, allocates and initializes
- *	the handle mutex.  On success the handle is stored in *retp and 0 is
- *	returned; on failure a non-zero error is returned.
- */
-int
-memp_open(path, flags, mode, dbenv, retp)
-	const char *path;
-	u_int32_t flags;
-	int mode;
-	DB_ENV *dbenv;
-	DB_MPOOL **retp;
-{
-	DB_MPOOL *dbmp;
-	size_t cachesize;
-	int is_private, ret;
-
-	/* Validate arguments. */
-#ifdef HAVE_SPINLOCKS
-#define OKFLAGS (DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD)
-#else
-#define OKFLAGS (DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP)
-#endif
-	if ((ret = __db_fchk(dbenv, "memp_open", flags, OKFLAGS)) != 0)
-		return (ret);
-
-	/* Extract fields from DB_ENV structure. */
-	cachesize = dbenv == NULL ? 0 : dbenv->mp_size;
-
-	/* Create and initialize the DB_MPOOL structure. */
-	if ((ret = __os_calloc(1, sizeof(DB_MPOOL), &dbmp)) != 0)
-		return (ret);
-	LIST_INIT(&dbmp->dbregq);
-	TAILQ_INIT(&dbmp->dbmfq);
-
-	dbmp->dbenv = dbenv;
-
-	/* Decide if it's possible for anyone else to access the pool. */
-	is_private =
-	    (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE);
-
-	/*
-	 * Map in the region.  We do locking regardless, as portions of it are
-	 * implemented in common code (if we put the region in a file, that is).
-	 */
-	F_SET(dbmp, MP_LOCKREGION);
-	if ((ret = __memp_ropen(dbmp,
-	    path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0)
-		goto err;
-	F_CLR(dbmp, MP_LOCKREGION);
-
-	/*
-	 * If there's concurrent access, then we have to lock the region.
-	 * If it's threaded, then we have to lock both the handles and the
-	 * region, and we need to allocate a mutex for that purpose.
-	 */
-	if (!is_private)
-		F_SET(dbmp, MP_LOCKREGION);
-	if (LF_ISSET(DB_THREAD)) {
-		F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION);
-		LOCKREGION(dbmp);
-		ret = __memp_alloc(dbmp,
-		    sizeof(db_mutex_t), NULL, &dbmp->mutexp);
-		UNLOCKREGION(dbmp);
-		if (ret != 0) {
-			/*
-			 * memp_close() detaches the region and frees dbmp
-			 * itself, so return directly: going through err
-			 * would free dbmp a second time.  Clear
-			 * MP_LOCKHANDLE first because no handle mutex was
-			 * allocated and memp_close() frees the mutex
-			 * whenever that flag is set.
-			 */
-			F_CLR(dbmp, MP_LOCKHANDLE);
-			(void)memp_close(dbmp);
-			return (ret);
-		}
-		LOCKINIT(dbmp, dbmp->mutexp);
-	}
-
-	*retp = dbmp;
-	return (0);
-
-err:	/* Only reached before the region was attached. */
-	if (dbmp != NULL)
-		__os_free(dbmp, sizeof(DB_MPOOL));
-	return (ret);
-}
-
-/*
- * memp_close --
- *	Close a memory pool: release the registered pgin/pgout entries,
- *	close every file still open on the handle, free the handle mutex,
- *	detach from the region and free the DB_MPOOL.  Returns 0, or the
- *	first error reported while closing files or detaching.
- */
-int
-memp_close(dbmp)
-	DB_MPOOL *dbmp;
-{
-	DB_MPOOLFILE *dbmfp;
-	DB_MPREG *mpreg;
-	int ret, t_ret;
-
-	ret = 0;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Discard DB_MPREGs, always taking the current list head. */
-	for (;;) {
-		mpreg = LIST_FIRST(&dbmp->dbregq);
-		if (mpreg == NULL)
-			break;
-		LIST_REMOVE(mpreg, q);
-		__os_free(mpreg, sizeof(DB_MPREG));
-	}
-
-	/*
-	 * Discard DB_MPOOLFILEs.  memp_fclose() unlinks the handle from the
-	 * queue, so the head changes on every pass.
-	 */
-	for (;;) {
-		dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-		if (dbmfp == NULL)
-			break;
-		t_ret = memp_fclose(dbmfp);
-		if (t_ret != 0 && ret == 0)
-			ret = t_ret;
-	}
-
-	/* Discard thread mutex. */
-	if (F_ISSET(dbmp, MP_LOCKHANDLE)) {
-		LOCKREGION(dbmp);
-		__db_shalloc_free(dbmp->addr, dbmp->mutexp);
-		UNLOCKREGION(dbmp);
-	}
-
-	/* Close the region, keeping the first error seen. */
-	t_ret = __db_rdetach(&dbmp->reginfo);
-	if (t_ret != 0 && ret == 0)
-		ret = t_ret;
-
-	if (dbmp->reginfo.path != NULL)
-		__os_freestr(dbmp->reginfo.path);
-	__os_free(dbmp, sizeof(DB_MPOOL));
-
-	return (ret);
-}
-
-/*
- * __memp_panic --
- *	Panic a memory pool: set the panic field in the region layout of
- *	the environment's pool, if the environment has one.
- *
- * PUBLIC: void __memp_panic __P((DB_ENV *));
- */
-void
-__memp_panic(dbenv)
-	DB_ENV *dbenv;
-{
-	/* Nothing to mark when no pool is attached to the environment. */
-	if (dbenv->mp_info == NULL)
-		return;
-
-	dbenv->mp_info->mp->rlayout.panic = 1;
-}
-
-/*
- * memp_unlink --
- *	Exit a memory pool: build a REGINFO describing the default mpool
- *	region file and hand it to __db_runlink().  Returns the
- *	__os_strdup() error if the path cannot be copied, otherwise the
- *	result of __db_runlink().
- */
-int
-memp_unlink(path, force, dbenv)
-	const char *path;
-	int force;
-	DB_ENV *dbenv;
-{
-	REGINFO reginfo;
-	int rc;
-
-	memset(&reginfo, 0, sizeof(reginfo));
-	reginfo.dbenv = dbenv;
-	reginfo.appname = DB_APP_NONE;
-
-	/* Work on a private copy of the caller's path, if one was given. */
-	if (path != NULL) {
-		rc = __os_strdup(path, &reginfo.path);
-		if (rc != 0)
-			return (rc);
-	}
-
-	reginfo.file = DB_DEFAULT_MPOOL_FILE;
-	rc = __db_runlink(&reginfo, force);
-
-	/* The copy is ours to release whatever the outcome. */
-	if (reginfo.path != NULL)
-		__os_freestr(reginfo.path);
-	return (rc);
-}
-
-/*
- * memp_register --
- *	Register a file type's pgin, pgout routines on this pool handle.
- *	Returns 0, or the __os_malloc() error if the entry cannot be
- *	allocated.
- */
-int
-memp_register(dbmp, ftype, pgin, pgout)
-	DB_MPOOL *dbmp;
-	int ftype;
-	int (*pgin) __P((db_pgno_t, void *, DBT *));
-	int (*pgout) __P((db_pgno_t, void *, DBT *));
-{
-	DB_MPREG *entry;
-	int rc;
-
-	MP_PANIC_CHECK(dbmp);
-
-	rc = __os_malloc(sizeof(DB_MPREG), NULL, &entry);
-	if (rc != 0)
-		return (rc);
-
-	entry->ftype = ftype;
-	entry->pgin = pgin;
-	entry->pgout = pgout;
-
-	/*
-	 * New entries go at the head of the list.  Lookups walk the list
-	 * linearly and so see the most recent registration for a type
-	 * first; duplicates therefore need no special handling here.
-	 */
-	LOCKHANDLE(dbmp, dbmp->mutexp);
-	LIST_INSERT_HEAD(&dbmp->dbregq, entry, q);
-	UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	return (0);
-}
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
deleted file mode 100644
index 84c782e781..0000000000
--- a/db2/mp/mp_pr.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c 10.30 (Sleepycat) 10/1/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "db_auto.h"
-#include "db_ext.h"
-#include "common_ext.h"
-
-static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *));
-
-/*
- * memp_stat --
- *	Return MPOOL statistics.
- *
- *	If gspp is non-NULL, *gspp receives a newly allocated copy of the
- *	global statistics.  If fspp is non-NULL, *fspp receives a newly
- *	allocated, NULL-terminated array with one DB_MPOOL_FSTAT per
- *	MPOOLFILE, or NULL when the pool has no files.  Memory is obtained
- *	through __os_malloc() with the caller's db_malloc.  Returns 0, or the
- *	allocation error; the region lock is never left held.
- */
-int
-memp_stat(dbmp, gspp, fspp, db_malloc)
-	DB_MPOOL *dbmp;
-	DB_MPOOL_STAT **gspp;
-	DB_MPOOL_FSTAT ***fspp;
-	void *(*db_malloc) __P((size_t));
-{
-	DB_MPOOL_FSTAT **tfsp;
-	MPOOLFILE *mfp;
-	size_t len, nfiles, nlen;
-	int ret;
-	char *name;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Allocate space for the global statistics. */
-	if (gspp != NULL) {
-		*gspp = NULL;
-
-		if ((ret = __os_malloc(sizeof(**gspp), db_malloc, gspp)) != 0)
-			return (ret);
-
-		LOCKREGION(dbmp);
-
-		/* Copy out the global statistics. */
-		**gspp = dbmp->mp->stat;
-		(*gspp)->st_hash_buckets = dbmp->mp->htab_buckets;
-		(*gspp)->st_region_wait =
-		    dbmp->mp->rlayout.lock.mutex_set_wait;
-		(*gspp)->st_region_nowait =
-		    dbmp->mp->rlayout.lock.mutex_set_nowait;
-		(*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt;
-		(*gspp)->st_regsize = dbmp->mp->rlayout.size;
-
-		UNLOCKREGION(dbmp);
-	}
-
-	if (fspp != NULL) {
-		*fspp = NULL;
-
-		/*
-		 * Hold the region lock from the count until the array has
-		 * been filled: if the lock were dropped in between, a file
-		 * opened meanwhile would overrun the pointer array.
-		 */
-		LOCKREGION(dbmp);
-
-		/* Count the MPOOLFILE structures. */
-		for (nfiles = 0,
-		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-		    mfp != NULL;
-		    ++nfiles, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
-			;
-
-		if (nfiles == 0) {
-			UNLOCKREGION(dbmp);
-			return (0);
-		}
-
-		/* Allocate space for the pointers, plus the terminator. */
-		len = (nfiles + 1) * sizeof(DB_MPOOL_FSTAT *);
-		if ((ret = __os_malloc(len, db_malloc, fspp)) != 0) {
-			UNLOCKREGION(dbmp);
-			return (ret);
-		}
-
-		/* Build each individual entry. */
-		for (tfsp = *fspp,
-		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-		    mfp != NULL;
-		    ++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-			name = __memp_fns(dbmp, mfp);
-			nlen = strlen(name);
-
-			/* The name is stored right behind the structure. */
-			len = sizeof(DB_MPOOL_FSTAT) + nlen + 1;
-			if ((ret = __os_malloc(len, db_malloc, tfsp)) != 0) {
-				/*
-				 * Terminate what was built so far and drop
-				 * the lock before reporting the failure.
-				 */
-				*tfsp = NULL;
-				UNLOCKREGION(dbmp);
-				return (ret);
-			}
-			**tfsp = mfp->stat;
-			(*tfsp)->file_name = (char *)
-			    (u_int8_t *)*tfsp + sizeof(DB_MPOOL_FSTAT);
-			memcpy((*tfsp)->file_name, name, nlen + 1);
-		}
-		*tfsp = NULL;
-
-		UNLOCKREGION(dbmp);
-	}
-	return (0);
-}
-
-/*
- * __memp_fn --
- *	Return a printable file name for a per-process handle, for use in
- *	error messages.  Simply forwards to __memp_fns() with the handle's
- *	pool and MPOOLFILE.
- *
- * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *));
- */
-char *
-__memp_fn(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_MPOOL *pool;
-	MPOOLFILE *shared;
-
-	pool = dbmfp->dbmp;
-	shared = dbmfp->mfp;
-
-	return (__memp_fns(pool, shared));
-}
-
-/*
- * __memp_fns --
- *	Return a printable file name for an MPOOLFILE, for use in error
- *	messages: the stored path, or the literal "temporary" when the
- *	MPOOLFILE has no path (path_off is 0).
- *
- * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
- *
- */
-char *
-__memp_fns(dbmp, mfp)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-{
-	return (mfp->path_off == 0 ?
-	    (char *)"temporary" : (char *)R_ADDR(dbmp, mfp->path_off));
-}
-
-#define FMAP_ENTRIES 200 /* Files we map. */
-
-#define MPOOL_DUMP_HASH 0x01 /* Debug hash chains. */
-#define MPOOL_DUMP_LRU 0x02 /* Debug LRU chains. */
-#define MPOOL_DUMP_MEM 0x04 /* Debug region memory. */
-#define MPOOL_DUMP_ALL 0x07 /* Debug all. */
-
-
-/*
- * __memp_dump_region --
- *	Display MPOOL structures on fp (stderr if fp is NULL).
- *
- *	area is a string of selector letters: 'A' dumps everything, 'h' the
- *	hash chains, 'l' the LRU chain and 'm' the region memory; other
- *	letters are ignored.  The file lists are always printed.  The whole
- *	dump runs under the region lock.
- *
- * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *));
- */
-void
-__memp_dump_region(dbmp, area, fp)
-	DB_MPOOL *dbmp;
-	char *area;
-	FILE *fp;
-{
-	BH *bhp;
-	DB_MPOOLFILE *dbmfp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t bucket, fmap[FMAP_ENTRIES + 1];
-	u_int32_t flags;
-	int cnt;
-
-	/* Make it easy to call from the debugger. */
-	if (fp == NULL)
-		fp = stderr;
-
-	for (flags = 0; *area != '\0'; ++area)
-		switch (*area) {
-		case 'A':
-			LF_SET(MPOOL_DUMP_ALL);
-			break;
-		case 'h':
-			LF_SET(MPOOL_DUMP_HASH);
-			break;
-		case 'l':
-			LF_SET(MPOOL_DUMP_LRU);
-			break;
-		case 'm':
-			LF_SET(MPOOL_DUMP_MEM);
-			break;
-		default:
-			/* Unknown selectors are silently ignored. */
-			break;
-		}
-
-	LOCKREGION(dbmp);
-
-	mp = dbmp->mp;
-
-	/* Display MPOOL structures. */
-	(void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n",
-	    DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr);
-
-	/*
-	 * Display the MPOOLFILE structures, recording each one's region
-	 * offset in fmap so that buffers can be printed with a file number
-	 * instead of a raw offset.
-	 */
-	cnt = 0;
-	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) {
-		(void)fprintf(fp, "file #%d: %s: refs %lu, type %ld, %s\n",
-		    cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref,
-		    (long)mfp->ftype,
-		    F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write");
-		if (cnt < FMAP_ENTRIES)
-			fmap[cnt] = R_OFFSET(dbmp, mfp);
-	}
-
-	/*
-	 * Display the per-process handles.  The loop above ends with mfp
-	 * equal to NULL, so the offset must come from the handle's own
-	 * MPOOLFILE.
-	 */
-	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) {
-		(void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n",
-		    cnt + 1, __memp_fn(dbmfp), dbmfp->fd,
-		    F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
-		if (cnt < FMAP_ENTRIES)
-			fmap[cnt] = R_OFFSET(dbmp, dbmfp->mfp);
-	}
-
-	/* Terminate the map; the array has one spare slot for this. */
-	if (cnt < FMAP_ENTRIES)
-		fmap[cnt] = INVALID;
-	else
-		fmap[FMAP_ENTRIES] = INVALID;
-
-	/* Display the hash table list of BH's. */
-	if (LF_ISSET(MPOOL_DUMP_HASH)) {
-		(void)fprintf(fp,
-		    "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n",
-		    DB_LINE, (u_long)mp->htab_buckets);
-		for (bucket = 0; bucket < mp->htab_buckets; ++bucket) {
-			if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
-				(void)fprintf(fp, "%lu:\n", (u_long)bucket);
-			for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-			    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-				__memp_pbh(dbmp, bhp, fmap, fp);
-		}
-	}
-
-	/* Display the LRU list of BH's. */
-	if (LF_ISSET(MPOOL_DUMP_LRU)) {
-		(void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE);
-		(void)fprintf(fp, "pageno, file, ref, address\n");
-		for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
-			__memp_pbh(dbmp, bhp, fmap, fp);
-	}
-
-	if (LF_ISSET(MPOOL_DUMP_MEM))
-		__db_shalloc_dump(dbmp->addr, fp);
-
-	UNLOCKREGION(dbmp);
-
-	/* Flush in case we're debugging. */
-	(void)fflush(fp);
-}
-
-/*
- * __memp_pbh --
- *	Display a BH structure as one line on fp: page number, owning file,
- *	reference count, region offset and flag names.  The file is printed
- *	as "#N" when the buffer's mf_offset is found in fmap, and as the raw
- *	offset otherwise.
- */
-static void
-__memp_pbh(dbmp, bhp, fmap, fp)
-	DB_MPOOL *dbmp;
-	BH *bhp;
-	size_t *fmap;
-	FILE *fp;
-{
-	static const FN fn[] = {
-		{ BH_CALLPGIN, "callpgin" },
-		{ BH_DIRTY, "dirty" },
-		{ BH_DISCARD, "discard" },
-		{ BH_LOCKED, "locked" },
-		{ BH_TRASH, "trash" },
-		{ BH_WRITE, "write" },
-		{ 0 },
-	};
-	int slot;
-
-	/*
-	 * Advance until the terminator, a match, or the end of the map.
-	 * If the scan runs off the end, slot indexes the extra trailing
-	 * entry of fmap.
-	 */
-	slot = 0;
-	while (slot < FMAP_ENTRIES &&
-	    fmap[slot] != INVALID && fmap[slot] != bhp->mf_offset)
-		++slot;
-
-	if (fmap[slot] != INVALID)
-		(void)fprintf(fp, " %4lu, #%d, %2lu, %lu",
-		    (u_long)bhp->pgno, slot + 1,
-		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
-	else
-		(void)fprintf(fp, " %4lu, %lu, %2lu, %lu",
-		    (u_long)bhp->pgno, (u_long)bhp->mf_offset,
-		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
-
-	__db_prflags(bhp->flags, fn, fp);
-
-	(void)fprintf(fp, "\n");
-}
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
deleted file mode 100644
index b9c92f2e13..0000000000
--- a/db2/mp/mp_region.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_region.c 10.35 (Sleepycat) 12/11/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * __memp_reg_alloc --
- * Allocate some space in the mpool region, with locking.
- *
- * Brackets a single __memp_alloc() call with LOCKREGION/UNLOCKREGION and
- * returns that call's result unchanged. len, offsetp and retp are passed
- * straight through to __memp_alloc().
- *
- * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
- */
-int
-__memp_reg_alloc(dbmp, len, offsetp, retp)
- DB_MPOOL *dbmp;
- size_t len, *offsetp;
- void *retp;
-{
- int ret;
-
- LOCKREGION(dbmp);
- ret = __memp_alloc(dbmp, len, offsetp, retp);
- UNLOCKREGION(dbmp); /* Released whether or not the allocation succeeded. */
- return (ret);
-}
-
-/*
- * __memp_alloc --
- *	Allocate some space in the mpool region.
- *
- *	This function does no region locking of its own; __memp_reg_alloc()
- *	is the wrapper that brackets it with LOCKREGION/UNLOCKREGION.
- *
- *	Strategy, in order:
- *	  1. Ask the shared-region allocator for len bytes.
- *	  2. Reuse a buffer on the free list (bhfq) whose size is exactly len.
- *	  3. Hand free-list buffers back to the allocator, retrying step 1
- *	     once enough space has been released.
- *	  4. Walk the LRU list (bhq), writing dirty buffers where necessary
- *	     and evicting unpinned, unlocked ones, retrying step 1 once
- *	     enough space has been released.
- *	  5. When the LRU walk completes without a retry, try step 1 one
- *	     last time and report the failure if it still cannot succeed.
- *
- *	On success returns 0, stores the address through retp (which is
- *	treated as a void **) and, when offsetp is not NULL, stores the
- *	region offset of the memory there.  On failure returns the error from
- *	__db_shalloc() or __memp_bhwrite().
- *
- * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
- */
-int
-__memp_alloc(dbmp, len, offsetp, retp)
-	DB_MPOOL *dbmp;
-	size_t len, *offsetp;
-	void *retp;
-{
-	BH *bhp, *nbhp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t fsize, total;
-	int nomore, restart, ret, wrote;
-	void *p;
-
-	mp = dbmp->mp;
-
-	nomore = 0;
-alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
-		if (offsetp != NULL)
-			*offsetp = R_OFFSET(dbmp, p);
-		*(void **)retp = p;
-		return (0);
-	}
-	if (nomore) {
-		__db_err(dbmp->dbenv,
-	    "Unable to allocate %lu bytes from mpool shared region: %s\n",
-		    (u_long)len, strerror(ret));
-		return (ret);
-	}
-
-	/* Look for a buffer on the free list that's the right size. */
-	for (bhp =
-	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
-		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-		if (__db_shsizeof(bhp) == len) {
-			SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
-			if (offsetp != NULL)
-				*offsetp = R_OFFSET(dbmp, bhp);
-			*(void **)retp = bhp;
-			return (0);
-		}
-	}
-
-	/* Discard from the free list until we've freed enough memory. */
-	total = 0;
-	for (bhp =
-	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
-		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-		SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
-
-		/*
-		 * Record the buffer's size before handing it back.  Once the
-		 * chunk has been freed its header belongs to the allocator
-		 * and must not be consulted; the LRU walk below samples the
-		 * size before freeing for the same reason.
-		 */
-		fsize = __db_shsizeof(bhp);
-		__db_shalloc_free(dbmp->addr, bhp);
-		--mp->stat.st_page_clean;
-
-		/*
-		 * Retry as soon as we've freed up sufficient space.  If we
-		 * will have to coalesce memory to satisfy the request, don't
-		 * try until it's likely (possible?) that we'll succeed.
-		 */
-		total += fsize;
-		if (fsize >= len || total >= 3 * len)
-			goto alloc;
-	}
-
-retry:	/* Find a buffer we can flush; pure LRU. */
-	restart = 0;
-	total = 0;
-	for (bhp =
-	    SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
-		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-		/* Ignore pinned or locked (I/O in progress) buffers. */
-		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
-			continue;
-
-		/* Find the associated MPOOLFILE. */
-		mfp = R_ADDR(dbmp, bhp->mf_offset);
-
-		/*
-		 * Write the page if it's dirty.
-		 *
-		 * If we wrote the page, fall through and free the buffer.  We
-		 * don't have to rewalk the list to acquire the buffer because
-		 * it was never available for any other process to modify it.
-		 * If we didn't write the page, but we discarded and reacquired
-		 * the region lock, restart the buffer list walk.  If we neither
-		 * wrote the buffer nor discarded the region lock, continue down
-		 * the buffer list.
-		 */
-		if (F_ISSET(bhp, BH_DIRTY)) {
-			if ((ret = __memp_bhwrite(dbmp,
-			    mfp, bhp, &restart, &wrote)) != 0)
-				return (ret);
-
-			/*
-			 * It's possible that another process wants this buffer
-			 * and incremented the ref count while we were writing
-			 * it.
-			 */
-			if (bhp->ref != 0)
-				goto retry;
-
-			if (wrote)
-				++mp->stat.st_rw_evict;
-			else {
-				if (restart)
-					goto retry;
-				continue;
-			}
-		} else
-			++mp->stat.st_ro_evict;
-
-		/*
-		 * Check to see if the buffer is the size we're looking for.
-		 * If it is, simply reuse it.
-		 */
-		total += fsize = __db_shsizeof(bhp);
-		if (fsize == len) {
-			__memp_bhfree(dbmp, mfp, bhp, 0);
-
-			if (offsetp != NULL)
-				*offsetp = R_OFFSET(dbmp, bhp);
-			*(void **)retp = bhp;
-			return (0);
-		}
-
-		/* Free the buffer. */
-		__memp_bhfree(dbmp, mfp, bhp, 1);
-
-		/*
-		 * Retry as soon as we've freed up sufficient space.  If we
-		 * have to coalesce memory to satisfy the request, don't
-		 * try until it's likely (possible?) that we'll succeed.
-		 */
-		if (fsize >= len || total >= 3 * len)
-			goto alloc;
-
-		/* Restart the walk if we discarded the region lock. */
-		if (restart)
-			goto retry;
-	}
-
-	/* Nothing left to evict: make one final attempt, then give up. */
-	nomore = 1;
-	goto alloc;
-}
-
-/*
- * __memp_ropen --
- * Attach to, and optionally create, the mpool region.
- *
- * path is duplicated into dbmp->reginfo.path (NULL is allowed and stored
- * as NULL). cachesize of 0 selects DB_CACHESIZE_DEF and sets
- * REGION_SIZEDEF; any other value below DB_CACHESIZE_MIN is raised to
- * DB_CACHESIZE_MIN. mode and flags are stored in reginfo.mode and
- * reginfo.dbflags. A non-zero is_private selects DB_APP_TMP naming, no
- * file name, and REGION_PRIVATE.
- *
- * Returns 0 on success, otherwise the error from __os_strdup(),
- * __db_rattach() or __db_shalloc(). On the failure paths after the path
- * was duplicated, the copy is freed; reginfo.path is not reset to NULL.
- *
- * NOTE(review): the success and error paths call UNLOCKREGION without a
- * LOCKREGION in this function; presumably __db_rattach() returns with the
- * region locked -- confirm against its definition.
- *
- * PUBLIC: int __memp_ropen
- * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
- */
-int
-__memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
- DB_MPOOL *dbmp;
- const char *path;
- size_t cachesize;
- int mode, is_private;
- u_int32_t flags;
-{
- MPOOL *mp;
- size_t rlen;
- int defcache, ret;
-
- /*
- * Unlike other DB subsystems, mpool can't simply grow the region
- * because it returns pointers into the region to its clients. To
- * "grow" the region, we'd have to allocate a new region and then
- * store a region number in the structures that reference regional
- * objects. It's reasonable that we fail regardless, as clients
- * shouldn't have every page in the region pinned, so the only
- * "failure" mode should be a performance penalty because we don't
- * find a page in the cache that we'd like to have found.
- *
- * Up the user's cachesize by 25% to account for our overhead.
- */
- defcache = 0;
- if (cachesize < DB_CACHESIZE_MIN) {
- if (cachesize == 0) {
- defcache = 1;
- cachesize = DB_CACHESIZE_DEF;
- } else
- cachesize = DB_CACHESIZE_MIN;
- }
- rlen = cachesize + cachesize / 4;
-
- /*
- * Map in the region.
- *
- * If it's a private mpool, use malloc, it's a lot faster than
- * instantiating a region.
- *
- * Everything below up to __db_rattach() only fills in the reginfo
- * description of the region we want.
- */
- dbmp->reginfo.dbenv = dbmp->dbenv;
- dbmp->reginfo.appname = DB_APP_NONE;
- if (path == NULL)
- dbmp->reginfo.path = NULL;
- else
- if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0)
- return (ret);
- dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
- dbmp->reginfo.mode = mode;
- dbmp->reginfo.size = rlen;
- dbmp->reginfo.dbflags = flags;
- dbmp->reginfo.flags = 0;
- if (defcache)
- F_SET(&dbmp->reginfo, REGION_SIZEDEF);
-
- /*
- * If we're creating a temporary region, don't use any standard
- * naming.
- */
- if (is_private) {
- dbmp->reginfo.appname = DB_APP_TMP;
- dbmp->reginfo.file = NULL;
- F_SET(&dbmp->reginfo, REGION_PRIVATE);
- }
-
- if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
- if (dbmp->reginfo.path != NULL)
- __os_freestr(dbmp->reginfo.path);
- return (ret);
- }
-
- /*
- * The MPOOL structure is first in the region, the rest of the region
- * is free space.
- */
- dbmp->mp = dbmp->reginfo.addr;
- dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
-
- /* Initialize a created region. */
- if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
- mp = dbmp->mp;
- SH_TAILQ_INIT(&mp->bhq);
- SH_TAILQ_INIT(&mp->bhfq);
- SH_TAILQ_INIT(&mp->mpfq);
-
- __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
-
- /*
- * Assume we want to keep the hash chains with under 10 pages
- * on each chain. We don't know the pagesize in advance, and
- * it may differ for different files. Use a pagesize of 1K for
- * the calculation -- we walk these chains a lot, they should
- * be short.
- */
- mp->htab_buckets =
- __db_tablesize((cachesize / (1 * 1024)) / 10);
-
- /* Allocate hash table space and initialize it. */
- if ((ret = __db_shalloc(dbmp->addr,
- mp->htab_buckets * sizeof(DB_HASHTAB),
- 0, &dbmp->htab)) != 0)
- goto err;
- __db_hashinit(dbmp->htab, mp->htab_buckets);
- mp->htab = R_OFFSET(dbmp, dbmp->htab); /* Stored as a region offset. */
-
- ZERO_LSN(mp->lsn);
- mp->lsn_cnt = 0;
-
- memset(&mp->stat, 0, sizeof(mp->stat));
- mp->stat.st_cachesize = cachesize; /* The size before the 25% overhead. */
-
- mp->flags = 0;
- }
-
- /* Get the local hash table address. */
- dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
-
- UNLOCKREGION(dbmp);
- return (0);
-
-err: UNLOCKREGION(dbmp); /* Reached only when the hash table allocation fails. */
- (void)__db_rdetach(&dbmp->reginfo);
- if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
- (void)memp_unlink(path, 1, dbmp->dbenv);
-
- if (dbmp->reginfo.path != NULL)
- __os_freestr(dbmp->reginfo.path);
- return (ret);
-}
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
deleted file mode 100644
index 535348517c..0000000000
--- a/db2/mp/mp_sync.c
+++ /dev/null
@@ -1,549 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- * Sleepycat Software. All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c 10.31 (Sleepycat) 12/11/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <stdlib.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-static int __bhcmp __P((const void *, const void *));
-static int __memp_fsync __P((DB_MPOOLFILE *));
-
-/*
- * memp_sync --
- * Mpool sync function.
- *
- * Marks every dirty or pinned buffer on the LRU list with BH_WRITE on
- * behalf of the checkpoint identified by lsnp, then writes, in sorted
- * order, the buffers that were unpinned when they were marked.
- *
- * Returns:
- * 0 no buffers remain to be written for this LSN;
- * DB_INCOMPLETE buffers are still outstanding (lsn_cnt != 0) or the
- * pass was cut short and MP_LSN_RETRY was set;
- * EINVAL dbenv->lg_info is NULL;
- * EPERM __memp_bhwrite() returned 0 without writing the page;
- * otherwise the error from __os_malloc() or __memp_bhwrite().
- *
- * When the request is answered from an earlier pass whose count has
- * reached zero, *lsnp is overwritten with mp->lsn.
- */
-int
-memp_sync(dbmp, lsnp)
- DB_MPOOL *dbmp;
- DB_LSN *lsnp;
-{
- BH *bhp, **bharray;
- DB_ENV *dbenv;
- MPOOL *mp;
- MPOOLFILE *mfp;
- int ar_cnt, nalloc, next, maxpin, ret, wrote;
-
- MP_PANIC_CHECK(dbmp);
-
- dbenv = dbmp->dbenv;
- mp = dbmp->mp;
-
- if (dbenv->lg_info == NULL) {
- __db_err(dbenv, "memp_sync: requires logging");
- return (EINVAL);
- }
-
- /*
- * We try and write the buffers in page order: it should reduce seeks
- * by the underlying filesystem and possibly reduce the actual number
- * of writes. We don't want to hold the region lock while we write
- * the buffers, so only hold the lock while we create a list. Get a
- * good-size block of memory to hold buffer pointers, we don't want
- * to run out.
- *
- * The array is sized from the dirty-page count plus 50% plus 10, and
- * the lock is dropped around the allocation itself.
- */
- LOCKREGION(dbmp);
- nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10;
- UNLOCKREGION(dbmp);
-
- if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0)
- return (ret);
-
- LOCKREGION(dbmp);
-
- /*
- * If the application is asking about a previous call to memp_sync(),
- * and we haven't found any buffers that the application holding the
- * pin couldn't write, return yes or no based on the current count.
- * Note, if the application is asking about a LSN *smaller* than one
- * we've already handled or are currently handling, then we return a
- * result based on the count for the larger LSN.
- */
- if (!F_ISSET(mp, MP_LSN_RETRY) && log_compare(lsnp, &mp->lsn) <= 0) {
- if (mp->lsn_cnt == 0) {
- *lsnp = mp->lsn;
- ret = 0;
- } else
- ret = DB_INCOMPLETE;
- goto done;
- }
-
- /* Else, it's a new checkpoint. */
- F_CLR(mp, MP_LSN_RETRY);
-
- /*
- * Save the LSN. We know that it's a new LSN or larger than the one
- * for which we were already doing a checkpoint. (BTW, I don't expect
- * to see multiple LSN's from the same or multiple processes, but You
- * Just Never Know. Responding as if they all called with the largest
- * of the LSNs specified makes everything work.)
- *
- * We don't currently use the LSN we save. We could potentially save
- * the last-written LSN in each buffer header and use it to determine
- * what buffers need to be written. The problem with this is that it's
- * sizeof(LSN) more bytes of buffer header. We currently write all the
- * dirty buffers instead.
- *
- * Walk the list of shared memory segments clearing the count of
- * buffers waiting to be written.
- */
- mp->lsn = *lsnp;
- mp->lsn_cnt = 0;
- for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
- mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
- mfp->lsn_cnt = 0;
-
- /*
- * Walk the list of buffers and mark all dirty buffers to be written
- * and all pinned buffers to be potentially written (we can't know if
- * we'll need to write them until the holding process returns them to
- * the cache). We do this in one pass while holding the region locked
- * so that processes can't make new buffers dirty, causing us to never
- * finish. Since the application may have restarted the sync, clear
- * any BH_WRITE flags that appear to be left over from previous calls.
- *
- * We don't want to pin down the entire buffer cache, otherwise we'll
- * starve threads needing new pages. Don't pin down more than 80% of
- * the cache.
- *
- * Keep a count of the total number of buffers we need to write in
- * MPOOL->lsn_cnt, and for each file, in MPOOLFILE->lsn_cnt.
- */
- ar_cnt = 0;
- maxpin = ((mp->stat.st_page_dirty + mp->stat.st_page_clean) * 8) / 10;
- for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
- if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) {
- F_SET(bhp, BH_WRITE);
-
- ++mp->lsn_cnt;
-
- mfp = R_ADDR(dbmp, bhp->mf_offset);
- ++mfp->lsn_cnt;
-
- /*
- * If the buffer isn't in use, we should be able to
- * write it immediately, so increment the reference
- * count to lock it and its contents down, and then
- * save a reference to it.
- *
- * If we've run out of space to store buffer references,
- * we're screwed. We don't want to realloc the array
- * while holding a region lock, so we set the flag to
- * force the checkpoint to be done again, from scratch,
- * later.
- *
- * If we've pinned down too much of the cache stop, and
- * set a flag to force the checkpoint to be tried again
- * later.
- */
- if (bhp->ref == 0) {
- ++bhp->ref;
- bharray[ar_cnt] = bhp;
- if (++ar_cnt >= nalloc || ar_cnt >= maxpin) {
- F_SET(mp, MP_LSN_RETRY);
- break;
- }
- }
- } else
- if (F_ISSET(bhp, BH_WRITE))
- F_CLR(bhp, BH_WRITE);
-
- /* If there are no buffers we can write immediately, we're done. */
- if (ar_cnt == 0) {
- ret = mp->lsn_cnt ? DB_INCOMPLETE : 0;
- goto done;
- }
-
- UNLOCKREGION(dbmp);
-
- /* Sort the buffers we're going to write. */
- qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp);
-
- LOCKREGION(dbmp);
-
- /* Walk the array, writing buffers. */
- for (next = 0; next < ar_cnt; ++next) {
- /*
- * It's possible for a thread to have gotten the buffer since
- * we listed it for writing. If the reference count is still
- * 1, we're the only ones using the buffer, go ahead and write.
- * If it's >1, then skip the buffer and assume that it will be
- * written when it's returned to the cache.
- */
- if (bharray[next]->ref > 1) {
- --bharray[next]->ref;
- continue;
- }
-
- /* Write the buffer. */
- mfp = R_ADDR(dbmp, bharray[next]->mf_offset);
- ret = __memp_bhwrite(dbmp, mfp, bharray[next], NULL, &wrote);
-
- /* Release the buffer. */
- --bharray[next]->ref;
-
- /* If there's an error, release the rest of the buffers. */
- if (ret != 0 || !wrote) {
- /*
- * Any process syncing the shared memory buffer pool
- * had better be able to write to any underlying file.
- * Be understanding, but firm, on this point.
- */
- if (ret == 0) {
- __db_err(dbenv, "%s: unable to flush page: %lu",
- __memp_fns(dbmp, mfp),
- (u_long)bharray[next]->pgno);
- ret = EPERM;
- }
-
- while (++next < ar_cnt)
- --bharray[next]->ref;
- goto err;
- }
- }
- ret = mp->lsn_cnt != 0 ||
- F_ISSET(mp, MP_LSN_RETRY) ? DB_INCOMPLETE : 0;
-
-done:
- if (0) { /* This body is entered only through "goto err". */
-err: /*
- * On error, clear:
- * MPOOL->lsn_cnt (the total sync count)
- * MPOOLFILE->lsn_cnt (the per-file sync count)
- * BH_WRITE flag (the scheduled for writing flag)
- */
- mp->lsn_cnt = 0;
- for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
- mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
- mfp->lsn_cnt = 0;
- for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
- F_CLR(bhp, BH_WRITE);
- }
- UNLOCKREGION(dbmp); /* Every path reaching here holds the region lock. */
- __os_free(bharray, nalloc * sizeof(BH *));
- return (ret);
-}
-
-/*
- * memp_fsync --
- *	Mpool file sync function.
- *
- *	Returns 0 without doing any work when the handle is flagged
- *	MP_READONLY or the underlying file is flagged MP_TEMP; otherwise
- *	returns the result of __memp_fsync().
- */
-int
-memp_fsync(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_MPOOL *dbmp;
-	int temporary;
-
-	dbmp = dbmfp->dbmp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* A handle that was not opened for writing has nothing to flush. */
-	if (F_ISSET(dbmfp, MP_READONLY))
-		return (0);
-
-	/* The shared MP_TEMP flag is sampled under the region lock. */
-	LOCKREGION(dbmp);
-	temporary = F_ISSET(dbmfp->mfp, MP_TEMP);
-	UNLOCKREGION(dbmp);
-
-	/* Temporary files are never synced through this interface. */
-	return (temporary ? 0 : __memp_fsync(dbmfp));
-}
-
-/*
- * __mp_xxx_fd --
- *	Return a file descriptor for DB 1.85 compatibility locking.
- *
- *	Stores dbmfp->fd through fdp.  Returns ENOENT when that descriptor is
- *	-1; otherwise returns 0, or the result of __memp_fsync() when a sync
- *	was needed to obtain the descriptor.
- *
- * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *));
- */
-int
-__mp_xxx_fd(dbmfp, fdp)
-	DB_MPOOLFILE *dbmfp;
-	int *fdp;
-{
-	int ret;
-
-	/*
-	 * This is a layering violation that exists ONLY so the DB 1.85
-	 * DB->fd call keeps working.
-	 *
-	 * When no descriptor is open yet, sync the database file to disk,
-	 * creating the file as necessary.
-	 *
-	 * The MP_READONLY and MP_TEMP tests made by memp_fsync(3) are
-	 * deliberately bypassed.  MP_READONLY doesn't matter: either we
-	 * already hold a descriptor (the file was opened for reading) or we
-	 * aren't readonly (we created the database, which needs write
-	 * privileges).  MP_TEMP doesn't matter: we want the backing file
-	 * written regardless, so that there is a descriptor to return.
-	 */
-	ret = 0;
-	if (dbmfp->fd == -1)
-		ret = __memp_fsync(dbmfp);
-
-	*fdp = dbmfp->fd;
-	if (*fdp == -1)
-		return (ENOENT);
-	return (ret);
-}
-
-/*
- * __memp_fsync --
- * Mpool file internal sync function.
- *
- * Collects the dirty buffers on the LRU list that belong to dbmfp's file
- * (matched by region offset), pins them, writes them in sorted order with
- * __memp_pgwrite(), and finally calls __os_fsync() on dbmfp->fd.
- *
- * Returns the error from __os_malloc() or __memp_pgwrite() if either
- * fails. Otherwise returns DB_INCOMPLETE when any matching buffer was
- * pinned, locked, picked up by someone else before it could be written,
- * reported as not written, or when the pointer array filled up; in that
- * case __os_fsync() is not called. Otherwise returns the result of
- * __os_fsync().
- */
-static int
-__memp_fsync(dbmfp)
- DB_MPOOLFILE *dbmfp;
-{
- BH *bhp, **bharray;
- DB_MPOOL *dbmp;
- MPOOL *mp;
- size_t mf_offset;
- int ar_cnt, incomplete, nalloc, next, ret, wrote;
-
- ret = 0;
- dbmp = dbmfp->dbmp;
- mp = dbmp->mp;
- mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
-
- /*
- * We try and write the buffers in page order: it should reduce seeks
- * by the underlying filesystem and possibly reduce the actual number
- * of writes. We don't want to hold the region lock while we write
- * the buffers, so only hold the lock while we create a list. Get a
- * good-size block of memory to hold buffer pointers, we don't want
- * to run out.
- *
- * The array is sized from the pool-wide dirty-page count, not from
- * this file's share of it.
- */
- LOCKREGION(dbmp);
- nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10;
- UNLOCKREGION(dbmp);
-
- if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0)
- return (ret);
-
- LOCKREGION(dbmp);
-
- /*
- * Walk the LRU list of buffer headers, and get a list of buffers to
- * write for this MPOOLFILE.
- */
- ar_cnt = incomplete = 0;
- for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
- if (!F_ISSET(bhp, BH_DIRTY) || bhp->mf_offset != mf_offset)
- continue;
- if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) {
- incomplete = 1;
- continue;
- }
-
- ++bhp->ref; /* Pin the buffer while it sits in our array. */
- bharray[ar_cnt] = bhp;
-
- /*
- * If we've run out of space to store buffer references, we're
- * screwed, as we don't want to realloc the array holding a
- * region lock. Set the incomplete flag -- the only way we
- * can get here is if the file is active in the buffer cache,
- * which is the same thing as finding pinned buffers.
- */
- if (++ar_cnt >= nalloc) {
- incomplete = 1;
- break;
- }
- }
-
- UNLOCKREGION(dbmp);
-
- /* Sort the buffers we're going to write. */
- if (ar_cnt != 0)
- qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp);
-
- LOCKREGION(dbmp);
-
- /* Walk the array, writing buffers. */
- for (next = 0; next < ar_cnt; ++next) {
- /*
- * It's possible for a thread to have gotten the buffer since
- * we listed it for writing. If the reference count is still
- * 1, we're the only ones using the buffer, go ahead and write.
- * If it's >1, then skip the buffer.
- */
- if (bharray[next]->ref > 1) {
- incomplete = 1;
-
- --bharray[next]->ref;
- continue;
- }
-
- /* Write the buffer. */
- ret = __memp_pgwrite(dbmfp, bharray[next], NULL, &wrote);
-
- /* Release the buffer. */
- --bharray[next]->ref;
-
- /* If there's an error, release the rest of the buffers. */
- if (ret != 0) {
- while (++next < ar_cnt)
- --bharray[next]->ref;
- goto err;
- }
-
- /*
- * If we didn't write the buffer for some reason, don't return
- * success.
- */
- if (!wrote)
- incomplete = 1;
- }
-
-err: UNLOCKREGION(dbmp); /* Also reached by falling through on success. */
-
- __os_free(bharray, nalloc * sizeof(BH *));
-
- /*
- * Sync the underlying file as the last thing we do, so that the OS
- * has maximal opportunity to flush buffers before we request it.
- *
- * XXX:
- * Don't lock the region around the sync, fsync(2) has no atomicity
- * issues.
- */
- if (ret == 0)
- return (incomplete ? DB_INCOMPLETE : __os_fsync(dbmfp->fd));
- return (ret);
-}
-
-/*
- * memp_trickle --
- * Keep a specified percentage of the buffers clean.
- *
- * pct must be in the range 1-100, otherwise EINVAL is returned. When
- * nwrotep is not NULL, *nwrotep is set to 0 before pct is validated and
- * is incremented once for every buffer written.
- *
- * Writes one eligible dirty buffer at a time, re-evaluating the
- * clean/dirty ratio after each write, until the ratio reaches pct or no
- * eligible buffer remains.
- *
- * Returns 0 on success, EINVAL for a bad pct, EPERM when
- * __memp_bhwrite() returned 0 without writing the page, or the error
- * from __memp_bhwrite().
- */
-int
-memp_trickle(dbmp, pct, nwrotep)
- DB_MPOOL *dbmp;
- int pct, *nwrotep;
-{
- BH *bhp;
- MPOOL *mp;
- MPOOLFILE *mfp;
- u_long total;
- int ret, wrote;
-
- MP_PANIC_CHECK(dbmp);
-
- mp = dbmp->mp;
- if (nwrotep != NULL)
- *nwrotep = 0;
-
- if (pct < 1 || pct > 100)
- return (EINVAL);
-
- LOCKREGION(dbmp);
-
- /*
- * If there are sufficient clean buffers, or no buffers or no dirty
- * buffers, we're done.
- *
- * XXX
- * Using st_page_clean and st_page_dirty is our only choice at the
- * moment, but it's not as correct as we might like in the presence
- * of pools with more than one buffer size, as a free 512-byte buffer
- * isn't the same as a free 8K buffer.
- */
-loop: total = mp->stat.st_page_clean + mp->stat.st_page_dirty;
- if (total == 0 || mp->stat.st_page_dirty == 0 ||
- (mp->stat.st_page_clean * 100) / total >= (u_long)pct) {
- UNLOCKREGION(dbmp);
- return (0);
- }
-
- /* Loop until we write a buffer. */
- for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
- if (bhp->ref != 0 ||
- !F_ISSET(bhp, BH_DIRTY) || F_ISSET(bhp, BH_LOCKED))
- continue;
-
- mfp = R_ADDR(dbmp, bhp->mf_offset);
-
- /*
- * We can't write to temporary files -- see the comment in
- * mp_bh.c:__memp_bhwrite().
- */
- if (F_ISSET(mfp, MP_TEMP))
- continue;
-
- if ((ret = __memp_bhwrite(dbmp, mfp, bhp, NULL, &wrote)) != 0)
- goto err;
-
- /*
- * Any process syncing the shared memory buffer pool had better
- * be able to write to any underlying file. Be understanding,
- * but firm, on this point.
- */
- if (!wrote) {
- __db_err(dbmp->dbenv, "%s: unable to flush page: %lu",
- __memp_fns(dbmp, mfp), (u_long)bhp->pgno);
- ret = EPERM;
- goto err;
- }
-
- ++mp->stat.st_page_trickle;
- if (nwrotep != NULL)
- ++*nwrotep;
- goto loop; /* Re-check the ratio and rescan from the list head. */
- }
-
- /* No more buffers to write. */
- ret = 0;
-
-err: UNLOCKREGION(dbmp); /* Also reached by falling through with ret == 0. */
- return (ret);
-}
-
-/*
- * __bhcmp --
- *	qsort(3) comparison function for arrays of BH pointers: order by
- *	owning file (mf_offset) first, then by page number within the file.
- */
-static int
-__bhcmp(p1, p2)
-	const void *p1, *p2;
-{
-	const BH *lhs, *rhs;
-
-	lhs = *(BH * const *)p1;
-	rhs = *(BH * const *)p2;
-
-	/* Primary key: the file, identified by its shared memory pool offset. */
-	if (lhs->mf_offset != rhs->mf_offset)
-		return (lhs->mf_offset < rhs->mf_offset ? -1 : 1);
-
-	/* Secondary key: the page number. */
-	if (lhs->pgno != rhs->pgno)
-		return (lhs->pgno < rhs->pgno ? -1 : 1);
-
-	/*
-	 * !!!
-	 * Equal keys must be handled: badly written quicksort code may call
-	 * the comparison function with two identical pointers (e.g., WATCOM
-	 * C++ (Power++)).
-	 */
-	return (0);
-}