diff options
author | Ulrich Drepper <drepper@redhat.com> | 2000-01-02 06:01:06 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2000-01-02 06:01:06 +0000 |
commit | 8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3 (patch) | |
tree | 099a250d7366aef2ab028fdb24f0d692cd784b4a /db2/mp | |
parent | 9a6450d578556c11e7c173d2f28362345b8f1258 (diff) | |
download | glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar.gz glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.tar.bz2 glibc-8d6f1731fcd082e4f744ba9cb4bde4be7c08f1b3.zip |
Update.
* Makeconfig (all-subdirs): Remove db and db2.
* db/*: Removed.
* db2/*: Removed.
Diffstat (limited to 'db2/mp')
-rw-r--r-- | db2/mp/mp_bh.c | 592 | ||||
-rw-r--r-- | db2/mp/mp_fget.c | 352 | ||||
-rw-r--r-- | db2/mp/mp_fopen.c | 560 | ||||
-rw-r--r-- | db2/mp/mp_fput.c | 153 | ||||
-rw-r--r-- | db2/mp/mp_fset.c | 83 | ||||
-rw-r--r-- | db2/mp/mp_open.c | 221 | ||||
-rw-r--r-- | db2/mp/mp_pr.c | 304 | ||||
-rw-r--r-- | db2/mp/mp_region.c | 330 | ||||
-rw-r--r-- | db2/mp/mp_sync.c | 549 |
9 files changed, 0 insertions, 3144 deletions
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c deleted file mode 100644 index 12c53417d9..0000000000 --- a/db2/mp/mp_bh.c +++ /dev/null @@ -1,592 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_bh.c 10.45 (Sleepycat) 11/25/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#include <unistd.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *)); - -/* - * __memp_bhwrite -- - * Write the page associated with a given bucket header. - * - * PUBLIC: int __memp_bhwrite - * PUBLIC: __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *)); - */ -int -__memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) - DB_MPOOL *dbmp; - MPOOLFILE *mfp; - BH *bhp; - int *restartp, *wrotep; -{ - DB_MPOOLFILE *dbmfp; - DB_MPREG *mpreg; - int incremented, ret; - - if (restartp != NULL) - *restartp = 0; - if (wrotep != NULL) - *wrotep = 0; - incremented = 0; - - /* - * Walk the process' DB_MPOOLFILE list and find a file descriptor for - * the file. We also check that the descriptor is open for writing. - * If we find a descriptor on the file that's not open for writing, we - * try and upgrade it to make it writeable. If that fails, we're done. - */ - LOCKHANDLE(dbmp, dbmp->mutexp); - for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); - dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) - if (dbmfp->mfp == mfp) { - if (F_ISSET(dbmfp, MP_READONLY) && - __memp_upgrade(dbmp, dbmfp, mfp)) { - UNLOCKHANDLE(dbmp, dbmp->mutexp); - return (0); - } - - /* - * Increment the reference count -- see the comment in - * memp_fclose(). - */ - ++dbmfp->ref; - incremented = 1; - break; - } - UNLOCKHANDLE(dbmp, dbmp->mutexp); - if (dbmfp != NULL) - goto found; - - /* - * It's not a page from a file we've opened. If the file requires - * input/output processing, see if this process has ever registered - * information as to how to write this type of file. If not, there's - * nothing we can do. - */ - if (mfp->ftype != 0) { - LOCKHANDLE(dbmp, dbmp->mutexp); - for (mpreg = LIST_FIRST(&dbmp->dbregq); - mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) - if (mpreg->ftype == mfp->ftype) - break; - UNLOCKHANDLE(dbmp, dbmp->mutexp); - if (mpreg == NULL) - return (0); - } - - /* - * Try and open the file, attaching to the underlying shared area. - * - * XXX - * Don't try to attach to temporary files. There are two problems in - * trying to do that. First, if we have different privileges than the - * process that "owns" the temporary file, we might create the backing - * disk file such that the owning process couldn't read/write its own - * buffers, e.g., memp_trickle() running as root creating a file owned - * as root, mode 600. Second, if the temporary file has already been - * created, we don't have any way of finding out what its real name is, - * and, even if we did, it was already unlinked (so that it won't be - * left if the process dies horribly). This decision causes a problem, - * however: if the temporary file consumes the entire buffer cache, - * and the owner doesn't flush the buffers to disk, we could end up - * with resource starvation, and the memp_trickle() thread couldn't do - * anything about it. That's a pretty unlikely scenario, though. - * - * XXX - * There's no negative cache, so we may repeatedly try and open files - * that we have previously tried (and failed) to open. - * - * Ignore any error, assume it's a permissions problem. - */ - if (F_ISSET(mfp, MP_TEMP)) - return (0); - - if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), - 0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0) - return (0); - -found: ret = __memp_pgwrite(dbmfp, bhp, restartp, wrotep); - - if (incremented) { - LOCKHANDLE(dbmp, dbmp->mutexp); - --dbmfp->ref; - UNLOCKHANDLE(dbmp, dbmp->mutexp); - } - - return (ret); -} - -/* - * __memp_pgread -- - * Read a page from a file. - * - * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); - */ -int -__memp_pgread(dbmfp, bhp, can_create) - DB_MPOOLFILE *dbmfp; - BH *bhp; - int can_create; -{ - DB_IO db_io; - DB_MPOOL *dbmp; - MPOOLFILE *mfp; - size_t len, pagesize; - ssize_t nr; - int created, ret; - - dbmp = dbmfp->dbmp; - mfp = dbmfp->mfp; - pagesize = mfp->stat.st_pagesize; - - F_SET(bhp, BH_LOCKED | BH_TRASH); - LOCKBUFFER(dbmp, bhp); - UNLOCKREGION(dbmp); - - /* - * Temporary files may not yet have been created. We don't create - * them now, we create them when the pages have to be flushed. - */ - nr = 0; - if (dbmfp->fd == -1) - ret = 0; - else { - /* - * Ignore read errors if we have permission to create the page. - * Assume that the page doesn't exist, and that we'll create it - * when we write it out. - */ - db_io.fd_io = dbmfp->fd; - db_io.fd_lock = dbmp->reginfo.fd; - db_io.mutexp = - F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL; - db_io.pagesize = db_io.bytes = pagesize; - db_io.pgno = bhp->pgno; - db_io.buf = bhp->buf; - - ret = __os_io(&db_io, DB_IO_READ, &nr); - } - - created = 0; - if (nr < (ssize_t)pagesize) { - if (can_create) - created = 1; - else { - /* If we had a short read, ret may be 0. */ - if (ret == 0) - ret = EIO; - __db_err(dbmp->dbenv, - "%s: page %lu doesn't exist, create flag not set", - __memp_fn(dbmfp), (u_long)bhp->pgno); - goto err; - } - } - - /* - * Clear any bytes we didn't read that need to be cleared. If we're - * running in diagnostic mode, smash any bytes on the page that are - * unknown quantities for the caller. - */ - if (nr != (ssize_t)pagesize) { - len = mfp->clear_len == 0 ? pagesize : mfp->clear_len; - if (nr < (ssize_t)len) - memset(bhp->buf + nr, 0, len - nr); -#ifdef DIAGNOSTIC - if (nr > (ssize_t)len) - len = nr; - if (len < pagesize) - memset(bhp->buf + len, 0xdb, pagesize - len); -#endif - } - - /* Call any pgin function. */ - ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1); - - /* Unlock the buffer and reacquire the region lock. */ -err: UNLOCKBUFFER(dbmp, bhp); - LOCKREGION(dbmp); - - /* - * If no errors occurred, the data is now valid, clear the BH_TRASH - * flag; regardless, clear the lock bit and let other threads proceed. - */ - F_CLR(bhp, BH_LOCKED); - if (ret == 0) { - F_CLR(bhp, BH_TRASH); - - /* Update the statistics. */ - if (created) { - ++dbmp->mp->stat.st_page_create; - ++mfp->stat.st_page_create; - } else { - ++dbmp->mp->stat.st_page_in; - ++mfp->stat.st_page_in; - } - } - - return (ret); -} - -/* - * __memp_pgwrite -- - * Write a page to a file. - * - * PUBLIC: int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *)); - */ -int -__memp_pgwrite(dbmfp, bhp, restartp, wrotep) - DB_MPOOLFILE *dbmfp; - BH *bhp; - int *restartp, *wrotep; -{ - DB_ENV *dbenv; - DB_IO db_io; - DB_LOG *lg_info; - DB_LSN lsn; - DB_MPOOL *dbmp; - MPOOL *mp; - MPOOLFILE *mfp; - ssize_t nw; - int callpgin, ret, syncfail; - const char *fail; - - dbmp = dbmfp->dbmp; - dbenv = dbmp->dbenv; - mp = dbmp->mp; - mfp = dbmfp->mfp; - - if (restartp != NULL) - *restartp = 0; - if (wrotep != NULL) - *wrotep = 0; - callpgin = 0; - - /* - * Check the dirty bit -- this buffer may have been written since we - * decided to write it. - */ - if (!F_ISSET(bhp, BH_DIRTY)) { - if (wrotep != NULL) - *wrotep = 1; - return (0); - } - - LOCKBUFFER(dbmp, bhp); - - /* - * If there were two writers, we may have just been waiting while the - * other writer completed I/O on this buffer. Check the dirty bit one - * more time. - */ - if (!F_ISSET(bhp, BH_DIRTY)) { - UNLOCKBUFFER(dbmp, bhp); - - if (wrotep != NULL) - *wrotep = 1; - return (0); - } - - F_SET(bhp, BH_LOCKED); - UNLOCKREGION(dbmp); - - if (restartp != NULL) - *restartp = 1; - - /* Copy the LSN off the page if we're going to need it. */ - lg_info = dbenv->lg_info; - if (lg_info != NULL || F_ISSET(bhp, BH_WRITE)) - memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN)); - - /* Ensure the appropriate log records are on disk. */ - if (lg_info != NULL && (ret = log_flush(lg_info, &lsn)) != 0) - goto err; - - /* - * Call any pgout function. We set the callpgin flag so that we flag - * that the contents of the buffer will need to be passed through pgin - * before they are reused. - */ - if (mfp->ftype == 0) - ret = 0; - else { - callpgin = 1; - if ((ret = __memp_pg(dbmfp, bhp, 0)) != 0) - goto err; - } - - /* Temporary files may not yet have been created. */ - if (dbmfp->fd == -1) { - LOCKHANDLE(dbmp, dbmfp->mutexp); - if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv, - DB_APP_TMP, NULL, NULL, DB_CREATE | DB_EXCL | DB_TEMPORARY, - &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) { - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - __db_err(dbenv, - "unable to create temporary backing file"); - goto err; - } - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - } - - /* Write the page. */ - db_io.fd_io = dbmfp->fd; - db_io.fd_lock = dbmp->reginfo.fd; - db_io.mutexp = F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL; - db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize; - db_io.pgno = bhp->pgno; - db_io.buf = bhp->buf; - if ((ret = __os_io(&db_io, DB_IO_WRITE, &nw)) != 0) { - __db_panic(dbenv, ret); - fail = "write"; - goto syserr; - } - if (nw != (ssize_t)mfp->stat.st_pagesize) { - ret = EIO; - fail = "write"; - goto syserr; - } - - if (wrotep != NULL) - *wrotep = 1; - - /* Unlock the buffer and reacquire the region lock. */ - UNLOCKBUFFER(dbmp, bhp); - LOCKREGION(dbmp); - - /* - * Clean up the flags based on a successful write. - * - * If we rewrote the page, it will need processing by the pgin - * routine before reuse. - */ - if (callpgin) - F_SET(bhp, BH_CALLPGIN); - F_CLR(bhp, BH_DIRTY | BH_LOCKED); - - /* - * If we write a buffer for which a checkpoint is waiting, update - * the count of pending buffers (both in the mpool as a whole and - * for this file). If the count for this file goes to zero, flush - * the writes. - * - * XXX: - * Don't lock the region around the sync, fsync(2) has no atomicity - * issues. - * - * XXX: - * We ignore errors from the sync -- it makes no sense to return an - * error to the calling process, so set a flag causing the checkpoint - * to be retried later. - */ - if (F_ISSET(bhp, BH_WRITE)) { - if (mfp->lsn_cnt == 1) { - UNLOCKREGION(dbmp); - syncfail = __os_fsync(dbmfp->fd) != 0; - LOCKREGION(dbmp); - if (syncfail) - F_SET(mp, MP_LSN_RETRY); - - } - - F_CLR(bhp, BH_WRITE); - - /* - * If the buffer just written has a larger LSN than the current - * max LSN written for this checkpoint, update the saved value. - */ - if (log_compare(&lsn, &mp->lsn) > 0) - mp->lsn = lsn; - - --mp->lsn_cnt; - --mfp->lsn_cnt; - } - - /* Update the page clean/dirty statistics. */ - ++mp->stat.st_page_clean; - --mp->stat.st_page_dirty; - - /* Update I/O statistics. */ - ++mp->stat.st_page_out; - ++mfp->stat.st_page_out; - - return (0); - -syserr: __db_err(dbenv, "%s: %s failed for page %lu", - __memp_fn(dbmfp), fail, (u_long)bhp->pgno); - -err: /* Unlock the buffer and reacquire the region lock. */ - UNLOCKBUFFER(dbmp, bhp); - LOCKREGION(dbmp); - - /* - * Clean up the flags based on a failure. - * - * The page remains dirty but we remove our lock. If we rewrote the - * page, it will need processing by the pgin routine before reuse. - */ - if (callpgin) - F_SET(bhp, BH_CALLPGIN); - F_CLR(bhp, BH_LOCKED); - - return (ret); -} - -/* - * __memp_pg -- - * Call the pgin/pgout routine. - * - * PUBLIC: int __memp_pg __P((DB_MPOOLFILE *, BH *, int)); - */ -int -__memp_pg(dbmfp, bhp, is_pgin) - DB_MPOOLFILE *dbmfp; - BH *bhp; - int is_pgin; -{ - DBT dbt, *dbtp; - DB_MPOOL *dbmp; - DB_MPREG *mpreg; - MPOOLFILE *mfp; - int ftype, ret; - - dbmp = dbmfp->dbmp; - mfp = dbmfp->mfp; - - LOCKHANDLE(dbmp, dbmp->mutexp); - - ftype = mfp->ftype; - for (mpreg = LIST_FIRST(&dbmp->dbregq); - mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) { - if (ftype != mpreg->ftype) - continue; - if (mfp->pgcookie_len == 0) - dbtp = NULL; - else { - dbt.size = mfp->pgcookie_len; - dbt.data = R_ADDR(dbmp, mfp->pgcookie_off); - dbtp = &dbt; - } - UNLOCKHANDLE(dbmp, dbmp->mutexp); - - if (is_pgin) { - if (mpreg->pgin != NULL && (ret = - mpreg->pgin(bhp->pgno, bhp->buf, dbtp)) != 0) - goto err; - } else - if (mpreg->pgout != NULL && (ret = - mpreg->pgout(bhp->pgno, bhp->buf, dbtp)) != 0) - goto err; - break; - } - - if (mpreg == NULL) - UNLOCKHANDLE(dbmp, dbmp->mutexp); - - return (0); - -err: UNLOCKHANDLE(dbmp, dbmp->mutexp); - __db_err(dbmp->dbenv, "%s: %s failed for page %lu", - __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno); - return (ret); -} - -/* - * __memp_bhfree -- - * Free a bucket header and its referenced data. - * - * PUBLIC: void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int)); - */ -void -__memp_bhfree(dbmp, mfp, bhp, free_mem) - DB_MPOOL *dbmp; - MPOOLFILE *mfp; - BH *bhp; - int free_mem; -{ - size_t off; - - /* Delete the buffer header from the hash bucket queue. */ - off = BUCKET(dbmp->mp, R_OFFSET(dbmp, mfp), bhp->pgno); - SH_TAILQ_REMOVE(&dbmp->htab[off], bhp, hq, __bh); - - /* Delete the buffer header from the LRU queue. */ - SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh); - - /* - * If we're not reusing it immediately, free the buffer header - * and data for real. - */ - if (free_mem) { - __db_shalloc_free(dbmp->addr, bhp); - --dbmp->mp->stat.st_page_clean; - } -} - -/* - * __memp_upgrade -- - * Upgrade a file descriptor from readonly to readwrite. - */ -static int -__memp_upgrade(dbmp, dbmfp, mfp) - DB_MPOOL *dbmp; - DB_MPOOLFILE *dbmfp; - MPOOLFILE *mfp; -{ - int fd, ret; - char *rpath; - - /* - * !!! - * We expect the handle to already be locked. - */ - - /* Check to see if we've already upgraded. */ - if (F_ISSET(dbmfp, MP_UPGRADE)) - return (0); - - /* Check to see if we've already failed. */ - if (F_ISSET(dbmfp, MP_UPGRADE_FAIL)) - return (1); - - /* - * Calculate the real name for this file and try to open it read/write. - * We know we have a valid pathname for the file because it's the only - * way we could have gotten a file descriptor of any kind. - */ - if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA, - NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &rpath)) != 0) - return (ret); - if (__db_open(rpath, 0, 0, 0, &fd) != 0) { - F_SET(dbmfp, MP_UPGRADE_FAIL); - ret = 1; - } else { - /* Swap the descriptors and set the upgrade flag. */ - (void)__os_close(dbmfp->fd); - dbmfp->fd = fd; - F_SET(dbmfp, MP_UPGRADE); - ret = 0; - } - __os_freestr(rpath); - return (ret); -} diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c deleted file mode 100644 index f159dc2d3e..0000000000 --- a/db2/mp/mp_fget.c +++ /dev/null @@ -1,352 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_fget.c 10.53 (Sleepycat) 11/16/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -/* - * memp_fget -- - * Get a page from the file. - */ -int -memp_fget(dbmfp, pgnoaddr, flags, addrp) - DB_MPOOLFILE *dbmfp; - db_pgno_t *pgnoaddr; - u_int32_t flags; - void *addrp; -{ - BH *bhp; - DB_MPOOL *dbmp; - MPOOL *mp; - MPOOLFILE *mfp; - size_t bucket, mf_offset; - u_int32_t st_hsearch; - int b_incr, first, ret; - - dbmp = dbmfp->dbmp; - mp = dbmp->mp; - mfp = dbmfp->mfp; - - MP_PANIC_CHECK(dbmp); - - /* - * Validate arguments. - * - * !!! - * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonly - * files here, and create non-existent pages in readonly files if the - * flags are set, later. The reason is that the hash access method - * wants to get empty pages that don't really exist in readonly files. - * The only alternative is for hash to write the last "bucket" all the - * time, which we don't want to do because one of our big goals in life - * is to keep database files small. It's sleazy as hell, but we catch - * any attempt to actually write the file in memp_fput(). - */ -#define OKFLAGS (DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW) - if (flags != 0) { - if ((ret = - __db_fchk(dbmp->dbenv, "memp_fget", flags, OKFLAGS)) != 0) - return (ret); - - switch (flags) { - case DB_MPOOL_CREATE: - case DB_MPOOL_LAST: - case DB_MPOOL_NEW: - case 0: - break; - default: - return (__db_ferr(dbmp->dbenv, "memp_fget", 1)); - } - } - -#ifdef DIAGNOSTIC - /* - * XXX - * We want to switch threads as often as possible. Yield every time - * we get a new page to ensure contention. - */ - if (DB_GLOBAL(db_pageyield)) - __os_yield(1); -#endif - - /* Initialize remaining local variables. */ - mf_offset = R_OFFSET(dbmp, mfp); - bhp = NULL; - st_hsearch = 0; - b_incr = ret = 0; - - /* Determine the hash bucket where this page will live. */ - bucket = BUCKET(mp, mf_offset, *pgnoaddr); - - LOCKREGION(dbmp); - - /* - * Check for the last or last + 1 page requests. - * - * Examine and update the file's last_pgno value. We don't care if - * the last_pgno value immediately changes due to another thread -- - * at this instant in time, the value is correct. We do increment the - * current last_pgno value if the thread is asking for a new page, - * however, to ensure that two threads creating pages don't get the - * same one. - */ - if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) { - if (LF_ISSET(DB_MPOOL_NEW)) - ++mfp->last_pgno; - *pgnoaddr = mfp->last_pgno; - bucket = BUCKET(mp, mf_offset, mfp->last_pgno); - - if (LF_ISSET(DB_MPOOL_NEW)) - goto alloc; - } - - /* - * If mmap'ing the file and the page is not past the end of the file, - * just return a pointer. - * - * The page may be past the end of the file, so check the page number - * argument against the original length of the file. If we previously - * returned pages past the original end of the file, last_pgno will - * have been updated to match the "new" end of the file, and checking - * against it would return pointers past the end of the mmap'd region. - * - * If another process has opened the file for writing since we mmap'd - * it, we will start playing the game by their rules, i.e. everything - * goes through the cache. All pages previously returned will be safe, - * as long as the correct locking protocol was observed. - * - * XXX - * We don't discard the map because we don't know when all of the - * pages will have been discarded from the process' address space. - * It would be possible to do so by reference counting the open - * pages from the mmap, but it's unclear to me that it's worth it. - */ - if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP)) { - if (*pgnoaddr > mfp->orig_last_pgno) { - /* - * !!! - * See the comment above about non-existent pages and - * the hash access method. - */ - if (!LF_ISSET(DB_MPOOL_CREATE)) { - __db_err(dbmp->dbenv, - "%s: page %lu doesn't exist", - __memp_fn(dbmfp), (u_long)*pgnoaddr); - ret = EINVAL; - goto err; - } - } else { - *(void **)addrp = - R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); - ++mp->stat.st_map; - ++mfp->stat.st_map; - goto done; - } - } - - /* Search the hash chain for the page. */ - for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { - ++st_hsearch; - if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset) - continue; - - /* Increment the reference count. */ - if (bhp->ref == UINT16_T_MAX) { - __db_err(dbmp->dbenv, - "%s: page %lu: reference count overflow", - __memp_fn(dbmfp), (u_long)bhp->pgno); - ret = EINVAL; - goto err; - } - - /* - * Increment the reference count. We may discard the region - * lock as we evaluate and/or read the buffer, so we need to - * ensure that it doesn't move and that its contents remain - * unchanged. - */ - ++bhp->ref; - b_incr = 1; - - /* - * Any buffer we find might be trouble. - * - * BH_LOCKED -- - * I/O is in progress. Because we've incremented the buffer - * reference count, we know the buffer can't move. Unlock - * the region lock, wait for the I/O to complete, and reacquire - * the region. - */ - for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) { - UNLOCKREGION(dbmp); - - /* - * Explicitly yield the processor if it's not the first - * pass through this loop -- if we don't, we might end - * up running to the end of our CPU quantum as we will - * simply be swapping between the two locks. - */ - if (!first) - __os_yield(1); - - LOCKBUFFER(dbmp, bhp); - /* Wait for I/O to finish... */ - UNLOCKBUFFER(dbmp, bhp); - LOCKREGION(dbmp); - } - - /* - * BH_TRASH -- - * The contents of the buffer are garbage. Shouldn't happen, - * and this read is likely to fail, but might as well try. - */ - if (F_ISSET(bhp, BH_TRASH)) - goto reread; - - /* - * BH_CALLPGIN -- - * The buffer was converted so it could be written, and the - * contents need to be converted again. - */ - if (F_ISSET(bhp, BH_CALLPGIN)) { - if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0) - goto err; - F_CLR(bhp, BH_CALLPGIN); - } - - ++mp->stat.st_cache_hit; - ++mfp->stat.st_cache_hit; - *(void **)addrp = bhp->buf; - goto done; - } - -alloc: /* Allocate new buffer header and data space. */ - if ((ret = __memp_alloc(dbmp, sizeof(BH) - - sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0) - goto err; - -#ifdef DIAGNOSTIC - if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) { - __db_err(dbmp->dbenv, - "Internal error: BH data NOT size_t aligned."); - ret = EINVAL; - goto err; - } -#endif - /* Initialize the BH fields. */ - memset(bhp, 0, sizeof(BH)); - LOCKINIT(dbmp, &bhp->mutex); - bhp->ref = 1; - bhp->pgno = *pgnoaddr; - bhp->mf_offset = mf_offset; - - /* - * Prepend the bucket header to the head of the appropriate MPOOL - * bucket hash list. Append the bucket header to the tail of the - * MPOOL LRU chain. - */ - SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh); - SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); - - /* - * If we created the page, zero it out and continue. - * - * !!! - * Note: DB_MPOOL_NEW specifically doesn't call the pgin function. - * If DB_MPOOL_CREATE is used, then the application's pgin function - * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW, - * it can detect all of its page creates, and not bother. - * - * Otherwise, read the page into memory, optionally creating it if - * DB_MPOOL_CREATE is set. - */ - if (LF_ISSET(DB_MPOOL_NEW)) { - if (mfp->clear_len == 0) - memset(bhp->buf, 0, mfp->stat.st_pagesize); - else { - memset(bhp->buf, 0, mfp->clear_len); -#ifdef DIAGNOSTIC - memset(bhp->buf + mfp->clear_len, 0xdb, - mfp->stat.st_pagesize - mfp->clear_len); -#endif - } - - ++mp->stat.st_page_create; - ++mfp->stat.st_page_create; - } else { - /* - * It's possible for the read function to fail, which means - * that we fail as well. Note, the __memp_pgread() function - * discards the region lock, so the buffer must be pinned - * down so that it cannot move and its contents are unchanged. - */ -reread: if ((ret = __memp_pgread(dbmfp, - bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) { - /* - * !!! - * Discard the buffer unless another thread is waiting - * on our I/O to complete. Regardless, the header has - * the BH_TRASH flag set. - */ - if (bhp->ref == 1) - __memp_bhfree(dbmp, mfp, bhp, 1); - goto err; - } - - ++mp->stat.st_cache_miss; - ++mfp->stat.st_cache_miss; - } - - /* - * If we're returning a page after our current notion of the last-page, - * update our information. Note, there's no way to un-instantiate this - * page, it's going to exist whether it's returned to us dirty or not. - */ - if (bhp->pgno > mfp->last_pgno) - mfp->last_pgno = bhp->pgno; - - ++mp->stat.st_page_clean; - *(void **)addrp = bhp->buf; - -done: /* Update the chain search statistics. */ - if (st_hsearch) { - ++mp->stat.st_hash_searches; - if (st_hsearch > mp->stat.st_hash_longest) - mp->stat.st_hash_longest = st_hsearch; - mp->stat.st_hash_examined += st_hsearch; - } - - ++dbmfp->pinref; - - UNLOCKREGION(dbmp); - - return (0); - -err: /* Discard our reference. */ - if (b_incr) - --bhp->ref; - UNLOCKREGION(dbmp); - - *(void **)addrp = NULL; - return (ret); -} diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c deleted file mode 100644 index dd02662fd8..0000000000 --- a/db2/mp/mp_fopen.c +++ /dev/null @@ -1,560 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_fopen.c 10.60 (Sleepycat) 1/1/99"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *)); -static int __memp_mf_open __P((DB_MPOOL *, - const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **)); - -/* - * memp_fopen -- - * Open a backing file for the memory pool. - */ -int -memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp) - DB_MPOOL *dbmp; - const char *path; - u_int32_t flags; - int mode; - size_t pagesize; - DB_MPOOL_FINFO *finfop; - DB_MPOOLFILE **retp; -{ - int ret; - - MP_PANIC_CHECK(dbmp); - - /* Validate arguments. */ - if ((ret = __db_fchk(dbmp->dbenv, - "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0) - return (ret); - - /* Require a non-zero pagesize. */ - if (pagesize == 0) { - __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified"); - return (EINVAL); - } - if (finfop != NULL && finfop->clear_len > pagesize) - return (EINVAL); - - return (__memp_fopen(dbmp, - NULL, path, flags, mode, pagesize, 1, finfop, retp)); -} - -/* - * __memp_fopen -- - * Open a backing file for the memory pool; internal version. - * - * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, - * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); - */ -int -__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) - DB_MPOOL *dbmp; - MPOOLFILE *mfp; - const char *path; - u_int32_t flags; - int mode, needlock; - size_t pagesize; - DB_MPOOL_FINFO *finfop; - DB_MPOOLFILE **retp; -{ - DB_ENV *dbenv; - DB_MPOOLFILE *dbmfp; - DB_MPOOL_FINFO finfo; - db_pgno_t last_pgno; - size_t maxmap; - u_int32_t mbytes, bytes; - int ret; - u_int8_t idbuf[DB_FILE_ID_LEN]; - char *rpath; - - dbenv = dbmp->dbenv; - ret = 0; - rpath = NULL; - - /* - * If mfp is provided, we take the DB_MPOOL_FINFO information from - * the mfp. We don't bother initializing everything, because some - * of them are expensive to acquire. If no mfp is provided and the - * finfop argument is NULL, we default the values. - */ - if (finfop == NULL) { - memset(&finfo, 0, sizeof(finfo)); - if (mfp != NULL) { - finfo.ftype = mfp->ftype; - finfo.pgcookie = NULL; - finfo.fileid = NULL; - finfo.lsn_offset = mfp->lsn_off; - finfo.clear_len = mfp->clear_len; - } else { - finfo.ftype = 0; - finfo.pgcookie = NULL; - finfo.fileid = NULL; - finfo.lsn_offset = -1; - finfo.clear_len = 0; - } - finfop = &finfo; - } - - /* Allocate and initialize the per-process structure. */ - if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0) - return (ret); - dbmfp->dbmp = dbmp; - dbmfp->fd = -1; - dbmfp->ref = 1; - if (LF_ISSET(DB_RDONLY)) - F_SET(dbmfp, MP_READONLY); - - if (path == NULL) { - if (LF_ISSET(DB_RDONLY)) { - __db_err(dbenv, - "memp_fopen: temporary files can't be readonly"); - ret = EINVAL; - goto err; - } - last_pgno = 0; - } else { - /* Get the real name for this file and open it. */ - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0) - goto err; - if ((ret = __db_open(rpath, - LF_ISSET(DB_CREATE | DB_RDONLY), - DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) { - __db_err(dbenv, "%s: %s", rpath, strerror(ret)); - goto err; - } - - /* - * Don't permit files that aren't a multiple of the pagesize, - * and find the number of the last page in the file, all the - * time being careful not to overflow 32 bits. - * - * !!! - * We can't use off_t's here, or in any code in the mainline - * library for that matter. (We have to use them in the os - * stubs, of course, as there are system calls that take them - * as arguments.) The reason is that some customers build in - * environments where an off_t is 32-bits, but still run where - * offsets are 64-bits, and they pay us a lot of money. - */ - if ((ret = __os_ioinfo(rpath, - dbmfp->fd, &mbytes, &bytes, NULL)) != 0) { - __db_err(dbenv, "%s: %s", rpath, strerror(ret)); - goto err; - } - - /* Page sizes have to be a power-of-two, ignore mbytes. */ - if (bytes % pagesize != 0) { - __db_err(dbenv, - "%s: file size not a multiple of the pagesize", - rpath); - ret = EINVAL; - goto err; - } - - last_pgno = mbytes * (MEGABYTE / pagesize); - last_pgno += bytes / pagesize; - - /* Correction: page numbers are zero-based, not 1-based. */ - if (last_pgno != 0) - --last_pgno; - - /* - * Get the file id if we weren't given one. Generated file id's - * don't use timestamps, otherwise there'd be no chance of any - * other process joining the party. - */ - if (finfop->fileid == NULL) { - if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0) - goto err; - finfop->fileid = idbuf; - } - } - - /* - * If we weren't provided an underlying shared object to join with, - * find/allocate the shared file objects. Also allocate space for - * for the per-process thread lock. - */ - if (needlock) - LOCKREGION(dbmp); - - if (mfp == NULL) - ret = __memp_mf_open(dbmp, - path, pagesize, last_pgno, finfop, &mfp); - else { - ++mfp->ref; - ret = 0; - } - if (ret == 0 && - F_ISSET(dbmp, MP_LOCKHANDLE) && (ret = - __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0) - LOCKINIT(dbmp, dbmfp->mutexp); - - if (needlock) - UNLOCKREGION(dbmp); - if (ret != 0) - goto err; - - dbmfp->mfp = mfp; - - /* - * If a file: - * + is read-only - * + isn't temporary - * + doesn't require any pgin/pgout support - * + the DB_NOMMAP flag wasn't set - * + and is less than mp_mmapsize bytes in size - * - * we can mmap it instead of reading/writing buffers. Don't do error - * checking based on the mmap call failure. We want to do normal I/O - * on the file if the reason we failed was because the file was on an - * NFS mounted partition, and we can fail in buffer I/O just as easily - * as here. - * - * XXX - * We'd like to test to see if the file is too big to mmap. Since we - * don't know what size or type off_t's or size_t's are, or the largest - * unsigned integral type is, or what random insanity the local C - * compiler will perpetrate, doing the comparison in a portable way is - * flatly impossible. Hope that mmap fails if the file is too large. - */ -#define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */ - if (F_ISSET(mfp, MP_CAN_MMAP)) { - if (!F_ISSET(dbmfp, MP_READONLY)) - F_CLR(mfp, MP_CAN_MMAP); - if (path == NULL) - F_CLR(mfp, MP_CAN_MMAP); - if (finfop->ftype != 0) - F_CLR(mfp, MP_CAN_MMAP); - if (LF_ISSET(DB_NOMMAP)) - F_CLR(mfp, MP_CAN_MMAP); - maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ? - DB_MAXMMAPSIZE : dbenv->mp_mmapsize; - if (mbytes > maxmap / MEGABYTE || - (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE)) - F_CLR(mfp, MP_CAN_MMAP); - } - dbmfp->addr = NULL; - if (F_ISSET(mfp, MP_CAN_MMAP)) { - dbmfp->len = (size_t)mbytes * MEGABYTE + bytes; - if (__db_mapfile(rpath, - dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) { - dbmfp->addr = NULL; - F_CLR(mfp, MP_CAN_MMAP); - } - } - if (rpath != NULL) - __os_freestr(rpath); - - LOCKHANDLE(dbmp, dbmp->mutexp); - TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); - UNLOCKHANDLE(dbmp, dbmp->mutexp); - - *retp = dbmfp; - return (0); - -err: /* - * Note that we do not have to free the thread mutex, because we - * never get to here after we have successfully allocated it. - */ - if (rpath != NULL) - __os_freestr(rpath); - if (dbmfp->fd != -1) - (void)__os_close(dbmfp->fd); - if (dbmfp != NULL) - __os_free(dbmfp, sizeof(DB_MPOOLFILE)); - return (ret); -} - -/* - * __memp_mf_open -- - * Open an MPOOLFILE. - */ -static int -__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp) - DB_MPOOL *dbmp; - const char *path; - size_t pagesize; - db_pgno_t last_pgno; - DB_MPOOL_FINFO *finfop; - MPOOLFILE **retp; -{ - MPOOLFILE *mfp; - int ret; - void *p; - -#define ISTEMPORARY (path == NULL) - - /* - * Walk the list of MPOOLFILE's, looking for a matching file. - * Temporary files can't match previous files. - */ - if (!ISTEMPORARY) - for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { - if (F_ISSET(mfp, MP_TEMP)) - continue; - if (!memcmp(finfop->fileid, - R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { - if (finfop->clear_len != mfp->clear_len || - finfop->ftype != mfp->ftype || - pagesize != mfp->stat.st_pagesize) { - __db_err(dbmp->dbenv, - "%s: ftype, clear length or pagesize changed", - path); - return (EINVAL); - } - - /* Found it: increment the reference count. */ - ++mfp->ref; - *retp = mfp; - return (0); - } - } - - /* Allocate a new MPOOLFILE. */ - if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) - return (ret); - *retp = mfp; - - /* Initialize the structure. */ - memset(mfp, 0, sizeof(MPOOLFILE)); - mfp->ref = 1; - mfp->ftype = finfop->ftype; - mfp->lsn_off = finfop->lsn_offset; - mfp->clear_len = finfop->clear_len; - - /* - * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget, - * we have to know the last page in the file. Figure it out and save - * it away. - */ - mfp->stat.st_pagesize = pagesize; - mfp->orig_last_pgno = mfp->last_pgno = last_pgno; - - if (ISTEMPORARY) - F_SET(mfp, MP_TEMP); - else { - /* Copy the file path into shared memory. */ - if ((ret = __memp_alloc(dbmp, - strlen(path) + 1, &mfp->path_off, &p)) != 0) - goto err; - memcpy(p, path, strlen(path) + 1); - - /* Copy the file identification string into shared memory. */ - if ((ret = __memp_alloc(dbmp, - DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) - goto err; - memcpy(p, finfop->fileid, DB_FILE_ID_LEN); - - F_SET(mfp, MP_CAN_MMAP); - } - - /* Copy the page cookie into shared memory. */ - if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) { - mfp->pgcookie_len = 0; - mfp->pgcookie_off = 0; - } else { - if ((ret = __memp_alloc(dbmp, - finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) - goto err; - memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size); - mfp->pgcookie_len = finfop->pgcookie->size; - } - - /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */ - SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile); - - if (0) { -err: if (mfp->path_off != 0) - __db_shalloc_free(dbmp->addr, - R_ADDR(dbmp, mfp->path_off)); - if (mfp->fileid_off != 0) - __db_shalloc_free(dbmp->addr, - R_ADDR(dbmp, mfp->fileid_off)); - if (mfp != NULL) - __db_shalloc_free(dbmp->addr, mfp); - mfp = NULL; - } - return (0); -} - -/* - * memp_fclose -- - * Close a backing file for the memory pool. - */ -int -memp_fclose(dbmfp) - DB_MPOOLFILE *dbmfp; -{ - DB_MPOOL *dbmp; - int ret, t_ret; - - dbmp = dbmfp->dbmp; - ret = 0; - - MP_PANIC_CHECK(dbmp); - - for (;;) { - LOCKHANDLE(dbmp, dbmp->mutexp); - - /* - * We have to reference count DB_MPOOLFILE structures as other - * threads may be using them. The problem only happens if the - * application makes a bad design choice. Here's the path: - * - * Thread A opens a database. - * Thread B uses thread A's DB_MPOOLFILE to write a buffer - * in order to free up memory in the mpool cache. - * Thread A closes the database while thread B is using the - * DB_MPOOLFILE structure. - * - * By opening all databases before creating the threads, and - * closing them after the threads have exited, applications - * get better performance and avoid the problem path entirely. - * - * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer - * is a short-term lock, even in worst case, since we better be - * the only thread of control using the DB_MPOOLFILE structure - * to read pages *into* the cache. Wait until we're the only - * reference holder and remove the DB_MPOOLFILE structure from - * the list, so nobody else can even find it. - */ - if (dbmfp->ref == 1) { - TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q); - break; - } - UNLOCKHANDLE(dbmp, dbmp->mutexp); - - (void)__os_sleep(1, 0); - } - UNLOCKHANDLE(dbmp, dbmp->mutexp); - - /* Complain if pinned blocks never returned. */ - if (dbmfp->pinref != 0) - __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned", - __memp_fn(dbmfp), (u_long)dbmfp->pinref); - - /* Close the underlying MPOOLFILE. */ - (void)__memp_mf_close(dbmp, dbmfp); - - /* Discard any mmap information. */ - if (dbmfp->addr != NULL && - (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0) - __db_err(dbmp->dbenv, - "%s: %s", __memp_fn(dbmfp), strerror(ret)); - - /* Close the file; temporary files may not yet have been created. */ - if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) { - __db_err(dbmp->dbenv, - "%s: %s", __memp_fn(dbmfp), strerror(t_ret)); - if (ret != 0) - t_ret = ret; - } - - /* Free memory. */ - if (dbmfp->mutexp != NULL) { - LOCKREGION(dbmp); - __db_shalloc_free(dbmp->addr, dbmfp->mutexp); - UNLOCKREGION(dbmp); - } - - /* Discard the DB_MPOOLFILE structure. */ - __os_free(dbmfp, sizeof(DB_MPOOLFILE)); - - return (ret); -} - -/* - * __memp_mf_close -- - * Close down an MPOOLFILE. - */ -static int -__memp_mf_close(dbmp, dbmfp) - DB_MPOOL *dbmp; - DB_MPOOLFILE *dbmfp; -{ - BH *bhp, *nbhp; - MPOOL *mp; - MPOOLFILE *mfp; - size_t mf_offset; - - mp = dbmp->mp; - mfp = dbmfp->mfp; - - LOCKREGION(dbmp); - - /* If more than a single reference, simply decrement. */ - if (mfp->ref > 1) { - --mfp->ref; - goto ret1; - } - - /* - * Move any BH's held by the file to the free list. We don't free the - * memory itself because we may be discarding the memory pool, and it's - * fairly expensive to reintegrate the buffers back into the region for - * no purpose. - */ - mf_offset = R_OFFSET(dbmp, mfp); - for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) { - nbhp = SH_TAILQ_NEXT(bhp, q, __bh); - -#ifdef DEBUG_NO_DIRTY - /* Complain if we find any blocks that were left dirty. */ - if (F_ISSET(bhp, BH_DIRTY)) - __db_err(dbmp->dbenv, - "%s: close: pgno %lu left dirty; ref %lu", - __memp_fn(dbmfp), - (u_long)bhp->pgno, (u_long)bhp->ref); -#endif - - if (bhp->mf_offset == mf_offset) { - if (F_ISSET(bhp, BH_DIRTY)) { - ++mp->stat.st_page_clean; - --mp->stat.st_page_dirty; - } - __memp_bhfree(dbmp, mfp, bhp, 0); - SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh); - } - } - - /* Delete from the list of MPOOLFILEs. */ - SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile); - - /* Free the space. */ - if (mfp->path_off != 0) - __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); - if (mfp->fileid_off != 0) - __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off)); - if (mfp->pgcookie_off != 0) - __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off)); - __db_shalloc_free(dbmp->addr, mfp); - -ret1: UNLOCKREGION(dbmp); - return (0); -} diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c deleted file mode 100644 index c551f97380..0000000000 --- a/db2/mp/mp_fput.c +++ /dev/null @@ -1,153 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_fput.c 10.24 (Sleepycat) 9/27/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -/* - * memp_fput -- - * Mpool file put function. - */ -int -memp_fput(dbmfp, pgaddr, flags) - DB_MPOOLFILE *dbmfp; - void *pgaddr; - u_int32_t flags; -{ - BH *bhp; - DB_MPOOL *dbmp; - MPOOL *mp; - int wrote, ret; - - dbmp = dbmfp->dbmp; - mp = dbmp->mp; - - MP_PANIC_CHECK(dbmp); - - /* Validate arguments. */ - if (flags) { - if ((ret = __db_fchk(dbmp->dbenv, "memp_fput", flags, - DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD)) != 0) - return (ret); - if ((ret = __db_fcchk(dbmp->dbenv, "memp_fput", - flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0) - return (ret); - - if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) { - __db_err(dbmp->dbenv, - "%s: dirty flag set for readonly file page", - __memp_fn(dbmfp)); - return (EACCES); - } - } - - LOCKREGION(dbmp); - - /* Decrement the pinned reference count. */ - if (dbmfp->pinref == 0) - __db_err(dbmp->dbenv, - "%s: put: more blocks returned than retrieved", - __memp_fn(dbmfp)); - else - --dbmfp->pinref; - - /* - * If we're mapping the file, there's nothing to do. Because we can - * stop mapping the file at any time, we have to check on each buffer - * to see if the address we gave the application was part of the map - * region. - */ - if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr && - (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) { - UNLOCKREGION(dbmp); - return (0); - } - - /* Convert the page address to a buffer header. */ - bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf)); - - /* Set/clear the page bits. */ - if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) { - ++mp->stat.st_page_clean; - --mp->stat.st_page_dirty; - F_CLR(bhp, BH_DIRTY); - } - if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) { - --mp->stat.st_page_clean; - ++mp->stat.st_page_dirty; - F_SET(bhp, BH_DIRTY); - } - if (LF_ISSET(DB_MPOOL_DISCARD)) - F_SET(bhp, BH_DISCARD); - - /* - * Check for a reference count going to zero. This can happen if the - * application returns a page twice. - */ - if (bhp->ref == 0) { - __db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned", - __memp_fn(dbmfp), (u_long)bhp->pgno); - UNLOCKREGION(dbmp); - return (EINVAL); - } - - /* - * If more than one reference to the page, we're done. Ignore the - * discard flags (for now) and leave it at its position in the LRU - * chain. The rest gets done at last reference close. - */ - if (--bhp->ref > 0) { - UNLOCKREGION(dbmp); - return (0); - } - - /* - * If this buffer is scheduled for writing because of a checkpoint, we - * need to write it (if we marked it dirty), or update the checkpoint - * counters (if we didn't mark it dirty). If we try to write it and - * can't, that's not necessarily an error, but set a flag so that the - * next time the memp_sync function runs we try writing it there, as - * the checkpoint application better be able to write all of the files. - */ - if (F_ISSET(bhp, BH_WRITE)) { - if (F_ISSET(bhp, BH_DIRTY)) { - if (__memp_bhwrite(dbmp, - dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote) - F_SET(mp, MP_LSN_RETRY); - } else { - F_CLR(bhp, BH_WRITE); - - --dbmfp->mfp->lsn_cnt; - --mp->lsn_cnt; - } - } - - /* Move the buffer to the head/tail of the LRU chain. */ - SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh); - if (F_ISSET(bhp, BH_DISCARD)) - SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh); - else - SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); - - - UNLOCKREGION(dbmp); - return (0); -} diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c deleted file mode 100644 index 1940d3b198..0000000000 --- a/db2/mp/mp_fset.c +++ /dev/null @@ -1,83 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_fset.c 10.16 (Sleepycat) 9/27/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -/* - * memp_fset -- - * Mpool page set-flag routine. - */ -int -memp_fset(dbmfp, pgaddr, flags) - DB_MPOOLFILE *dbmfp; - void *pgaddr; - u_int32_t flags; -{ - BH *bhp; - DB_MPOOL *dbmp; - MPOOL *mp; - int ret; - - dbmp = dbmfp->dbmp; - mp = dbmp->mp; - - MP_PANIC_CHECK(dbmp); - - /* Validate arguments. */ - if (flags == 0) - return (__db_ferr(dbmp->dbenv, "memp_fset", 1)); - - if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags, - DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0) - return (ret); - if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset", - flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0) - return (ret); - - if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) { - __db_err(dbmp->dbenv, - "%s: dirty flag set for readonly file page", - __memp_fn(dbmfp)); - return (EACCES); - } - - /* Convert the page address to a buffer header. */ - bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf)); - - LOCKREGION(dbmp); - - if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) { - ++mp->stat.st_page_clean; - --mp->stat.st_page_dirty; - F_CLR(bhp, BH_DIRTY); - } - if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) { - --mp->stat.st_page_clean; - ++mp->stat.st_page_dirty; - F_SET(bhp, BH_DIRTY); - } - if (LF_ISSET(DB_MPOOL_DISCARD)) - F_SET(bhp, BH_DISCARD); - - UNLOCKREGION(dbmp); - return (0); -} diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c deleted file mode 100644 index 4c90fc438f..0000000000 --- a/db2/mp/mp_open.c +++ /dev/null @@ -1,221 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_open.c 10.27 (Sleepycat) 10/1/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -/* - * memp_open -- - * Initialize and/or join a memory pool. - */ -int -memp_open(path, flags, mode, dbenv, retp) - const char *path; - u_int32_t flags; - int mode; - DB_ENV *dbenv; - DB_MPOOL **retp; -{ - DB_MPOOL *dbmp; - size_t cachesize; - int is_private, ret; - - /* Validate arguments. */ -#ifdef HAVE_SPINLOCKS -#define OKFLAGS (DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD) -#else -#define OKFLAGS (DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP) -#endif - if ((ret = __db_fchk(dbenv, "memp_open", flags, OKFLAGS)) != 0) - return (ret); - - /* Extract fields from DB_ENV structure. */ - cachesize = dbenv == NULL ? 0 : dbenv->mp_size; - - /* Create and initialize the DB_MPOOL structure. */ - if ((ret = __os_calloc(1, sizeof(DB_MPOOL), &dbmp)) != 0) - return (ret); - LIST_INIT(&dbmp->dbregq); - TAILQ_INIT(&dbmp->dbmfq); - - dbmp->dbenv = dbenv; - - /* Decide if it's possible for anyone else to access the pool. */ - is_private = - (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE); - - /* - * Map in the region. We do locking regardless, as portions of it are - * implemented in common code (if we put the region in a file, that is). - */ - F_SET(dbmp, MP_LOCKREGION); - if ((ret = __memp_ropen(dbmp, - path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0) - goto err; - F_CLR(dbmp, MP_LOCKREGION); - - /* - * If there's concurrent access, then we have to lock the region. - * If it's threaded, then we have to lock both the handles and the - * region, and we need to allocate a mutex for that purpose. - */ - if (!is_private) - F_SET(dbmp, MP_LOCKREGION); - if (LF_ISSET(DB_THREAD)) { - F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION); - LOCKREGION(dbmp); - ret = __memp_alloc(dbmp, - sizeof(db_mutex_t), NULL, &dbmp->mutexp); - UNLOCKREGION(dbmp); - if (ret != 0) { - (void)memp_close(dbmp); - goto err; - } - LOCKINIT(dbmp, dbmp->mutexp); - } - - *retp = dbmp; - return (0); - -err: if (dbmp != NULL) - __os_free(dbmp, sizeof(DB_MPOOL)); - return (ret); -} - -/* - * memp_close -- - * Close a memory pool. - */ -int -memp_close(dbmp) - DB_MPOOL *dbmp; -{ - DB_MPOOLFILE *dbmfp; - DB_MPREG *mpreg; - int ret, t_ret; - - ret = 0; - - MP_PANIC_CHECK(dbmp); - - /* Discard DB_MPREGs. */ - while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) { - LIST_REMOVE(mpreg, q); - __os_free(mpreg, sizeof(DB_MPREG)); - } - - /* Discard DB_MPOOLFILEs. */ - while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL) - if ((t_ret = memp_fclose(dbmfp)) != 0 && ret == 0) - ret = t_ret; - - /* Discard thread mutex. */ - if (F_ISSET(dbmp, MP_LOCKHANDLE)) { - LOCKREGION(dbmp); - __db_shalloc_free(dbmp->addr, dbmp->mutexp); - UNLOCKREGION(dbmp); - } - - /* Close the region. */ - if ((t_ret = __db_rdetach(&dbmp->reginfo)) != 0 && ret == 0) - ret = t_ret; - - if (dbmp->reginfo.path != NULL) - __os_freestr(dbmp->reginfo.path); - __os_free(dbmp, sizeof(DB_MPOOL)); - - return (ret); -} - -/* - * __memp_panic -- - * Panic a memory pool. - * - * PUBLIC: void __memp_panic __P((DB_ENV *)); - */ -void -__memp_panic(dbenv) - DB_ENV *dbenv; -{ - if (dbenv->mp_info != NULL) - dbenv->mp_info->mp->rlayout.panic = 1; -} - -/* - * memp_unlink -- - * Exit a memory pool. - */ -int -memp_unlink(path, force, dbenv) - const char *path; - int force; - DB_ENV *dbenv; -{ - REGINFO reginfo; - int ret; - - memset(®info, 0, sizeof(reginfo)); - reginfo.dbenv = dbenv; - reginfo.appname = DB_APP_NONE; - if (path != NULL && (ret = __os_strdup(path, ®info.path)) != 0) - return (ret); - reginfo.file = DB_DEFAULT_MPOOL_FILE; - ret = __db_runlink(®info, force); - if (reginfo.path != NULL) - __os_freestr(reginfo.path); - return (ret); -} - -/* - * memp_register -- - * Register a file type's pgin, pgout routines. - */ -int -memp_register(dbmp, ftype, pgin, pgout) - DB_MPOOL *dbmp; - int ftype; - int (*pgin) __P((db_pgno_t, void *, DBT *)); - int (*pgout) __P((db_pgno_t, void *, DBT *)); -{ - DB_MPREG *mpr; - int ret; - - MP_PANIC_CHECK(dbmp); - - if ((ret = __os_malloc(sizeof(DB_MPREG), NULL, &mpr)) != 0) - return (ret); - - mpr->ftype = ftype; - mpr->pgin = pgin; - mpr->pgout = pgout; - - /* - * Insert at the head. Because we do a linear walk, we'll find - * the most recent registry in the case of multiple entries, so - * we don't have to check for multiple registries. - */ - LOCKHANDLE(dbmp, dbmp->mutexp); - LIST_INSERT_HEAD(&dbmp->dbregq, mpr, q); - UNLOCKHANDLE(dbmp, dbmp->mutexp); - - return (0); -} diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c deleted file mode 100644 index 84c782e781..0000000000 --- a/db2/mp/mp_pr.c +++ /dev/null @@ -1,304 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_pr.c 10.30 (Sleepycat) 10/1/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#endif - -#include "db_int.h" -#include "db_page.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "db_auto.h" -#include "db_ext.h" -#include "common_ext.h" - -static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *)); - -/* - * memp_stat -- - * Display MPOOL statistics. - */ -int -memp_stat(dbmp, gspp, fspp, db_malloc) - DB_MPOOL *dbmp; - DB_MPOOL_STAT **gspp; - DB_MPOOL_FSTAT ***fspp; - void *(*db_malloc) __P((size_t)); -{ - DB_MPOOL_FSTAT **tfsp; - MPOOLFILE *mfp; - size_t len, nlen; - int ret; - char *name; - - MP_PANIC_CHECK(dbmp); - - /* Allocate space for the global statistics. */ - if (gspp != NULL) { - *gspp = NULL; - - if ((ret = __os_malloc(sizeof(**gspp), db_malloc, gspp)) != 0) - return (ret); - - LOCKREGION(dbmp); - - /* Copy out the global statistics. */ - **gspp = dbmp->mp->stat; - (*gspp)->st_hash_buckets = dbmp->mp->htab_buckets; - (*gspp)->st_region_wait = - dbmp->mp->rlayout.lock.mutex_set_wait; - (*gspp)->st_region_nowait = - dbmp->mp->rlayout.lock.mutex_set_nowait; - (*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt; - (*gspp)->st_regsize = dbmp->mp->rlayout.size; - - UNLOCKREGION(dbmp); - } - - if (fspp != NULL) { - *fspp = NULL; - - LOCKREGION(dbmp); - - /* Count the MPOOLFILE structures. */ - for (len = 0, - mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; - ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) - ; - - UNLOCKREGION(dbmp); - - if (len == 0) - return (0); - - /* Allocate space for the pointers. */ - len = (len + 1) * sizeof(DB_MPOOL_FSTAT *); - if ((ret = __os_malloc(len, db_malloc, fspp)) != 0) - return (ret); - - LOCKREGION(dbmp); - - /* Build each individual entry. */ - for (tfsp = *fspp, - mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; - ++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { - name = __memp_fns(dbmp, mfp); - nlen = strlen(name); - len = sizeof(DB_MPOOL_FSTAT) + nlen + 1; - if ((ret = __os_malloc(len, db_malloc, tfsp)) != 0) - return (ret); - **tfsp = mfp->stat; - (*tfsp)->file_name = (char *) - (u_int8_t *)*tfsp + sizeof(DB_MPOOL_FSTAT); - memcpy((*tfsp)->file_name, name, nlen + 1); - } - *tfsp = NULL; - - UNLOCKREGION(dbmp); - } - return (0); -} - -/* - * __memp_fn -- - * On errors we print whatever is available as the file name. - * - * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *)); - */ -char * -__memp_fn(dbmfp) - DB_MPOOLFILE *dbmfp; -{ - return (__memp_fns(dbmfp->dbmp, dbmfp->mfp)); -} - -/* - * __memp_fns -- - * On errors we print whatever is available as the file name. - * - * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *)); - * - */ -char * -__memp_fns(dbmp, mfp) - DB_MPOOL *dbmp; - MPOOLFILE *mfp; -{ - if (mfp->path_off == 0) - return ((char *)"temporary"); - - return ((char *)R_ADDR(dbmp, mfp->path_off)); -} - -#define FMAP_ENTRIES 200 /* Files we map. */ - -#define MPOOL_DUMP_HASH 0x01 /* Debug hash chains. */ -#define MPOOL_DUMP_LRU 0x02 /* Debug LRU chains. */ -#define MPOOL_DUMP_MEM 0x04 /* Debug region memory. */ -#define MPOOL_DUMP_ALL 0x07 /* Debug all. */ - - -/* - * __memp_dump_region -- - * Display MPOOL structures. - * - * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *)); - */ -void -__memp_dump_region(dbmp, area, fp) - DB_MPOOL *dbmp; - char *area; - FILE *fp; -{ - BH *bhp; - DB_HASHTAB *htabp; - DB_MPOOLFILE *dbmfp; - MPOOL *mp; - MPOOLFILE *mfp; - size_t bucket, fmap[FMAP_ENTRIES + 1]; - u_int32_t flags; - int cnt; - - /* Make it easy to call from the debugger. */ - if (fp == NULL) - fp = stderr; - - for (flags = 0; *area != '\0'; ++area) - switch (*area) { - case 'A': - LF_SET(MPOOL_DUMP_ALL); - break; - case 'h': - LF_SET(MPOOL_DUMP_HASH); - break; - case 'l': - LF_SET(MPOOL_DUMP_LRU); - break; - case 'm': - LF_SET(MPOOL_DUMP_MEM); - break; - } - - LOCKREGION(dbmp); - - mp = dbmp->mp; - - /* Display MPOOL structures. */ - (void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n", - DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr); - - /* Display the MPOOLFILE structures. */ - cnt = 0; - for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) { - (void)fprintf(fp, "file #%d: %s: refs %lu, type %ld, %s\n", - cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref, - (long)mfp->ftype, - F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write"); - if (cnt < FMAP_ENTRIES) - fmap[cnt] = R_OFFSET(dbmp, mfp); - } - - for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); - dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) { - (void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n", - cnt + 1, __memp_fn(dbmfp), dbmfp->fd, - F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write"); - if (cnt < FMAP_ENTRIES) - fmap[cnt] = R_OFFSET(dbmp, mfp); - } - if (cnt < FMAP_ENTRIES) - fmap[cnt] = INVALID; - else - fmap[FMAP_ENTRIES] = INVALID; - - /* Display the hash table list of BH's. */ - if (LF_ISSET(MPOOL_DUMP_HASH)) { - (void)fprintf(fp, - "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n", - DB_LINE, (u_long)mp->htab_buckets); - for (htabp = dbmp->htab, - bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) { - if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL) - (void)fprintf(fp, "%lu:\n", (u_long)bucket); - for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) - __memp_pbh(dbmp, bhp, fmap, fp); - } - } - - /* Display the LRU list of BH's. */ - if (LF_ISSET(MPOOL_DUMP_LRU)) { - (void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE); - (void)fprintf(fp, "pageno, file, ref, address\n"); - for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) - __memp_pbh(dbmp, bhp, fmap, fp); - } - - if (LF_ISSET(MPOOL_DUMP_MEM)) - __db_shalloc_dump(dbmp->addr, fp); - - UNLOCKREGION(dbmp); - - /* Flush in case we're debugging. */ - (void)fflush(fp); -} - -/* - * __memp_pbh -- - * Display a BH structure. - */ -static void -__memp_pbh(dbmp, bhp, fmap, fp) - DB_MPOOL *dbmp; - BH *bhp; - size_t *fmap; - FILE *fp; -{ - static const FN fn[] = { - { BH_CALLPGIN, "callpgin" }, - { BH_DIRTY, "dirty" }, - { BH_DISCARD, "discard" }, - { BH_LOCKED, "locked" }, - { BH_TRASH, "trash" }, - { BH_WRITE, "write" }, - { 0 }, - }; - int i; - - for (i = 0; i < FMAP_ENTRIES; ++i) - if (fmap[i] == INVALID || fmap[i] == bhp->mf_offset) - break; - - if (fmap[i] == INVALID) - (void)fprintf(fp, " %4lu, %lu, %2lu, %lu", - (u_long)bhp->pgno, (u_long)bhp->mf_offset, - (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp)); - else - (void)fprintf(fp, " %4lu, #%d, %2lu, %lu", - (u_long)bhp->pgno, i + 1, - (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp)); - - __db_prflags(bhp->flags, fn, fp); - - (void)fprintf(fp, "\n"); -} diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c deleted file mode 100644 index b9c92f2e13..0000000000 --- a/db2/mp/mp_region.c +++ /dev/null @@ -1,330 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_region.c 10.35 (Sleepycat) 12/11/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -/* - * __memp_reg_alloc -- - * Allocate some space in the mpool region, with locking. - * - * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *)); - */ -int -__memp_reg_alloc(dbmp, len, offsetp, retp) - DB_MPOOL *dbmp; - size_t len, *offsetp; - void *retp; -{ - int ret; - - LOCKREGION(dbmp); - ret = __memp_alloc(dbmp, len, offsetp, retp); - UNLOCKREGION(dbmp); - return (ret); -} - -/* - * __memp_alloc -- - * Allocate some space in the mpool region. - * - * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *)); - */ -int -__memp_alloc(dbmp, len, offsetp, retp) - DB_MPOOL *dbmp; - size_t len, *offsetp; - void *retp; -{ - BH *bhp, *nbhp; - MPOOL *mp; - MPOOLFILE *mfp; - size_t fsize, total; - int nomore, restart, ret, wrote; - void *p; - - mp = dbmp->mp; - - nomore = 0; -alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { - if (offsetp != NULL) - *offsetp = R_OFFSET(dbmp, p); - *(void **)retp = p; - return (0); - } - if (nomore) { - __db_err(dbmp->dbenv, - "Unable to allocate %lu bytes from mpool shared region: %s\n", - (u_long)len, strerror(ret)); - return (ret); - } - - /* Look for a buffer on the free list that's the right size. */ - for (bhp = - SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) { - nbhp = SH_TAILQ_NEXT(bhp, q, __bh); - - if (__db_shsizeof(bhp) == len) { - SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh); - if (offsetp != NULL) - *offsetp = R_OFFSET(dbmp, bhp); - *(void **)retp = bhp; - return (0); - } - } - - /* Discard from the free list until we've freed enough memory. */ - total = 0; - for (bhp = - SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) { - nbhp = SH_TAILQ_NEXT(bhp, q, __bh); - - SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh); - __db_shalloc_free(dbmp->addr, bhp); - --mp->stat.st_page_clean; - - /* - * Retry as soon as we've freed up sufficient space. If we - * will have to coalesce memory to satisfy the request, don't - * try until it's likely (possible?) that we'll succeed. - */ - total += fsize = __db_shsizeof(bhp); - if (fsize >= len || total >= 3 * len) - goto alloc; - } - -retry: /* Find a buffer we can flush; pure LRU. */ - restart = total = 0; - for (bhp = - SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) { - nbhp = SH_TAILQ_NEXT(bhp, q, __bh); - - /* Ignore pinned or locked (I/O in progress) buffers. */ - if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) - continue; - - /* Find the associated MPOOLFILE. */ - mfp = R_ADDR(dbmp, bhp->mf_offset); - - /* - * Write the page if it's dirty. - * - * If we wrote the page, fall through and free the buffer. We - * don't have to rewalk the list to acquire the buffer because - * it was never available for any other process to modify it. - * If we didn't write the page, but we discarded and reacquired - * the region lock, restart the buffer list walk. If we neither - * wrote the buffer nor discarded the region lock, continue down - * the buffer list. - */ - if (F_ISSET(bhp, BH_DIRTY)) { - if ((ret = __memp_bhwrite(dbmp, - mfp, bhp, &restart, &wrote)) != 0) - return (ret); - - /* - * It's possible that another process wants this buffer - * and incremented the ref count while we were writing - * it. - */ - if (bhp->ref != 0) - goto retry; - - if (wrote) - ++mp->stat.st_rw_evict; - else { - if (restart) - goto retry; - continue; - } - } else - ++mp->stat.st_ro_evict; - - /* - * Check to see if the buffer is the size we're looking for. - * If it is, simply reuse it. - */ - total += fsize = __db_shsizeof(bhp); - if (fsize == len) { - __memp_bhfree(dbmp, mfp, bhp, 0); - - if (offsetp != NULL) - *offsetp = R_OFFSET(dbmp, bhp); - *(void **)retp = bhp; - return (0); - } - - /* Free the buffer. */ - __memp_bhfree(dbmp, mfp, bhp, 1); - - /* - * Retry as soon as we've freed up sufficient space. If we - * have to coalesce of memory to satisfy the request, don't - * try until it's likely (possible?) that we'll succeed. - */ - if (fsize >= len || total >= 3 * len) - goto alloc; - - /* Restart the walk if we discarded the region lock. */ - if (restart) - goto retry; - } - nomore = 1; - goto alloc; -} - -/* - * __memp_ropen -- - * Attach to, and optionally create, the mpool region. - * - * PUBLIC: int __memp_ropen - * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t)); - */ -int -__memp_ropen(dbmp, path, cachesize, mode, is_private, flags) - DB_MPOOL *dbmp; - const char *path; - size_t cachesize; - int mode, is_private; - u_int32_t flags; -{ - MPOOL *mp; - size_t rlen; - int defcache, ret; - - /* - * Unlike other DB subsystems, mpool can't simply grow the region - * because it returns pointers into the region to its clients. To - * "grow" the region, we'd have to allocate a new region and then - * store a region number in the structures that reference regional - * objects. It's reasonable that we fail regardless, as clients - * shouldn't have every page in the region pinned, so the only - * "failure" mode should be a performance penalty because we don't - * find a page in the cache that we'd like to have found. - * - * Up the user's cachesize by 25% to account for our overhead. - */ - defcache = 0; - if (cachesize < DB_CACHESIZE_MIN) { - if (cachesize == 0) { - defcache = 1; - cachesize = DB_CACHESIZE_DEF; - } else - cachesize = DB_CACHESIZE_MIN; - } - rlen = cachesize + cachesize / 4; - - /* - * Map in the region. - * - * If it's a private mpool, use malloc, it's a lot faster than - * instantiating a region. - */ - dbmp->reginfo.dbenv = dbmp->dbenv; - dbmp->reginfo.appname = DB_APP_NONE; - if (path == NULL) - dbmp->reginfo.path = NULL; - else - if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0) - return (ret); - dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE; - dbmp->reginfo.mode = mode; - dbmp->reginfo.size = rlen; - dbmp->reginfo.dbflags = flags; - dbmp->reginfo.flags = 0; - if (defcache) - F_SET(&dbmp->reginfo, REGION_SIZEDEF); - - /* - * If we're creating a temporary region, don't use any standard - * naming. - */ - if (is_private) { - dbmp->reginfo.appname = DB_APP_TMP; - dbmp->reginfo.file = NULL; - F_SET(&dbmp->reginfo, REGION_PRIVATE); - } - - if ((ret = __db_rattach(&dbmp->reginfo)) != 0) { - if (dbmp->reginfo.path != NULL) - __os_freestr(dbmp->reginfo.path); - return (ret); - } - - /* - * The MPOOL structure is first in the region, the rest of the region - * is free space. - */ - dbmp->mp = dbmp->reginfo.addr; - dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL); - - /* Initialize a created region. */ - if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) { - mp = dbmp->mp; - SH_TAILQ_INIT(&mp->bhq); - SH_TAILQ_INIT(&mp->bhfq); - SH_TAILQ_INIT(&mp->mpfq); - - __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL)); - - /* - * Assume we want to keep the hash chains with under 10 pages - * on each chain. We don't know the pagesize in advance, and - * it may differ for different files. Use a pagesize of 1K for - * the calculation -- we walk these chains a lot, they should - * be short. - */ - mp->htab_buckets = - __db_tablesize((cachesize / (1 * 1024)) / 10); - - /* Allocate hash table space and initialize it. */ - if ((ret = __db_shalloc(dbmp->addr, - mp->htab_buckets * sizeof(DB_HASHTAB), - 0, &dbmp->htab)) != 0) - goto err; - __db_hashinit(dbmp->htab, mp->htab_buckets); - mp->htab = R_OFFSET(dbmp, dbmp->htab); - - ZERO_LSN(mp->lsn); - mp->lsn_cnt = 0; - - memset(&mp->stat, 0, sizeof(mp->stat)); - mp->stat.st_cachesize = cachesize; - - mp->flags = 0; - } - - /* Get the local hash table address. */ - dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab); - - UNLOCKREGION(dbmp); - return (0); - -err: UNLOCKREGION(dbmp); - (void)__db_rdetach(&dbmp->reginfo); - if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) - (void)memp_unlink(path, 1, dbmp->dbenv); - - if (dbmp->reginfo.path != NULL) - __os_freestr(dbmp->reginfo.path); - return (ret); -} diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c deleted file mode 100644 index 535348517c..0000000000 --- a/db2/mp/mp_sync.c +++ /dev/null @@ -1,549 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)mp_sync.c 10.31 (Sleepycat) 12/11/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <stdlib.h> -#endif - -#include "db_int.h" -#include "shqueue.h" -#include "db_shash.h" -#include "mp.h" -#include "common_ext.h" - -static int __bhcmp __P((const void *, const void *)); -static int __memp_fsync __P((DB_MPOOLFILE *)); - -/* - * memp_sync -- - * Mpool sync function. - */ -int -memp_sync(dbmp, lsnp) - DB_MPOOL *dbmp; - DB_LSN *lsnp; -{ - BH *bhp, **bharray; - DB_ENV *dbenv; - MPOOL *mp; - MPOOLFILE *mfp; - int ar_cnt, nalloc, next, maxpin, ret, wrote; - - MP_PANIC_CHECK(dbmp); - - dbenv = dbmp->dbenv; - mp = dbmp->mp; - - if (dbenv->lg_info == NULL) { - __db_err(dbenv, "memp_sync: requires logging"); - return (EINVAL); - } - - /* - * We try and write the buffers in page order: it should reduce seeks - * by the underlying filesystem and possibly reduce the actual number - * of writes. We don't want to hold the region lock while we write - * the buffers, so only hold it lock while we create a list. Get a - * good-size block of memory to hold buffer pointers, we don't want - * to run out. - */ - LOCKREGION(dbmp); - nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10; - UNLOCKREGION(dbmp); - - if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0) - return (ret); - - LOCKREGION(dbmp); - - /* - * If the application is asking about a previous call to memp_sync(), - * and we haven't found any buffers that the application holding the - * pin couldn't write, return yes or no based on the current count. - * Note, if the application is asking about a LSN *smaller* than one - * we've already handled or are currently handling, then we return a - * result based on the count for the larger LSN. - */ - if (!F_ISSET(mp, MP_LSN_RETRY) && log_compare(lsnp, &mp->lsn) <= 0) { - if (mp->lsn_cnt == 0) { - *lsnp = mp->lsn; - ret = 0; - } else - ret = DB_INCOMPLETE; - goto done; - } - - /* Else, it's a new checkpoint. */ - F_CLR(mp, MP_LSN_RETRY); - - /* - * Save the LSN. We know that it's a new LSN or larger than the one - * for which we were already doing a checkpoint. (BTW, I don't expect - * to see multiple LSN's from the same or multiple processes, but You - * Just Never Know. Responding as if they all called with the largest - * of the LSNs specified makes everything work.) - * - * We don't currently use the LSN we save. We could potentially save - * the last-written LSN in each buffer header and use it to determine - * what buffers need to be written. The problem with this is that it's - * sizeof(LSN) more bytes of buffer header. We currently write all the - * dirty buffers instead. - * - * Walk the list of shared memory segments clearing the count of - * buffers waiting to be written. - */ - mp->lsn = *lsnp; - mp->lsn_cnt = 0; - for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) - mfp->lsn_cnt = 0; - - /* - * Walk the list of buffers and mark all dirty buffers to be written - * and all pinned buffers to be potentially written (we can't know if - * we'll need to write them until the holding process returns them to - * the cache). We do this in one pass while holding the region locked - * so that processes can't make new buffers dirty, causing us to never - * finish. Since the application may have restarted the sync, clear - * any BH_WRITE flags that appear to be left over from previous calls. - * - * We don't want to pin down the entire buffer cache, otherwise we'll - * starve threads needing new pages. Don't pin down more than 80% of - * the cache. - * - * Keep a count of the total number of buffers we need to write in - * MPOOL->lsn_cnt, and for each file, in MPOOLFILE->lsn_count. - */ - ar_cnt = 0; - maxpin = ((mp->stat.st_page_dirty + mp->stat.st_page_clean) * 8) / 10; - for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) - if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) { - F_SET(bhp, BH_WRITE); - - ++mp->lsn_cnt; - - mfp = R_ADDR(dbmp, bhp->mf_offset); - ++mfp->lsn_cnt; - - /* - * If the buffer isn't in use, we should be able to - * write it immediately, so increment the reference - * count to lock it and its contents down, and then - * save a reference to it. - * - * If we've run out space to store buffer references, - * we're screwed. We don't want to realloc the array - * while holding a region lock, so we set the flag to - * force the checkpoint to be done again, from scratch, - * later. - * - * If we've pinned down too much of the cache stop, and - * set a flag to force the checkpoint to be tried again - * later. - */ - if (bhp->ref == 0) { - ++bhp->ref; - bharray[ar_cnt] = bhp; - if (++ar_cnt >= nalloc || ar_cnt >= maxpin) { - F_SET(mp, MP_LSN_RETRY); - break; - } - } - } else - if (F_ISSET(bhp, BH_WRITE)) - F_CLR(bhp, BH_WRITE); - - /* If there no buffers we can write immediately, we're done. */ - if (ar_cnt == 0) { - ret = mp->lsn_cnt ? DB_INCOMPLETE : 0; - goto done; - } - - UNLOCKREGION(dbmp); - - /* Sort the buffers we're going to write. */ - qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp); - - LOCKREGION(dbmp); - - /* Walk the array, writing buffers. */ - for (next = 0; next < ar_cnt; ++next) { - /* - * It's possible for a thread to have gotten the buffer since - * we listed it for writing. If the reference count is still - * 1, we're the only ones using the buffer, go ahead and write. - * If it's >1, then skip the buffer and assume that it will be - * written when it's returned to the cache. - */ - if (bharray[next]->ref > 1) { - --bharray[next]->ref; - continue; - } - - /* Write the buffer. */ - mfp = R_ADDR(dbmp, bharray[next]->mf_offset); - ret = __memp_bhwrite(dbmp, mfp, bharray[next], NULL, &wrote); - - /* Release the buffer. */ - --bharray[next]->ref; - - /* If there's an error, release the rest of the buffers. */ - if (ret != 0 || !wrote) { - /* - * Any process syncing the shared memory buffer pool - * had better be able to write to any underlying file. - * Be understanding, but firm, on this point. - */ - if (ret == 0) { - __db_err(dbenv, "%s: unable to flush page: %lu", - __memp_fns(dbmp, mfp), - (u_long)bharray[next]->pgno); - ret = EPERM; - } - - while (++next < ar_cnt) - --bharray[next]->ref; - goto err; - } - } - ret = mp->lsn_cnt != 0 || - F_ISSET(mp, MP_LSN_RETRY) ? DB_INCOMPLETE : 0; - -done: - if (0) { -err: /* - * On error, clear: - * MPOOL->lsn_cnt (the total sync count) - * MPOOLFILE->lsn_cnt (the per-file sync count) - * BH_WRITE flag (the scheduled for writing flag) - */ - mp->lsn_cnt = 0; - for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) - mfp->lsn_cnt = 0; - for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) - F_CLR(bhp, BH_WRITE); - } - UNLOCKREGION(dbmp); - __os_free(bharray, nalloc * sizeof(BH *)); - return (ret); -} - -/* - * memp_fsync -- - * Mpool file sync function. - */ -int -memp_fsync(dbmfp) - DB_MPOOLFILE *dbmfp; -{ - DB_MPOOL *dbmp; - int is_tmp; - - dbmp = dbmfp->dbmp; - - MP_PANIC_CHECK(dbmp); - - /* - * If this handle doesn't have a file descriptor that's open for - * writing, or if the file is a temporary, there's no reason to - * proceed further. - */ - if (F_ISSET(dbmfp, MP_READONLY)) - return (0); - - LOCKREGION(dbmp); - is_tmp = F_ISSET(dbmfp->mfp, MP_TEMP); - UNLOCKREGION(dbmp); - if (is_tmp) - return (0); - - return (__memp_fsync(dbmfp)); -} - -/* - * __mp_xxx_fd -- - * Return a file descriptor for DB 1.85 compatibility locking. - * - * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *)); - */ -int -__mp_xxx_fd(dbmfp, fdp) - DB_MPOOLFILE *dbmfp; - int *fdp; -{ - int ret; - - /* - * This is a truly spectacular layering violation, intended ONLY to - * support compatibility for the DB 1.85 DB->fd call. - * - * Sync the database file to disk, creating the file as necessary. - * - * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3). - * The MP_READONLY test isn't interesting because we will either - * already have a file descriptor (we opened the database file for - * reading) or we aren't readonly (we created the database which - * requires write privileges). The MP_TEMP test isn't interesting - * because we want to write to the backing file regardless so that - * we get a file descriptor to return. - */ - ret = dbmfp->fd == -1 ? __memp_fsync(dbmfp) : 0; - - return ((*fdp = dbmfp->fd) == -1 ? ENOENT : ret); -} - -/* - * __memp_fsync -- - * Mpool file internal sync function. - */ -static int -__memp_fsync(dbmfp) - DB_MPOOLFILE *dbmfp; -{ - BH *bhp, **bharray; - DB_MPOOL *dbmp; - MPOOL *mp; - size_t mf_offset; - int ar_cnt, incomplete, nalloc, next, ret, wrote; - - ret = 0; - dbmp = dbmfp->dbmp; - mp = dbmp->mp; - mf_offset = R_OFFSET(dbmp, dbmfp->mfp); - - /* - * We try and write the buffers in page order: it should reduce seeks - * by the underlying filesystem and possibly reduce the actual number - * of writes. We don't want to hold the region lock while we write - * the buffers, so only hold it lock while we create a list. Get a - * good-size block of memory to hold buffer pointers, we don't want - * to run out. - */ - LOCKREGION(dbmp); - nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10; - UNLOCKREGION(dbmp); - - if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0) - return (ret); - - LOCKREGION(dbmp); - - /* - * Walk the LRU list of buffer headers, and get a list of buffers to - * write for this MPOOLFILE. - */ - ar_cnt = incomplete = 0; - for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) { - if (!F_ISSET(bhp, BH_DIRTY) || bhp->mf_offset != mf_offset) - continue; - if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) { - incomplete = 1; - continue; - } - - ++bhp->ref; - bharray[ar_cnt] = bhp; - - /* - * If we've run out space to store buffer references, we're - * screwed, as we don't want to realloc the array holding a - * region lock. Set the incomplete flag -- the only way we - * can get here is if the file is active in the buffer cache, - * which is the same thing as finding pinned buffers. - */ - if (++ar_cnt >= nalloc) { - incomplete = 1; - break; - } - } - - UNLOCKREGION(dbmp); - - /* Sort the buffers we're going to write. */ - if (ar_cnt != 0) - qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp); - - LOCKREGION(dbmp); - - /* Walk the array, writing buffers. */ - for (next = 0; next < ar_cnt; ++next) { - /* - * It's possible for a thread to have gotten the buffer since - * we listed it for writing. If the reference count is still - * 1, we're the only ones using the buffer, go ahead and write. - * If it's >1, then skip the buffer. - */ - if (bharray[next]->ref > 1) { - incomplete = 1; - - --bharray[next]->ref; - continue; - } - - /* Write the buffer. */ - ret = __memp_pgwrite(dbmfp, bharray[next], NULL, &wrote); - - /* Release the buffer. */ - --bharray[next]->ref; - - /* If there's an error, release the rest of the buffers. */ - if (ret != 0) { - while (++next < ar_cnt) - --bharray[next]->ref; - goto err; - } - - /* - * If we didn't write the buffer for some reason, don't return - * success. - */ - if (!wrote) - incomplete = 1; - } - -err: UNLOCKREGION(dbmp); - - __os_free(bharray, nalloc * sizeof(BH *)); - - /* - * Sync the underlying file as the last thing we do, so that the OS - * has maximal opportunity to flush buffers before we request it. - * - * XXX: - * Don't lock the region around the sync, fsync(2) has no atomicity - * issues. - */ - if (ret == 0) - return (incomplete ? DB_INCOMPLETE : __os_fsync(dbmfp->fd)); - return (ret); -} - -/* - * memp_trickle -- - * Keep a specified percentage of the buffers clean. - */ -int -memp_trickle(dbmp, pct, nwrotep) - DB_MPOOL *dbmp; - int pct, *nwrotep; -{ - BH *bhp; - MPOOL *mp; - MPOOLFILE *mfp; - u_long total; - int ret, wrote; - - MP_PANIC_CHECK(dbmp); - - mp = dbmp->mp; - if (nwrotep != NULL) - *nwrotep = 0; - - if (pct < 1 || pct > 100) - return (EINVAL); - - LOCKREGION(dbmp); - - /* - * If there are sufficient clean buffers, or no buffers or no dirty - * buffers, we're done. - * - * XXX - * Using st_page_clean and st_page_dirty is our only choice at the - * moment, but it's not as correct as we might like in the presence - * of pools with more than one buffer size, as a free 512-byte buffer - * isn't the same as a free 8K buffer. - */ -loop: total = mp->stat.st_page_clean + mp->stat.st_page_dirty; - if (total == 0 || mp->stat.st_page_dirty == 0 || - (mp->stat.st_page_clean * 100) / total >= (u_long)pct) { - UNLOCKREGION(dbmp); - return (0); - } - - /* Loop until we write a buffer. */ - for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) { - if (bhp->ref != 0 || - !F_ISSET(bhp, BH_DIRTY) || F_ISSET(bhp, BH_LOCKED)) - continue; - - mfp = R_ADDR(dbmp, bhp->mf_offset); - - /* - * We can't write to temporary files -- see the comment in - * mp_bh.c:__memp_bhwrite(). - */ - if (F_ISSET(mfp, MP_TEMP)) - continue; - - if ((ret = __memp_bhwrite(dbmp, mfp, bhp, NULL, &wrote)) != 0) - goto err; - - /* - * Any process syncing the shared memory buffer pool had better - * be able to write to any underlying file. Be understanding, - * but firm, on this point. - */ - if (!wrote) { - __db_err(dbmp->dbenv, "%s: unable to flush page: %lu", - __memp_fns(dbmp, mfp), (u_long)bhp->pgno); - ret = EPERM; - goto err; - } - - ++mp->stat.st_page_trickle; - if (nwrotep != NULL) - ++*nwrotep; - goto loop; - } - - /* No more buffers to write. */ - ret = 0; - -err: UNLOCKREGION(dbmp); - return (ret); -} - -static int -__bhcmp(p1, p2) - const void *p1, *p2; -{ - BH *bhp1, *bhp2; - - bhp1 = *(BH * const *)p1; - bhp2 = *(BH * const *)p2; - - /* Sort by file (shared memory pool offset). */ - if (bhp1->mf_offset < bhp2->mf_offset) - return (-1); - if (bhp1->mf_offset > bhp2->mf_offset) - return (1); - - /* - * !!! - * Defend against badly written quicksort code calling the comparison - * function with two identical pointers (e.g., WATCOM C++ (Power++)). - */ - if (bhp1->pgno < bhp2->pgno) - return (-1); - if (bhp1->pgno > bhp2->pgno) - return (1); - return (0); -} |