diff options
author | Ulrich Drepper <drepper@redhat.com> | 1999-06-13 13:36:34 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 1999-06-13 13:36:34 +0000 |
commit | ec239360d13518a13f572b635d036c7d10028010 (patch) | |
tree | bdb5111363f45d2107849c2456b575d72779174c /db2/mp | |
parent | fc3703521650a9b6db910a50c4fc0f410496e134 (diff) | |
download | glibc-ec239360d13518a13f572b635d036c7d10028010.tar glibc-ec239360d13518a13f572b635d036c7d10028010.tar.gz glibc-ec239360d13518a13f572b635d036c7d10028010.tar.bz2 glibc-ec239360d13518a13f572b635d036c7d10028010.zip |
Update.
* db2/Makefile (distribute): Remove files which do not exist
anymore.
Diffstat (limited to 'db2/mp')
-rw-r--r-- | db2/mp/mp_bh.c | 176 | ||||
-rw-r--r-- | db2/mp/mp_fget.c | 27 | ||||
-rw-r--r-- | db2/mp/mp_fopen.c | 123 | ||||
-rw-r--r-- | db2/mp/mp_fput.c | 14 | ||||
-rw-r--r-- | db2/mp/mp_fset.c | 4 | ||||
-rw-r--r-- | db2/mp/mp_open.c | 45 | ||||
-rw-r--r-- | db2/mp/mp_pr.c | 26 | ||||
-rw-r--r-- | db2/mp/mp_region.c | 42 | ||||
-rw-r--r-- | db2/mp/mp_sync.c | 166 |
9 files changed, 382 insertions, 241 deletions
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c index d89f9c2ded..12c53417d9 100644 --- a/db2/mp/mp_bh.c +++ b/db2/mp/mp_bh.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_bh.c 10.38 (Sleepycat) 5/20/98"; +static const char sccsid[] = "@(#)mp_bh.c 10.45 (Sleepycat) 11/25/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -42,11 +42,13 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) { DB_MPOOLFILE *dbmfp; DB_MPREG *mpreg; + int incremented, ret; if (restartp != NULL) *restartp = 0; if (wrotep != NULL) *wrotep = 0; + incremented = 0; /* * Walk the process' DB_MPOOLFILE list and find a file descriptor for @@ -63,6 +65,13 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) UNLOCKHANDLE(dbmp, dbmp->mutexp); return (0); } + + /* + * Increment the reference count -- see the comment in + * memp_fclose(). + */ + ++dbmfp->ref; + incremented = 1; break; } UNLOCKHANDLE(dbmp, dbmp->mutexp); @@ -117,7 +126,15 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) 0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0) return (0); -found: return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep)); +found: ret = __memp_pgwrite(dbmfp, bhp, restartp, wrotep); + + if (incremented) { + LOCKHANDLE(dbmp, dbmp->mutexp); + --dbmfp->ref; + UNLOCKHANDLE(dbmp, dbmp->mutexp); + } + + return (ret); } /* @@ -132,11 +149,12 @@ __memp_pgread(dbmfp, bhp, can_create) BH *bhp; int can_create; { + DB_IO db_io; DB_MPOOL *dbmp; MPOOLFILE *mfp; - size_t pagesize; + size_t len, pagesize; ssize_t nr; - int ret; + int created, ret; dbmp = dbmfp->dbmp; mfp = dbmfp->mfp; @@ -147,70 +165,63 @@ __memp_pgread(dbmfp, bhp, can_create) UNLOCKREGION(dbmp); /* - * Temporary files may not yet have been created. - * - * Seek to the page location. + * Temporary files may not yet have been created. We don't create + * them now, we create them when the pages have to be flushed. 
*/ - ret = 0; - LOCKHANDLE(dbmp, dbmfp->mutexp); - if (dbmfp->fd == -1 || (ret = - __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) { - if (!can_create) { - if (dbmfp->fd == -1) - ret = EINVAL; - UNLOCKHANDLE(dbmp, dbmfp->mutexp); + nr = 0; + if (dbmfp->fd == -1) + ret = 0; + else { + /* + * Ignore read errors if we have permission to create the page. + * Assume that the page doesn't exist, and that we'll create it + * when we write it out. + */ + db_io.fd_io = dbmfp->fd; + db_io.fd_lock = dbmp->reginfo.fd; + db_io.mutexp = + F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL; + db_io.pagesize = db_io.bytes = pagesize; + db_io.pgno = bhp->pgno; + db_io.buf = bhp->buf; + + ret = __os_io(&db_io, DB_IO_READ, &nr); + } + + created = 0; + if (nr < (ssize_t)pagesize) { + if (can_create) + created = 1; + else { + /* If we had a short read, ret may be 0. */ + if (ret == 0) + ret = EIO; __db_err(dbmp->dbenv, "%s: page %lu doesn't exist, create flag not set", __memp_fn(dbmfp), (u_long)bhp->pgno); goto err; } - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - - /* Clear the created page. */ - if (mfp->clear_len == 0) - memset(bhp->buf, 0, pagesize); - else { - memset(bhp->buf, 0, mfp->clear_len); -#ifdef DIAGNOSTIC - memset(bhp->buf + mfp->clear_len, - 0xff, pagesize - mfp->clear_len); -#endif - } - - goto pgin; } /* - * Read the page; short reads are treated like creates, although - * any valid data is preserved. + * Clear any bytes we didn't read that need to be cleared. If we're + * running in diagnostic mode, smash any bytes on the page that are + * unknown quantities for the caller. 
*/ - ret = __db_read(dbmfp->fd, bhp->buf, pagesize, &nr); - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - if (ret != 0) - goto err; - - if (nr == (ssize_t)pagesize) - can_create = 0; - else { - if (!can_create) { - ret = EINVAL; - goto err; - } - - /* - * If we didn't fail until we tried the read, don't clear the - * whole page, it wouldn't be insane for a filesystem to just - * always behave that way. Else, clear any uninitialized data. - */ - if (nr == 0) - memset(bhp->buf, 0, - mfp->clear_len == 0 ? pagesize : mfp->clear_len); - else - memset(bhp->buf + nr, 0, pagesize - nr); + if (nr != (ssize_t)pagesize) { + len = mfp->clear_len == 0 ? pagesize : mfp->clear_len; + if (nr < (ssize_t)len) + memset(bhp->buf + nr, 0, len - nr); +#ifdef DIAGNOSTIC + if (nr > (ssize_t)len) + len = nr; + if (len < pagesize) + memset(bhp->buf + len, 0xdb, pagesize - len); +#endif } /* Call any pgin function. */ -pgin: ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1); + ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1); /* Unlock the buffer and reacquire the region lock. */ err: UNLOCKBUFFER(dbmp, bhp); @@ -225,7 +236,7 @@ err: UNLOCKBUFFER(dbmp, bhp); F_CLR(bhp, BH_TRASH); /* Update the statistics. */ - if (can_create) { + if (created) { ++dbmp->mp->stat.st_page_create; ++mfp->stat.st_page_create; } else { @@ -250,12 +261,12 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) int *restartp, *wrotep; { DB_ENV *dbenv; + DB_IO db_io; DB_LOG *lg_info; DB_LSN lsn; DB_MPOOL *dbmp; MPOOL *mp; MPOOLFILE *mfp; - size_t pagesize; ssize_t nw; int callpgin, ret, syncfail; const char *fail; @@ -270,7 +281,6 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) if (wrotep != NULL) *wrotep = 0; callpgin = 0; - pagesize = mfp->stat.st_pagesize; /* * Check the dirty bit -- this buffer may have been written since we @@ -326,34 +336,32 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) } /* Temporary files may not yet have been created. 
*/ - LOCKHANDLE(dbmp, dbmfp->mutexp); - if (dbmfp->fd == -1 && - ((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL, - DB_CREATE | DB_EXCL | DB_TEMPORARY, &dbmfp->fd, NULL)) != 0 || - dbmfp->fd == -1)) { + if (dbmfp->fd == -1) { + LOCKHANDLE(dbmp, dbmfp->mutexp); + if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv, + DB_APP_TMP, NULL, NULL, DB_CREATE | DB_EXCL | DB_TEMPORARY, + &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) { + UNLOCKHANDLE(dbmp, dbmfp->mutexp); + __db_err(dbenv, + "unable to create temporary backing file"); + goto err; + } UNLOCKHANDLE(dbmp, dbmfp->mutexp); - __db_err(dbenv, "unable to create temporary backing file"); - goto err; } - /* - * Write the page out. - * - * XXX - * Shut the compiler up; it doesn't understand the correlation between - * the failing clauses to __db_lseek and __db_write and this ret != 0. - */ - COMPQUIET(fail, NULL); - if ((ret = - __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) - fail = "seek"; - else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0) + /* Write the page. */ + db_io.fd_io = dbmfp->fd; + db_io.fd_lock = dbmp->reginfo.fd; + db_io.mutexp = F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL; + db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize; + db_io.pgno = bhp->pgno; + db_io.buf = bhp->buf; + if ((ret = __os_io(&db_io, DB_IO_WRITE, &nw)) != 0) { + __db_panic(dbenv, ret); fail = "write"; - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - if (ret != 0) goto syserr; - - if (nw != (ssize_t)pagesize) { + } + if (nw != (ssize_t)mfp->stat.st_pagesize) { ret = EIO; fail = "write"; goto syserr; @@ -394,7 +402,7 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) if (F_ISSET(bhp, BH_WRITE)) { if (mfp->lsn_cnt == 1) { UNLOCKREGION(dbmp); - syncfail = __db_fsync(dbmfp->fd) != 0; + syncfail = __os_fsync(dbmfp->fd) != 0; LOCKREGION(dbmp); if (syncfail) F_SET(mp, MP_LSN_RETRY); @@ -574,11 +582,11 @@ __memp_upgrade(dbmp, dbmfp, mfp) ret = 1; } else { /* Swap the descriptors and set the upgrade flag. 
*/ - (void)__db_close(dbmfp->fd); + (void)__os_close(dbmfp->fd); dbmfp->fd = fd; F_SET(dbmfp, MP_UPGRADE); ret = 0; } - FREES(rpath); + __os_freestr(rpath); return (ret); } diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c index 0777aa7dc6..f159dc2d3e 100644 --- a/db2/mp/mp_fget.c +++ b/db2/mp/mp_fget.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fget.c 10.48 (Sleepycat) 6/2/98"; +static const char sccsid[] = "@(#)mp_fget.c 10.53 (Sleepycat) 11/16/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -46,6 +46,8 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) mp = dbmp->mp; mfp = dbmfp->mfp; + MP_PANIC_CHECK(dbmp); + /* * Validate arguments. * @@ -79,12 +81,11 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) #ifdef DIAGNOSTIC /* * XXX - * We want to switch threads as often as possible. Sleep every time - * we get a new page to make it more likely. + * We want to switch threads as often as possible. Yield every time + * we get a new page to ensure contention. */ - if (DB_GLOBAL(db_pageyield) && - (__db_yield == NULL || __db_yield() != 0)) - __db_sleep(0, 1); + if (DB_GLOBAL(db_pageyield)) + __os_yield(1); #endif /* Initialize remaining local variables. */ @@ -205,8 +206,8 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) * up running to the end of our CPU quantum as we will * simply be swapping between the two locks. */ - if (!first && (__db_yield == NULL || __db_yield() != 0)) - __db_sleep(0, 1); + if (!first) + __os_yield(1); LOCKBUFFER(dbmp, bhp); /* Wait for I/O to finish... */ @@ -240,7 +241,7 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) } alloc: /* Allocate new buffer header and data space. */ - if ((ret = __memp_ralloc(dbmp, sizeof(BH) - + if ((ret = __memp_alloc(dbmp, sizeof(BH) - sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0) goto err; @@ -285,7 +286,7 @@ alloc: /* Allocate new buffer header and data space. 
*/ else { memset(bhp->buf, 0, mfp->clear_len); #ifdef DIAGNOSTIC - memset(bhp->buf + mfp->clear_len, 0xff, + memset(bhp->buf + mfp->clear_len, 0xdb, mfp->stat.st_pagesize - mfp->clear_len); #endif } @@ -335,11 +336,9 @@ done: /* Update the chain search statistics. */ mp->stat.st_hash_examined += st_hsearch; } - UNLOCKREGION(dbmp); - - LOCKHANDLE(dbmp, dbmfp->mutexp); ++dbmfp->pinref; - UNLOCKHANDLE(dbmp, dbmfp->mutexp); + + UNLOCKREGION(dbmp); return (0); diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c index a4cbac8d4e..dd02662fd8 100644 --- a/db2/mp/mp_fopen.c +++ b/db2/mp/mp_fopen.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fopen.c 10.47 (Sleepycat) 5/4/98"; +static const char sccsid[] = "@(#)mp_fopen.c 10.60 (Sleepycat) 1/1/99"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -43,6 +43,8 @@ memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp) { int ret; + MP_PANIC_CHECK(dbmp); + /* Validate arguments. */ if ((ret = __db_fchk(dbmp->dbenv, "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0) @@ -53,6 +55,8 @@ memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp) __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified"); return (EINVAL); } + if (finfop != NULL && finfop->clear_len > pagesize) + return (EINVAL); return (__memp_fopen(dbmp, NULL, path, flags, mode, pagesize, 1, finfop, retp)); @@ -80,7 +84,7 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) DB_MPOOLFILE *dbmfp; DB_MPOOL_FINFO finfo; db_pgno_t last_pgno; - size_t size; + size_t maxmap; u_int32_t mbytes, bytes; int ret; u_int8_t idbuf[DB_FILE_ID_LEN]; @@ -115,13 +119,11 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) } /* Allocate and initialize the per-process structure. 
*/ - if ((dbmfp = - (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) { - __db_err(dbenv, "memp_fopen: %s", strerror(ENOMEM)); - return (ENOMEM); - } + if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0) + return (ret); dbmfp->dbmp = dbmp; dbmfp->fd = -1; + dbmfp->ref = 1; if (LF_ISSET(DB_RDONLY)) F_SET(dbmfp, MP_READONLY); @@ -132,7 +134,6 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) ret = EINVAL; goto err; } - size = 0; last_pgno = 0; } else { /* Get the real name for this file and open it. */ @@ -146,21 +147,40 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) goto err; } - /* Don't permit files that aren't a multiple of the pagesize. */ - if ((ret = __db_ioinfo(rpath, + /* + * Don't permit files that aren't a multiple of the pagesize, + * and find the number of the last page in the file, all the + * time being careful not to overflow 32 bits. + * + * !!! + * We can't use off_t's here, or in any code in the mainline + * library for that matter. (We have to use them in the os + * stubs, of course, as there are system calls that take them + * as arguments.) The reason is that some customers build in + * environments where an off_t is 32-bits, but still run where + * offsets are 64-bits, and they pay us a lot of money. + */ + if ((ret = __os_ioinfo(rpath, dbmfp->fd, &mbytes, &bytes, NULL)) != 0) { __db_err(dbenv, "%s: %s", rpath, strerror(ret)); goto err; } - if (bytes % pagesize) { + + /* Page sizes have to be a power-of-two, ignore mbytes. */ + if (bytes % pagesize != 0) { __db_err(dbenv, "%s: file size not a multiple of the pagesize", rpath); ret = EINVAL; goto err; } - size = mbytes * MEGABYTE + bytes; - last_pgno = size == 0 ? 0 : (size - 1) / pagesize; + + last_pgno = mbytes * (MEGABYTE / pagesize); + last_pgno += bytes / pagesize; + + /* Correction: page numbers are zero-based, not 1-based. 
*/ + if (last_pgno != 0) + --last_pgno; /* * Get the file id if we weren't given one. Generated file id's @@ -168,7 +188,7 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) * other process joining the party. */ if (finfop->fileid == NULL) { - if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0) + if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0) goto err; finfop->fileid = idbuf; } @@ -191,7 +211,7 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) } if (ret == 0 && F_ISSET(dbmp, MP_LOCKHANDLE) && (ret = - __memp_ralloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0) + __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0) LOCKINIT(dbmp, dbmfp->mutexp); if (needlock) @@ -232,13 +252,15 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) F_CLR(mfp, MP_CAN_MMAP); if (LF_ISSET(DB_NOMMAP)) F_CLR(mfp, MP_CAN_MMAP); - if (size > (dbenv == NULL || dbenv->mp_mmapsize == 0 ? - DB_MAXMMAPSIZE : dbenv->mp_mmapsize)) + maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ? + DB_MAXMMAPSIZE : dbenv->mp_mmapsize; + if (mbytes > maxmap / MEGABYTE || + (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE)) F_CLR(mfp, MP_CAN_MMAP); } dbmfp->addr = NULL; if (F_ISSET(mfp, MP_CAN_MMAP)) { - dbmfp->len = size; + dbmfp->len = (size_t)mbytes * MEGABYTE + bytes; if (__db_mapfile(rpath, dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) { dbmfp->addr = NULL; @@ -246,7 +268,7 @@ __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) } } if (rpath != NULL) - FREES(rpath); + __os_freestr(rpath); LOCKHANDLE(dbmp, dbmp->mutexp); TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); @@ -260,11 +282,11 @@ err: /* * never get to here after we have successfully allocated it. 
*/ if (rpath != NULL) - FREES(rpath); + __os_freestr(rpath); if (dbmfp->fd != -1) - (void)__db_close(dbmfp->fd); + (void)__os_close(dbmfp->fd); if (dbmfp != NULL) - FREE(dbmfp, sizeof(DB_MPOOLFILE)); + __os_free(dbmfp, sizeof(DB_MPOOLFILE)); return (ret); } @@ -315,7 +337,7 @@ __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp) } /* Allocate a new MPOOLFILE. */ - if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) + if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) return (ret); *retp = mfp; @@ -334,21 +356,22 @@ __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp) mfp->stat.st_pagesize = pagesize; mfp->orig_last_pgno = mfp->last_pgno = last_pgno; - F_SET(mfp, MP_CAN_MMAP); if (ISTEMPORARY) F_SET(mfp, MP_TEMP); else { /* Copy the file path into shared memory. */ - if ((ret = __memp_ralloc(dbmp, + if ((ret = __memp_alloc(dbmp, strlen(path) + 1, &mfp->path_off, &p)) != 0) goto err; memcpy(p, path, strlen(path) + 1); /* Copy the file identification string into shared memory. */ - if ((ret = __memp_ralloc(dbmp, + if ((ret = __memp_alloc(dbmp, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) goto err; memcpy(p, finfop->fileid, DB_FILE_ID_LEN); + + F_SET(mfp, MP_CAN_MMAP); } /* Copy the page cookie into shared memory. */ @@ -356,7 +379,7 @@ __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp) mfp->pgcookie_len = 0; mfp->pgcookie_off = 0; } else { - if ((ret = __memp_ralloc(dbmp, + if ((ret = __memp_alloc(dbmp, finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) goto err; memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size); @@ -394,16 +417,48 @@ memp_fclose(dbmfp) dbmp = dbmfp->dbmp; ret = 0; + MP_PANIC_CHECK(dbmp); + + for (;;) { + LOCKHANDLE(dbmp, dbmp->mutexp); + + /* + * We have to reference count DB_MPOOLFILE structures as other + * threads may be using them. The problem only happens if the + * application makes a bad design choice. Here's the path: + * + * Thread A opens a database. 
+ * Thread B uses thread A's DB_MPOOLFILE to write a buffer + * in order to free up memory in the mpool cache. + * Thread A closes the database while thread B is using the + * DB_MPOOLFILE structure. + * + * By opening all databases before creating the threads, and + * closing them after the threads have exited, applications + * get better performance and avoid the problem path entirely. + * + * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer + * is a short-term lock, even in worst case, since we better be + * the only thread of control using the DB_MPOOLFILE structure + * to read pages *into* the cache. Wait until we're the only + * reference holder and remove the DB_MPOOLFILE structure from + * the list, so nobody else can even find it. + */ + if (dbmfp->ref == 1) { + TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q); + break; + } + UNLOCKHANDLE(dbmp, dbmp->mutexp); + + (void)__os_sleep(1, 0); + } + UNLOCKHANDLE(dbmp, dbmp->mutexp); + /* Complain if pinned blocks never returned. */ if (dbmfp->pinref != 0) __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned", __memp_fn(dbmfp), (u_long)dbmfp->pinref); - /* Remove the DB_MPOOLFILE structure from the list. */ - LOCKHANDLE(dbmp, dbmp->mutexp); - TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q); - UNLOCKHANDLE(dbmp, dbmp->mutexp); - /* Close the underlying MPOOLFILE. */ (void)__memp_mf_close(dbmp, dbmfp); @@ -414,7 +469,7 @@ memp_fclose(dbmfp) "%s: %s", __memp_fn(dbmfp), strerror(ret)); /* Close the file; temporary files may not yet have been created. */ - if (dbmfp->fd != -1 && (t_ret = __db_close(dbmfp->fd)) != 0) { + if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) { __db_err(dbmp->dbenv, "%s: %s", __memp_fn(dbmfp), strerror(t_ret)); if (ret != 0) @@ -429,7 +484,7 @@ memp_fclose(dbmfp) } /* Discard the DB_MPOOLFILE structure. 
*/ - FREE(dbmfp, sizeof(DB_MPOOLFILE)); + __os_free(dbmfp, sizeof(DB_MPOOLFILE)); return (ret); } diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c index 48fdfc3b7f..c551f97380 100644 --- a/db2/mp/mp_fput.c +++ b/db2/mp/mp_fput.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fput.c 10.22 (Sleepycat) 4/26/98"; +static const char sccsid[] = "@(#)mp_fput.c 10.24 (Sleepycat) 9/27/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -40,6 +40,8 @@ memp_fput(dbmfp, pgaddr, flags) dbmp = dbmfp->dbmp; mp = dbmp->mp; + MP_PANIC_CHECK(dbmp); + /* Validate arguments. */ if (flags) { if ((ret = __db_fchk(dbmp->dbenv, "memp_fput", flags, @@ -57,15 +59,15 @@ memp_fput(dbmfp, pgaddr, flags) } } + LOCKREGION(dbmp); + /* Decrement the pinned reference count. */ - LOCKHANDLE(dbmp, dbmfp->mutexp); if (dbmfp->pinref == 0) __db_err(dbmp->dbenv, "%s: put: more blocks returned than retrieved", __memp_fn(dbmfp)); else --dbmfp->pinref; - UNLOCKHANDLE(dbmp, dbmfp->mutexp); /* * If we're mapping the file, there's nothing to do. Because we can @@ -74,14 +76,14 @@ memp_fput(dbmfp, pgaddr, flags) * region. */ if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr && - (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) + (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) { + UNLOCKREGION(dbmp); return (0); + } /* Convert the page address to a buffer header. */ bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf)); - LOCKREGION(dbmp); - /* Set/clear the page bits. 
*/ if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) { ++mp->stat.st_page_clean; diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c index 3b352aa553..1940d3b198 100644 --- a/db2/mp/mp_fset.c +++ b/db2/mp/mp_fset.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fset.c 10.15 (Sleepycat) 4/26/98"; +static const char sccsid[] = "@(#)mp_fset.c 10.16 (Sleepycat) 9/27/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -40,6 +40,8 @@ memp_fset(dbmfp, pgaddr, flags) dbmp = dbmfp->dbmp; mp = dbmp->mp; + MP_PANIC_CHECK(dbmp); + /* Validate arguments. */ if (flags == 0) return (__db_ferr(dbmp->dbenv, "memp_fset", 1)); diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c index fc985bc521..4c90fc438f 100644 --- a/db2/mp/mp_open.c +++ b/db2/mp/mp_open.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_open.c 10.23 (Sleepycat) 5/3/98"; +static const char sccsid[] = "@(#)mp_open.c 10.27 (Sleepycat) 10/1/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -52,8 +52,8 @@ memp_open(path, flags, mode, dbenv, retp) cachesize = dbenv == NULL ? 0 : dbenv->mp_size; /* Create and initialize the DB_MPOOL structure. */ - if ((dbmp = (DB_MPOOL *)__db_calloc(1, sizeof(DB_MPOOL))) == NULL) - return (ENOMEM); + if ((ret = __os_calloc(1, sizeof(DB_MPOOL), &dbmp)) != 0) + return (ret); LIST_INIT(&dbmp->dbregq); TAILQ_INIT(&dbmp->dbmfq); @@ -83,7 +83,7 @@ memp_open(path, flags, mode, dbenv, retp) if (LF_ISSET(DB_THREAD)) { F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION); LOCKREGION(dbmp); - ret = __memp_ralloc(dbmp, + ret = __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmp->mutexp); UNLOCKREGION(dbmp); if (ret != 0) { @@ -97,7 +97,7 @@ memp_open(path, flags, mode, dbenv, retp) return (0); err: if (dbmp != NULL) - FREE(dbmp, sizeof(DB_MPOOL)); + __os_free(dbmp, sizeof(DB_MPOOL)); return (ret); } @@ -115,10 +115,12 @@ memp_close(dbmp) ret = 0; + MP_PANIC_CHECK(dbmp); + /* Discard DB_MPREGs. 
*/ while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) { LIST_REMOVE(mpreg, q); - FREE(mpreg, sizeof(DB_MPREG)); + __os_free(mpreg, sizeof(DB_MPREG)); } /* Discard DB_MPOOLFILEs. */ @@ -138,13 +140,27 @@ memp_close(dbmp) ret = t_ret; if (dbmp->reginfo.path != NULL) - FREES(dbmp->reginfo.path); - FREE(dbmp, sizeof(DB_MPOOL)); + __os_freestr(dbmp->reginfo.path); + __os_free(dbmp, sizeof(DB_MPOOL)); return (ret); } /* + * __memp_panic -- + * Panic a memory pool. + * + * PUBLIC: void __memp_panic __P((DB_ENV *)); + */ +void +__memp_panic(dbenv) + DB_ENV *dbenv; +{ + if (dbenv->mp_info != NULL) + dbenv->mp_info->mp->rlayout.panic = 1; +} + +/* * memp_unlink -- * Exit a memory pool. */ @@ -160,12 +176,12 @@ memp_unlink(path, force, dbenv) memset(&reginfo, 0, sizeof(reginfo)); reginfo.dbenv = dbenv; reginfo.appname = DB_APP_NONE; - if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL) - return (ENOMEM); + if (path != NULL && (ret = __os_strdup(path, &reginfo.path)) != 0) + return (ret); reginfo.file = DB_DEFAULT_MPOOL_FILE; ret = __db_runlink(&reginfo, force); if (reginfo.path != NULL) - FREES(reginfo.path); + __os_freestr(reginfo.path); return (ret); } @@ -181,9 +197,12 @@ memp_register(dbmp, ftype, pgin, pgout) int (*pgout) __P((db_pgno_t, void *, DBT *)); { DB_MPREG *mpr; + int ret; + + MP_PANIC_CHECK(dbmp); - if ((mpr = (DB_MPREG *)__db_malloc(sizeof(DB_MPREG))) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(sizeof(DB_MPREG), NULL, &mpr)) != 0) + return (ret); mpr->ftype = ftype; mpr->pgin = pgin; diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c index e83e0f44fa..84c782e781 100644 --- a/db2/mp/mp_pr.c +++ b/db2/mp/mp_pr.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_pr.c 10.26 (Sleepycat) 5/23/98"; +static const char sccsid[] = "@(#)mp_pr.c 10.30 (Sleepycat) 10/1/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -44,16 +44,17 @@ memp_stat(dbmp, gspp, fspp, db_malloc) DB_MPOOL_FSTAT **tfsp; MPOOLFILE *mfp; size_t len, 
nlen; + int ret; char *name; + MP_PANIC_CHECK(dbmp); + /* Allocate space for the global statistics. */ if (gspp != NULL) { *gspp = NULL; - if ((*gspp = db_malloc == NULL ? - (DB_MPOOL_STAT *)__db_malloc(sizeof(**gspp)) : - (DB_MPOOL_STAT *)db_malloc(sizeof(**gspp))) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(sizeof(**gspp), db_malloc, gspp)) != 0) + return (ret); LOCKREGION(dbmp); @@ -89,10 +90,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc) /* Allocate space for the pointers. */ len = (len + 1) * sizeof(DB_MPOOL_FSTAT *); - if ((*fspp = db_malloc == NULL ? - (DB_MPOOL_FSTAT **)__db_malloc(len) : - (DB_MPOOL_FSTAT **)db_malloc(len)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(len, db_malloc, fspp)) != 0) + return (ret); LOCKREGION(dbmp); @@ -104,10 +103,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc) name = __memp_fns(dbmp, mfp); nlen = strlen(name); len = sizeof(DB_MPOOL_FSTAT) + nlen + 1; - if ((*tfsp = db_malloc == NULL ? - (DB_MPOOL_FSTAT *)__db_malloc(len) : - (DB_MPOOL_FSTAT *)db_malloc(len)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(len, db_malloc, tfsp)) != 0) + return (ret); **tfsp = mfp->stat; (*tfsp)->file_name = (char *) (u_int8_t *)*tfsp + sizeof(DB_MPOOL_FSTAT); @@ -212,8 +209,9 @@ __memp_dump_region(dbmp, area, fp) cnt = 0; for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) { - (void)fprintf(fp, "file #%d: %s: %lu references: %s\n", + (void)fprintf(fp, "file #%d: %s: refs %lu, type %ld, %s\n", cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref, + (long)mfp->ftype, F_ISSET(mfp, MP_CAN_MMAP) ? 
"mmap" : "read/write"); if (cnt < FMAP_ENTRIES) fmap[cnt] = R_OFFSET(dbmp, mfp); diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c index b8a72286cd..b9c92f2e13 100644 --- a/db2/mp/mp_region.c +++ b/db2/mp/mp_region.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_region.c 10.30 (Sleepycat) 5/31/98"; +static const char sccsid[] = "@(#)mp_region.c 10.35 (Sleepycat) 12/11/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -24,13 +24,33 @@ static const char sccsid[] = "@(#)mp_region.c 10.30 (Sleepycat) 5/31/98"; #include "common_ext.h" /* - * __memp_ralloc -- + * __memp_reg_alloc -- + * Allocate some space in the mpool region, with locking. + * + * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *)); + */ +int +__memp_reg_alloc(dbmp, len, offsetp, retp) + DB_MPOOL *dbmp; + size_t len, *offsetp; + void *retp; +{ + int ret; + + LOCKREGION(dbmp); + ret = __memp_alloc(dbmp, len, offsetp, retp); + UNLOCKREGION(dbmp); + return (ret); +} + +/* + * __memp_alloc -- * Allocate some space in the mpool region. * - * PUBLIC: int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *)); + * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *)); */ int -__memp_ralloc(dbmp, len, offsetp, retp) +__memp_alloc(dbmp, len, offsetp, retp) DB_MPOOL *dbmp; size_t len, *offsetp; void *retp; @@ -52,7 +72,9 @@ alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { return (0); } if (nomore) { - __db_err(dbmp->dbenv, "%s", strerror(ret)); + __db_err(dbmp->dbenv, + "Unable to allocate %lu bytes from mpool shared region: %s\n", + (u_long)len, strerror(ret)); return (ret); } @@ -91,7 +113,7 @@ alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { } retry: /* Find a buffer we can flush; pure LRU. 
*/ - total = 0; + restart = total = 0; for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) { nbhp = SH_TAILQ_NEXT(bhp, q, __bh); @@ -222,8 +244,8 @@ __memp_ropen(dbmp, path, cachesize, mode, is_private, flags) if (path == NULL) dbmp->reginfo.path = NULL; else - if ((dbmp->reginfo.path = __db_strdup(path)) == NULL) - return (ENOMEM); + if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0) + return (ret); dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE; dbmp->reginfo.mode = mode; dbmp->reginfo.size = rlen; @@ -244,7 +266,7 @@ __memp_ropen(dbmp, path, cachesize, mode, is_private, flags) if ((ret = __db_rattach(&dbmp->reginfo)) != 0) { if (dbmp->reginfo.path != NULL) - FREES(dbmp->reginfo.path); + __os_freestr(dbmp->reginfo.path); return (ret); } @@ -303,6 +325,6 @@ err: UNLOCKREGION(dbmp); (void)memp_unlink(path, 1, dbmp->dbenv); if (dbmp->reginfo.path != NULL) - FREES(dbmp->reginfo.path); + __os_freestr(dbmp->reginfo.path); return (ret); } diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c index 33218eef1a..535348517c 100644 --- a/db2/mp/mp_sync.c +++ b/db2/mp/mp_sync.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_sync.c 10.25 (Sleepycat) 4/26/98"; +static const char sccsid[] = "@(#)mp_sync.c 10.31 (Sleepycat) 12/11/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -39,9 +39,12 @@ memp_sync(dbmp, lsnp) DB_ENV *dbenv; MPOOL *mp; MPOOLFILE *mfp; - int ar_cnt, cnt, nalloc, next, ret, wrote; + int ar_cnt, nalloc, next, maxpin, ret, wrote; + + MP_PANIC_CHECK(dbmp); dbenv = dbmp->dbenv; + mp = dbmp->mp; if (dbenv->lg_info == NULL) { __db_err(dbenv, "memp_sync: requires logging"); @@ -49,16 +52,19 @@ memp_sync(dbmp, lsnp) } /* - * We try and write the buffers in page order so that the underlying - * filesystem doesn't have to seek and can write contiguous blocks, - * plus, we don't want to hold the region lock while we write the - * buffers. Get memory to hold the buffer pointers. 
Get a good-size - * block, too, because we realloc while holding the region lock if we - * run out. + * We try and write the buffers in page order: it should reduce seeks + * by the underlying filesystem and possibly reduce the actual number + * of writes. We don't want to hold the region lock while we write + * the buffers, so only hold it lock while we create a list. Get a + * good-size block of memory to hold buffer pointers, we don't want + * to run out. */ - if ((bharray = - (BH **)__db_malloc((nalloc = 1024) * sizeof(BH *))) == NULL) - return (ENOMEM); + LOCKREGION(dbmp); + nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10; + UNLOCKREGION(dbmp); + + if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0) + return (ret); LOCKREGION(dbmp); @@ -70,7 +76,6 @@ memp_sync(dbmp, lsnp) * we've already handled or are currently handling, then we return a * result based on the count for the larger LSN. */ - mp = dbmp->mp; if (!F_ISSET(mp, MP_LSN_RETRY) && log_compare(lsnp, &mp->lsn) <= 0) { if (mp->lsn_cnt == 0) { *lsnp = mp->lsn; @@ -114,10 +119,15 @@ memp_sync(dbmp, lsnp) * finish. Since the application may have restarted the sync, clear * any BH_WRITE flags that appear to be left over from previous calls. * + * We don't want to pin down the entire buffer cache, otherwise we'll + * starve threads needing new pages. Don't pin down more than 80% of + * the cache. + * * Keep a count of the total number of buffers we need to write in * MPOOL->lsn_cnt, and for each file, in MPOOLFILE->lsn_count. */ ar_cnt = 0; + maxpin = ((mp->stat.st_page_dirty + mp->stat.st_page_clean) * 8) / 10; for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) { @@ -130,19 +140,27 @@ memp_sync(dbmp, lsnp) /* * If the buffer isn't in use, we should be able to - * write it immediately, so save a reference to it. 
+ * write it immediately, so increment the reference + * count to lock it and its contents down, and then + * save a reference to it. + * + * If we've run out space to store buffer references, + * we're screwed. We don't want to realloc the array + * while holding a region lock, so we set the flag to + * force the checkpoint to be done again, from scratch, + * later. + * + * If we've pinned down too much of the cache stop, and + * set a flag to force the checkpoint to be tried again + * later. */ if (bhp->ref == 0) { - if (ar_cnt == nalloc) { - nalloc *= 2; - if ((bharray = - (BH **)__db_realloc(bharray, - nalloc * sizeof(BH *))) == NULL) { - ret = ENOMEM; - goto err; - } + ++bhp->ref; + bharray[ar_cnt] = bhp; + if (++ar_cnt >= nalloc || ar_cnt >= maxpin) { + F_SET(mp, MP_LSN_RETRY); + break; } - bharray[ar_cnt++] = bhp; } } else if (F_ISSET(bhp, BH_WRITE)) @@ -154,10 +172,6 @@ memp_sync(dbmp, lsnp) goto done; } - /* Lock down the buffers and their contents. */ - for (cnt = 0; cnt < ar_cnt; ++cnt) - ++bharray[cnt]->ref; - UNLOCKREGION(dbmp); /* Sort the buffers we're going to write. */ @@ -205,7 +219,8 @@ memp_sync(dbmp, lsnp) goto err; } } - ret = mp->lsn_cnt ? DB_INCOMPLETE : 0; + ret = mp->lsn_cnt != 0 || + F_ISSET(mp, MP_LSN_RETRY) ? 
DB_INCOMPLETE : 0; done: if (0) { @@ -224,7 +239,7 @@ err: /* F_CLR(bhp, BH_WRITE); } UNLOCKREGION(dbmp); - __db_free(bharray); + __os_free(bharray, nalloc * sizeof(BH *)); return (ret); } @@ -241,6 +256,8 @@ memp_fsync(dbmfp) dbmp = dbmfp->dbmp; + MP_PANIC_CHECK(dbmp); + /* * If this handle doesn't have a file descriptor that's open for * writing, or if the file is a temporary, there's no reason to @@ -300,25 +317,29 @@ __memp_fsync(dbmfp) { BH *bhp, **bharray; DB_MPOOL *dbmp; + MPOOL *mp; size_t mf_offset; - int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote; + int ar_cnt, incomplete, nalloc, next, ret, wrote; ret = 0; dbmp = dbmfp->dbmp; + mp = dbmp->mp; mf_offset = R_OFFSET(dbmp, dbmfp->mfp); /* - * We try and write the buffers in page order so that the underlying - * filesystem doesn't have to seek and can write contiguous blocks, - * plus, we don't want to hold the region lock while we write the - * buffers. Get memory to hold the buffer pointers. Get a good-size - * block, too, because we realloc while holding the region lock if we - * run out. + * We try and write the buffers in page order: it should reduce seeks + * by the underlying filesystem and possibly reduce the actual number + * of writes. We don't want to hold the region lock while we write + * the buffers, so only hold it lock while we create a list. Get a + * good-size block of memory to hold buffer pointers, we don't want + * to run out. */ - nalloc = 1024; - if ((bharray = - (BH **)__db_malloc((size_t)nalloc * sizeof(BH *))) == NULL) - return (ENOMEM); + LOCKREGION(dbmp); + nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10; + UNLOCKREGION(dbmp); + + if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0) + return (ret); LOCKREGION(dbmp); @@ -326,36 +347,37 @@ __memp_fsync(dbmfp) * Walk the LRU list of buffer headers, and get a list of buffers to * write for this MPOOLFILE. 
*/ - ar_cnt = pincnt = 0; - for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); + ar_cnt = incomplete = 0; + for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) { if (!F_ISSET(bhp, BH_DIRTY) || bhp->mf_offset != mf_offset) continue; if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) { - ++pincnt; + incomplete = 1; continue; } - if (ar_cnt == nalloc) { - nalloc *= 2; - if ((bharray = (BH **)__db_realloc(bharray, - nalloc * sizeof(BH *))) == NULL) { - ret = ENOMEM; - goto err; - } - } + ++bhp->ref; + bharray[ar_cnt] = bhp; - bharray[ar_cnt++] = bhp; + /* + * If we've run out space to store buffer references, we're + * screwed, as we don't want to realloc the array holding a + * region lock. Set the incomplete flag -- the only way we + * can get here is if the file is active in the buffer cache, + * which is the same thing as finding pinned buffers. + */ + if (++ar_cnt >= nalloc) { + incomplete = 1; + break; + } } - /* Lock down the buffers and their contents. */ - for (cnt = 0; cnt < ar_cnt; ++cnt) - ++bharray[cnt]->ref; - UNLOCKREGION(dbmp); /* Sort the buffers we're going to write. */ - qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp); + if (ar_cnt != 0) + qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp); LOCKREGION(dbmp); @@ -365,11 +387,10 @@ __memp_fsync(dbmfp) * It's possible for a thread to have gotten the buffer since * we listed it for writing. If the reference count is still * 1, we're the only ones using the buffer, go ahead and write. - * If it's >1, then skip the buffer and assume that it will be - * written when it's returned to the cache. + * If it's >1, then skip the buffer. */ if (bharray[next]->ref > 1) { - ++pincnt; + incomplete = 1; --bharray[next]->ref; continue; @@ -387,13 +408,18 @@ __memp_fsync(dbmfp) --bharray[next]->ref; goto err; } + + /* + * If we didn't write the buffer for some reason, don't return + * success. 
+ */ if (!wrote) - ++pincnt; + incomplete = 1; } err: UNLOCKREGION(dbmp); - __db_free(bharray); + __os_free(bharray, nalloc * sizeof(BH *)); /* * Sync the underlying file as the last thing we do, so that the OS @@ -404,7 +430,7 @@ err: UNLOCKREGION(dbmp); * issues. */ if (ret == 0) - return (pincnt == 0 ? __db_fsync(dbmfp->fd) : DB_INCOMPLETE); + return (incomplete ? DB_INCOMPLETE : __os_fsync(dbmfp->fd)); return (ret); } @@ -423,6 +449,8 @@ memp_trickle(dbmp, pct, nwrotep) u_long total; int ret, wrote; + MP_PANIC_CHECK(dbmp); + mp = dbmp->mp; if (nwrotep != NULL) *nwrotep = 0; @@ -487,7 +515,7 @@ loop: total = mp->stat.st_page_clean + mp->stat.st_page_dirty; } /* No more buffers to write. */ - return (0); + ret = 0; err: UNLOCKREGION(dbmp); return (ret); @@ -508,6 +536,14 @@ __bhcmp(p1, p2) if (bhp1->mf_offset > bhp2->mf_offset) return (1); - /* Sort by page in file. */ - return (bhp1->pgno < bhp2->pgno ? -1 : 1); + /* + * !!! + * Defend against badly written quicksort code calling the comparison + * function with two identical pointers (e.g., WATCOM C++ (Power++)). + */ + if (bhp1->pgno < bhp2->pgno) + return (-1); + if (bhp1->pgno > bhp2->pgno) + return (1); + return (0); } |