diff options
Diffstat (limited to 'db2')
54 files changed, 1093 insertions, 635 deletions
diff --git a/db2/Makefile b/db2/Makefile index 8083ee3117..0ae06a3089 100644 --- a/db2/Makefile +++ b/db2/Makefile @@ -59,9 +59,9 @@ libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \ bt_split bt_stat btree_auto db db_appinit db_apprec \ db_auto \ db_byteorder db_conv db_dispatch db_dup db_err db_log2 \ - os_abs os_dir os_fid os_fsync os_func os_map os_oflags \ + os_abs os_config os_dir os_fid os_fsync os_map os_oflags \ os_open os_rpath os_rw os_seek os_sleep os_stat os_unlink \ - db_overflow db_pr db_rec db_region db_ret db_salloc \ + os_spin db_overflow db_pr db_rec db_region db_ret db_salloc \ db_shash db_thread hash hash_auto hash_conv hash_debug \ hash_dup hash_func hash_page hash_rec hash_stat lock \ lock_conflict lock_deadlock lock_util log log_archive \ diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c index e5f3faeb70..47ecd7c66d 100644 --- a/db2/btree/bt_cursor.c +++ b/db2/btree/bt_cursor.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_cursor.c 10.35 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)bt_cursor.c 10.37 (Sleepycat) 11/22/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -33,7 +33,7 @@ static int __bam_c_next __P((DB *, CURSOR *, int)); static int __bam_c_physdel __P((DB *, CURSOR *, PAGE *)); static int __bam_c_prev __P((DB *, CURSOR *)); static int __bam_c_put __P((DBC *, DBT *, DBT *, int)); -static int __bam_c_rget __P((DB *, CURSOR *, DBT *, DBT *, int)); +static int __bam_c_rget __P((DB *, CURSOR *, DBT *, int)); static int __bam_c_search __P((DB *, CURSOR *, const DBT *, u_int, int, int *)); /* Discard the current page/lock held by a cursor. 
*/ @@ -229,7 +229,7 @@ __bam_c_del(dbc, flags) B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type); else B_DSET(GET_BKEYDATA(h, indx)->type); - (void)__bam_ca_delete(dbp, pgno, indx, NULL); + (void)__bam_ca_delete(dbp, pgno, indx, NULL, 0); ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); @@ -313,7 +313,7 @@ __bam_c_get(dbc, key, data, flags) * been rammed into the interface. */ if (LF_ISSET(DB_GET_RECNO)) { - ret = __bam_c_rget(dbp, cp, key, data, flags); + ret = __bam_c_rget(dbp, cp, data, flags); PUTHANDLE(dbp); return (ret); } @@ -441,10 +441,10 @@ err: if (cp->page != NULL) * Return the record number for a cursor. */ static int -__bam_c_rget(dbp, cp, key, data, flags) +__bam_c_rget(dbp, cp, data, flags) DB *dbp; CURSOR *cp; - DBT *key, *data; + DBT *data; int flags; { BTREE *t; @@ -1113,18 +1113,18 @@ __bam_cprint(dbp) /* * __bam_ca_delete -- - * Check if any of the cursors refer to the item we are about to delete. - * We'll return the number of cursors that refer to the item in question. - * If a cursor does refer to the item, then we set its deleted bit. + * Check if any of the cursors refer to the item we are about to delete, + * returning the number of cursors that refer to the item in question. * - * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *)); + * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int)); */ int -__bam_ca_delete(dbp, pgno, indx, curs) +__bam_ca_delete(dbp, pgno, indx, curs, key_delete) DB *dbp; db_pgno_t pgno; u_int32_t indx; CURSOR *curs; + int key_delete; { DBC *dbc; CURSOR *cp; @@ -1140,22 +1140,40 @@ __bam_ca_delete(dbp, pgno, indx, curs) * It's possible for multiple cursors within the thread to have write * locks on the same page, but, cursors within a thread must be single * threaded, so all we're locking here is the cursor linked list. - * - * indx refers to the first of what might be a duplicate set. The - * cursor passed in is the one initiating the delete, so we don't - * want to count it. 
*/ DB_THREAD_LOCK(dbp); + for (count = 0, dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (CURSOR *)dbc->internal; - if ((curs != cp && - cp->pgno == pgno && cp->indx == indx) || - (cp->dpgno == pgno && cp->dindx == indx)) { - ++count; - F_SET(cp, C_DELETED); - } + + /* + * Optionally, a cursor passed in is the one initiating the + * delete, so we don't want to count it or set its deleted + * flag. Otherwise, if a cursor refers to the item, then we + * set its deleted flag. + */ + if (curs == cp) + continue; + + /* + * If we're deleting the key itself and not just one of its + * duplicates, repoint the cursor to the main-page key/data + * pair, everything else is about to be discarded. + */ + if (key_delete || cp->dpgno == PGNO_INVALID) { + if (cp->pgno == pgno && cp->indx == indx) { + cp->dpgno = PGNO_INVALID; + ++count; + F_SET(cp, C_DELETED); + } + } else + if (cp->dpgno == pgno && cp->dindx == indx) { + ++count; + F_SET(cp, C_DELETED); + } } + DB_THREAD_UNLOCK(dbp); return (count); } @@ -1440,7 +1458,7 @@ __bam_c_physdel(dbp, cp, h) * If the item is referenced by another cursor, leave it up to that * cursor to do the delete. */ - if (__bam_ca_delete(dbp, pgno, indx, cp) != 0) + if (__bam_ca_delete(dbp, pgno, indx, cp, 0) != 0) return (0); /* diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c index 9593d0109c..dbd1995f89 100644 --- a/db2/btree/bt_delete.c +++ b/db2/btree/bt_delete.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_delete.c 10.22 (Sleepycat) 11/2/97"; +static const char sccsid[] = "@(#)bt_delete.c 10.23 (Sleepycat) 11/22/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -101,17 +101,20 @@ __bam_delete(argdbp, txn, key, flags) h = t->bt_csp->page; indx = t->bt_csp->indx; - /* Delete the key/data pair, including any duplicates. */ + /* Delete the key/data pair, including any on-or-off page duplicates. 
*/ for (cnt = 1, i = indx;; ++cnt) if ((i += P_INDX) >= NUM_ENT(h) || h->inp[i] != h->inp[indx]) break; for (; cnt > 0; --cnt, ++t->lstat.bt_deleted) - if (__bam_ca_delete(dbp, h->pgno, indx, NULL) != 0) { + if (__bam_ca_delete(dbp, h->pgno, indx, NULL, 1) == 0) { + if ((ret = __bam_ditem(dbp, h, indx)) != 0) + goto err; + if ((ret = __bam_ditem(dbp, h, indx)) != 0) + goto err; + } else { B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type); indx += P_INDX; - } else if ((ret = __bam_ditem(dbp, h, indx)) != 0 || - (ret = __bam_ditem(dbp, h, indx)) != 0) - goto err; + } /* If we're using record numbers, update internal page record counts. */ if (F_ISSET(dbp, DB_BT_RECNUM) && (ret = __bam_adjust(dbp, t, -1)) != 0) diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c index b3d775bb0f..3161b02b55 100644 --- a/db2/btree/bt_put.c +++ b/db2/btree/bt_put.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_put.c 10.31 (Sleepycat) 10/26/97"; +static const char sccsid[] = "@(#)bt_put.c 10.35 (Sleepycat) 11/22/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -64,6 +64,7 @@ static const char sccsid[] = "@(#)bt_put.c 10.31 (Sleepycat) 10/26/97"; #include "btree.h" static int __bam_fixed __P((BTREE *, DBT *)); +static int __bam_isdeleted __P((DB *, PAGE *, u_int32_t, int *)); static int __bam_lookup __P((DB *, DBT *, int *)); static int __bam_ndup __P((DB *, PAGE *, u_int32_t)); static int __bam_ovput __P((DB *, PAGE *, u_int32_t, DBT *)); @@ -89,7 +90,7 @@ __bam_put(argdbp, txn, key, data, flags) DB *dbp; PAGE *h; db_indx_t indx; - int exact, iflags, newkey, replace, ret, stack; + int exact, iflags, isdeleted, newkey, replace, ret, stack; DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags); @@ -114,21 +115,25 @@ retry: /* stack = 1; /* - * If an identical key is already in the tree, and DB_NOOVERWRITE is - * set, an error is returned. 
If an identical key is already in the - * tree and DB_NOOVERWRITE is not set, the key is either added (when - * duplicates are permitted) or an error is returned. The exception - * is when the item located is referenced by a cursor and marked for - * deletion, in which case we permit the overwrite and flag the cursor. + * If DB_NOOVERWRITE is set and there's an identical key in the tree, + * return an error unless the data item has already been marked for + * deletion, or, all the remaining data items have already been marked + * for deletion in the case of duplicates. If all the data items have + * been marked for deletion, we do a replace, otherwise, it has to be + * a set of duplicates, and we simply append a new one to the set. */ - replace = 0; - if (exact && flags == DB_NOOVERWRITE) { - if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type)) { - ret = DB_KEYEXIST; + isdeleted = replace = 0; + if (exact) { + if ((ret = __bam_isdeleted(dbp, h, indx, &isdeleted)) != 0) goto err; - } - replace = 1; - __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP); + if (isdeleted) { + replace = 1; + __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP); + } else + if (flags == DB_NOOVERWRITE) { + ret = DB_KEYEXIST; + goto err; + } } /* @@ -151,7 +156,7 @@ retry: /* */ newkey = dbp->type == DB_BTREE && !exact; if (exact) { - if (F_ISSET(dbp, DB_AM_DUP)) { + if (!isdeleted && F_ISSET(dbp, DB_AM_DUP)) { /* * Make sure that we're not looking at a page of * duplicates -- if so, move to the last entry on @@ -234,6 +239,88 @@ err: if (stack) } /* + * __bam_isdeleted -- + * Return if the only remaining data item for the element has been + * deleted. 
+ */ +static int +__bam_isdeleted(dbp, h, indx, isdeletedp) + DB *dbp; + PAGE *h; + u_int32_t indx; + int *isdeletedp; +{ + BKEYDATA *bk; + db_pgno_t pgno; + int ret; + + *isdeletedp = 1; + for (;;) { + bk = GET_BKEYDATA(h, indx + O_INDX); + switch (B_TYPE(bk->type)) { + case B_KEYDATA: + case B_OVERFLOW: + if (!B_DISSET(bk->type)) { + *isdeletedp = 0; + return (0); + } + break; + case B_DUPLICATE: + /* + * If the data item referencing the off-page duplicates + * is flagged as deleted, we're done. Else, we have to + * walk the chain of duplicate pages. + */ + if (B_DISSET(bk->type)) + return (0); + goto dupchk; + default: + return (__db_pgfmt(dbp, h->pgno)); + } + + /* + * If there are no more on-page duplicate items, then every + * data item for this key must have been deleted. + */ + if (indx + P_INDX >= (u_int32_t)NUM_ENT(h)) + return (0); + if (h->inp[indx] != h->inp[indx + P_INDX]) + return (0); + + /* Check the next item. */ + indx += P_INDX; + } + /* NOTREACHED */ + +dupchk: /* Check a chain of duplicate pages. */ + pgno = ((BOVERFLOW *)bk)->pgno; + for (;;) { + /* Acquire the next page in the duplicate chain. */ + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + return (ret); + + /* Check each item for a delete flag. */ + for (indx = 0; indx < NUM_ENT(h); ++indx) + if (!B_DISSET(GET_BKEYDATA(h, indx)->type)) { + *isdeletedp = 0; + goto done; + } + /* + * If we reach the end of the duplicate pages, then every + * item we reviewed must have been deleted. + */ + if ((pgno = NEXT_PGNO(h)) == PGNO_INVALID) + goto done; + + (void)memp_fput(dbp->mpf, h, 0); + } + /* NOTREACHED */ + +done: (void)memp_fput(dbp->mpf, h, 0); + return (0); +} + +/* * __bam_lookup -- * Find the right location in the tree for the key. */ @@ -425,10 +512,10 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) if (op == DB_CURRENT) { bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? 
O_INDX : 0)); - if (B_TYPE(bk->type) == B_OVERFLOW) - have_bytes = BOVERFLOW_PSIZE; - else + if (B_TYPE(bk->type) == B_KEYDATA) have_bytes = BKEYDATA_PSIZE(bk->len); + else + have_bytes = BOVERFLOW_PSIZE; need_bytes = 0; } else { have_bytes = 0; @@ -542,7 +629,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) * If we're dealing with offpage items, we have to * delete and then re-add the item. */ - if (bigdata || B_TYPE(bk->type) == B_OVERFLOW) { + if (bigdata || B_TYPE(bk->type) != B_KEYDATA) { if ((ret = __bam_ditem(dbp, h, indx)) != 0) return (ret); break; @@ -704,9 +791,9 @@ __bam_ritem(dbp, h, indx, data) { BKEYDATA *bk; DBT orig, repl; - db_indx_t lo, ln, min, off, prefix, suffix; + db_indx_t cnt, lo, ln, min, off, prefix, suffix; int32_t nbytes; - int cnt, ret; + int ret; u_int8_t *p, *t; /* diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c index a21a8208bc..c39c9af322 100644 --- a/db2/btree/bt_search.c +++ b/db2/btree/bt_search.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_search.c 10.8 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)bt_search.c 10.9 (Sleepycat) 11/18/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -119,12 +119,20 @@ __bam_search(dbp, key, flags, stop, recnop, exactp) return (ret); } - /* Decide if we need to save this page; if we do, write lock it. */ + /* + * Decide if we need to save this page; if we do, write lock it. + * We deliberately don't lock-couple on this call. If the tree + * is tiny, i.e., one page, and two threads are busily updating + * the root page, we're almost guaranteed deadlocks galore, as + * each one gets a read lock and then blocks the other's attempt + * for a write lock. 
+ */ if (!stack && ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) || (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) { (void)memp_fput(dbp->mpf, h, 0); - if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0) + (void)__BT_LPUT(dbp, lock); + if ((ret = __bam_lget(dbp, 0, pg, DB_LOCK_WRITE, &lock)) != 0) return (ret); if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) { (void)__BT_LPUT(dbp, lock); diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c index bc09131b00..219d486dc5 100644 --- a/db2/btree/bt_split.c +++ b/db2/btree/bt_split.c @@ -44,7 +44,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_split.c 10.17 (Sleepycat) 11/2/97"; +static const char sccsid[] = "@(#)bt_split.c 10.18 (Sleepycat) 11/23/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -396,14 +396,14 @@ __bam_broot(dbp, rootp, lp, rp) * The btree comparison code guarantees that the left-most key on any * level of the tree is never used, so it doesn't need to be filled in. */ + memset(&bi, 0, sizeof(bi)); bi.len = 0; B_TSET(bi.type, B_KEYDATA, 0); bi.pgno = lp->pgno; if (F_ISSET(dbp, DB_BT_RECNUM)) { bi.nrecs = __bam_total(lp); RE_NREC_SET(rootp, bi.nrecs); - } else - bi.nrecs = 0; + } hdr.data = &bi; hdr.size = SSZA(BINTERNAL, data); if ((ret = @@ -591,6 +591,7 @@ __bam_pinsert(dbp, parent, lchild, rchild) return (DB_NEEDSPLIT); /* Add a new record for the right page. 
*/ + memset(&bi, 0, sizeof(bi)); bi.len = child_bi->len; B_TSET(bi.type, child_bi->type, 0); bi.pgno = rchild->pgno; @@ -640,6 +641,7 @@ noprefix: nksize = child_bk->len; if (P_FREESPACE(ppage) < nbytes) return (DB_NEEDSPLIT); + memset(&bi, 0, sizeof(bi)); bi.len = nksize; B_TSET(bi.type, child_bk->type, 0); bi.pgno = rchild->pgno; @@ -661,6 +663,7 @@ noprefix: nksize = child_bk->len; if (P_FREESPACE(ppage) < nbytes) return (DB_NEEDSPLIT); + memset(&bi, 0, sizeof(bi)); bi.len = BOVERFLOW_SIZE; B_TSET(bi.type, child_bk->type, 0); bi.pgno = rchild->pgno; diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c index 45232bbc41..18b9b34975 100644 --- a/db2/btree/btree_auto.c +++ b/db2/btree/btree_auto.c @@ -100,7 +100,6 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_pg_alloc_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -265,7 +264,6 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_pg_free_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -460,7 +458,6 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_split_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_split_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -657,7 +654,6 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_rsplit_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -836,7 +832,6 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_adj_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_adj_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -995,7 +990,6 @@ int 
__bam_cadjust_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_cadjust_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1145,7 +1139,6 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_cdel_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1329,7 +1322,6 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __bam_repl_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __bam_repl_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c index 74ba9ff426..05fc7cc084 100644 --- a/db2/common/db_appinit.c +++ b/db2/common/db_appinit.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_appinit.c 10.36 (Sleepycat) 10/28/97"; +static const char sccsid[] = "@(#)db_appinit.c 10.37 (Sleepycat) 11/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -108,10 +108,23 @@ db_appinit(db_home, db_config, dbenv, flags) if ((ret = __db_parse(dbenv, *p)) != 0) goto err; - /* Parse the config file. */ + /* + * Parse the config file. + * + * XXX + * Don't use sprintf(3)/snprintf(3) -- the former is dangerous, and + * the latter isn't standard, and we're manipulating strings handed + * us by the application. 
+ */ if (dbenv->db_home != NULL) { - (void)snprintf(buf, - sizeof(buf), "%s/DB_CONFIG", dbenv->db_home); +#define CONFIG_NAME "/DB_CONFIG" + if (strlen(dbenv->db_home) + + strlen(CONFIG_NAME) + 1 > sizeof(buf)) { + ret = ENAMETOOLONG; + goto err; + } + (void)strcpy(buf, dbenv->db_home); + (void)strcat(buf, CONFIG_NAME); if ((fp = fopen(buf, "r")) != NULL) { while (fgets(buf, sizeof(buf), fp) != NULL) { if ((lp = strchr(buf, '\n')) != NULL) diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c index ac0176d70f..188c6b9f95 100644 --- a/db2/common/db_apprec.c +++ b/db2/common/db_apprec.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_apprec.c 10.18 (Sleepycat) 9/30/97"; +static const char sccsid[] = "@(#)db_apprec.c 10.19 (Sleepycat) 11/23/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -72,10 +72,8 @@ __db_apprec(dbenv, flags) if (LF_ISSET(DB_RECOVER_FATAL)) first_flag = DB_FIRST; else { - if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND) { - F_SET(lp, is_thread); - return (0); - } + if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND) + goto out; first_flag = DB_SET; } @@ -88,7 +86,7 @@ __db_apprec(dbenv, flags) (u_long)lsn.file, (u_long)lsn.offset); else __db_err(dbenv, "Retrieving first LSN"); - goto err; + goto out; } first_lsn = lsn; @@ -99,7 +97,7 @@ __db_apprec(dbenv, flags) if ((ret = log_get(dbenv->lg_info, &lsn, &data, DB_NEXT)) != 0) { if (ret != DB_NOTFOUND) - goto err; + goto out; break; } } @@ -123,7 +121,7 @@ __db_apprec(dbenv, flags) goto msgerr; } if (ret != 0 && ret != DB_NOTFOUND) - goto err; + goto out; for (ret = log_get(lp, &lsn, &data, DB_NEXT); ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT)) { @@ -134,7 +132,7 @@ __db_apprec(dbenv, flags) goto msgerr; } if (ret != DB_NOTFOUND) - goto err; + goto out; /* Now close all the db files that are open. 
*/ __log_close_files(lp); @@ -148,7 +146,7 @@ __db_apprec(dbenv, flags) dbenv->tx_info->region->last_ckp = ckp_lsn; dbenv->tx_info->region->time_ckp = (u_int32_t)now; if ((ret = txn_checkpoint(dbenv->tx_info, 0, 0)) != 0) - goto err; + goto out; if (dbenv->db_verbose) { __db_err(lp->dbenv, "Recovery complete at %s", ctime(&now)); @@ -160,12 +158,13 @@ __db_apprec(dbenv, flags) (u_long)dbenv->tx_info->region->last_ckp.offset); } - F_SET(lp, is_thread); - return (0); + if (0) { +msgerr: __db_err(dbenv, "Recovery function for LSN %lu %lu failed", + (u_long)lsn.file, (u_long)lsn.offset); + } -msgerr: __db_err(dbenv, "Recovery function for LSN %lu %lu failed", - (u_long)lsn.file, (u_long)lsn.offset); +out: F_SET(lp, is_thread); + __db_txnlist_end(txninfo); -err: F_SET(lp, is_thread); return (ret); } diff --git a/db2/common/db_err.c b/db2/common/db_err.c index f3e7b40448..13f2cb5dc3 100644 --- a/db2/common/db_err.c +++ b/db2/common/db_err.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_err.c 10.18 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)db_err.c 10.19 (Sleepycat) 11/9/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -302,10 +302,13 @@ __db_cgetchk(dbp, key, data, flags, isvalid) break; case DB_SET: break; - case DB_SET_RECNO: case DB_GET_RECNO: if (!F_ISSET(dbp, DB_BT_RECNUM)) goto err; + break; + case DB_SET_RECNO: + if (!F_ISSET(dbp, DB_BT_RECNUM)) + goto err; check_key = 1; break; default: @@ -313,8 +316,9 @@ err: return (__db_ferr(dbp->dbenv, "c_get", 0)); } /* Check for invalid key/data flags. 
*/ - DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, - DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL); + if (check_key) + DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, + DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL); DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags, DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL); diff --git a/db2/common/db_region.c b/db2/common/db_region.c index 3e8cd2dc66..4fc603a2b0 100644 --- a/db2/common/db_region.c +++ b/db2/common/db_region.c @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_region.c 10.15 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)db_region.c 10.18 (Sleepycat) 11/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -80,14 +80,14 @@ static int __db_rmap __P((DB_ENV *, int, size_t, void *)); * into memory, NULL on error. * * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME, - * PUBLIC: const char *, const char *, int, size_t, int *, void *)); + * PUBLIC: const char *, const char *, int, size_t, int, int *, void *)); */ int -__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp) +__db_rcreate(dbenv, appname, path, file, mode, size, oflags, fdp, retp) DB_ENV *dbenv; APPNAME appname; const char *path, *file; - int mode, *fdp; + int mode, oflags, *fdp; size_t size; void *retp; { @@ -110,12 +110,13 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp) /* * Now open the file. We need to make sure that multiple processes * that attempt to create the region at the same time are properly - * ordered, so we open it O_EXCL and O_CREAT so two simultaneous + * ordered, so we open it DB_EXCL and DB_CREATE so two simultaneous * attempts to create the region will return failure in one of the * attempts. 
*/ - if (fd == -1 && (ret = __db_open(name, - DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) { + oflags |= DB_CREATE | DB_EXCL; + if (fd == -1 && + (ret = __db_open(name, oflags, oflags, mode, &fd)) != 0) { if (ret != EEXIST) __db_err(dbenv, "region create: %s: %s", name, strerror(ret)); @@ -398,7 +399,7 @@ __db_runlink(dbenv, appname, path, file, force) /* If the file doesn't exist, we're done. */ if (__db_exists(name, NULL)) - return (0); /* XXX: ENOENT? */ + goto done; /* * If we're called with a force flag, try and unlink the file. This @@ -412,8 +413,7 @@ __db_runlink(dbenv, appname, path, file, force) if (force) { if ((ret = __db_unlink(name)) != 0 && ret != ENOENT) goto err1; - FREES(name); - return (0); + goto done; } /* Open and lock the region. */ @@ -453,7 +453,7 @@ __db_runlink(dbenv, appname, path, file, force) (void)__db_sleep(0, 250000); } if (ret == 0) { - FREES(name); +done: FREES(name); return (0); } @@ -467,6 +467,7 @@ __db_runlink(dbenv, appname, path, file, force) err2: (void)__db_mutex_unlock(&rp->lock, fd); (void)__db_rclose(dbenv, fd, rp); err1: __db_err(dbenv, "region unlink: %s: %s", name, strerror(ret)); + FREES(name); return (ret); } diff --git a/db2/config.h b/db2/config.h index 27dbdaa439..7f784a0d9b 100644 --- a/db2/config.h +++ b/db2/config.h @@ -114,6 +114,9 @@ /* Define if you have the strsep function. */ #define HAVE_STRSEP 1 +/* Define if you have the sysconf function. */ +#define HAVE_SYSCONF 1 + /* Define if you have the vsnprintf function. */ #define HAVE_VSNPRINTF 1 @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. 
* - * @(#)db.h.src 10.91 (Sleepycat) 11/3/97 + * @(#)db.h.src 10.97 (Sleepycat) 11/28/97 */ #ifndef _DB_H_ @@ -73,8 +73,8 @@ #define DB_VERSION_MAJOR 2 #define DB_VERSION_MINOR 3 -#define DB_VERSION_PATCH 12 -#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.12: (11/3/97)" +#define DB_VERSION_PATCH 14 +#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.14: (11/28/97)" typedef u_int32_t db_pgno_t; /* Page number type. */ typedef u_int16_t db_indx_t; /* Page offset type. */ @@ -129,8 +129,10 @@ struct __db_dbt { }; /* - * DB configuration. There are a set of functions which the application - * can replace with its own versions. + * DB internal configuration. + * + * There are a set of functions that the application can replace with its + * own versions, and some other knobs which can be turned at run-time. */ #define DB_FUNC_CALLOC 1 /* ANSI C calloc. */ #define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */ @@ -147,11 +149,12 @@ struct __db_dbt { #define DB_FUNC_REALLOC 13 /* ANSI C realloc. */ #define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. */ #define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */ -#define DB_FUNC_STRDUP 16 /* ANSI C strdup. */ +#define DB_FUNC_STRDUP 16 /* DB: strdup(3). */ #define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */ #define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */ #define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */ #define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. */ +#define DB_TSL_SPINS 21 /* DB: initialize spin count. */ /* * Database configuration and initialization. @@ -211,10 +214,10 @@ struct __db_dbt { * locking subsystem. */ #define DB_LOCK_NORUN 0x0 -#define DB_LOCK_DEFAULT 0x1 -#define DB_LOCK_OLDEST 0x2 -#define DB_LOCK_RANDOM 0x3 -#define DB_LOCK_YOUNGEST 0x4 +#define DB_LOCK_DEFAULT 0x1 /* Default policy. */ +#define DB_LOCK_OLDEST 0x2 /* Abort oldest transaction. */ +#define DB_LOCK_RANDOM 0x3 /* Abort random transaction. */ +#define DB_LOCK_YOUNGEST 0x4 /* Abort youngest transaction. 
*/ struct __db_env { int db_lorder; /* Byte order. */ @@ -265,6 +268,10 @@ struct __db_env { /******************************************************* * Access methods. *******************************************************/ +/* + * XXX + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ typedef enum { DB_BTREE=1, /* B+tree. */ DB_HASH, /* Extended Linear Hashing. */ @@ -347,7 +354,13 @@ struct __db_info { #define DB_SET_RANGE 0x020000 /* c_get() */ #define DB_SET_RECNO 0x040000 /* c_get() */ -/* DB (user visible) error return codes. */ +/* + * DB (user visible) error return codes. + * + * XXX + * Changes to any of the user visible error return codes must be reflected + * in java/src/com/sleepycat/db/Db.java. + */ #define DB_INCOMPLETE ( -1) /* Sync didn't finish. */ #define DB_KEYEMPTY ( -2) /* The key/data pair was deleted or was never created by the user. */ @@ -516,6 +529,7 @@ int db_appinit __P((const char *, char * const *, DB_ENV *, int)); int db_appexit __P((DB_ENV *)); int db_jump_set __P((void *, int)); int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **)); +int db_value_set __P((int, int)); char *db_version __P((int *, int *, int *)); #if defined(__cplusplus) }; @@ -533,16 +547,26 @@ char *db_version __P((int *, int *, int *)); /* Flag values for lock_detect(). */ #define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */ -/* Request types. */ +/* + * Request types. + * + * XXX + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ typedef enum { - DB_LOCK_DUMP, /* Display held locks. */ + DB_LOCK_DUMP=0, /* Display held locks. */ DB_LOCK_GET, /* Get the lock. */ DB_LOCK_PUT, /* Release the lock. */ DB_LOCK_PUT_ALL, /* Release locker's locks. */ DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */ } db_lockop_t; -/* Simple R/W lock modes and for multi-granularity intention locking. */ +/* + * Simple R/W lock modes and for multi-granularity intention locking. 
+ * + * XXX + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ typedef enum { DB_LOCK_NG=0, /* Not granted. */ DB_LOCK_READ, /* Shared/read. */ @@ -577,7 +601,7 @@ extern const u_int8_t db_riw_conflicts[]; extern "C" { #endif int lock_close __P((DB_LOCKTAB *)); -int lock_detect __P((DB_LOCKTAB *, int, u_int32_t)); +int lock_detect __P((DB_LOCKTAB *, int, int)); int lock_get __P((DB_LOCKTAB *, u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *)); int lock_id __P((DB_LOCKTAB *, u_int32_t *)); diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c index d40d964542..88bca7b583 100644 --- a/db2/db/db_auto.c +++ b/db2/db/db_auto.c @@ -122,7 +122,6 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __db_addrem_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __db_addrem_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -312,7 +311,6 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __db_split_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __db_split_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -507,7 +505,6 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __db_big_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __db_big_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -678,7 +675,6 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __db_ovref_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __db_ovref_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -842,7 +838,6 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __db_relink_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __db_relink_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1005,7 +1000,6 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __db_addpage_print * PUBLIC: __P((DB_LOG 
*, DBT *, DB_LSN *, int, void *)); */ - int __db_addpage_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1180,7 +1174,6 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __db_debug_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __db_debug_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1335,7 +1328,6 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags) * PUBLIC: int __db_noop_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __db_noop_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c index a4bcdb7628..4f89d2b917 100644 --- a/db2/db/db_dispatch.c +++ b/db2/db/db_dispatch.c @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_dispatch.c 10.6 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)db_dispatch.c 10.7 (Sleepycat) 11/23/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -236,8 +236,8 @@ __db_txnlist_find(listp, txnid) void *listp; u_int32_t txnid; { - __db_txnlist *p; __db_txnhead *hp; + __db_txnlist *p; if ((hp = (struct __db_txnhead *)listp) == NULL) return (DB_NOTFOUND); @@ -255,12 +255,16 @@ __db_txnlist_find(listp, txnid) } #ifdef DEBUG +/* + * __db_txnlist_print -- + * Print out the transaction list. + */ void __db_txnlist_print(listp) void *listp; { - __db_txnlist *p; __db_txnhead *hp; + __db_txnlist *p; hp = (struct __db_txnhead *)listp; printf("Maxid: %lu\n", (u_long)hp->maxid); @@ -268,3 +272,24 @@ __db_txnlist_print(listp) printf("TXNID: %lu\n", (u_long)p->txnid); } #endif + +/* + * __db_txnlist_end -- + * Discard transaction linked list. 
+ * + * PUBLIC: void __db_txnlist_end __P((void *)); + */ +void +__db_txnlist_end(listp) + void *listp; +{ + __db_txnhead *hp; + __db_txnlist *p; + + hp = (struct __db_txnhead *)listp; + while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) { + LIST_REMOVE(p, links); + __db_free(p); + } + __db_free(listp); +} diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c index bcec308b95..65441aa45a 100644 --- a/db2/db/db_ret.c +++ b/db2/db/db_ret.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_ret.c 10.8 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)db_ret.c 10.10 (Sleepycat) 11/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -116,35 +116,48 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc) dbt->size = len; /* - * Allocate any necessary memory. + * Allocate memory to be owned by the application: DB_DBT_MALLOC. * - * XXX: Never allocate 0 bytes. + * !!! + * We always allocate memory, even if we're copying out 0 bytes. This + * guarantees consistency, i.e., the application can always free memory + * without concern as to how many bytes of the record were requested. + * + * XXX + * Never allocate 0 bytes, it's known to make malloc/realloc unhappy. + * + * Use the memory specified by the application: DB_DBT_USERMEM. + * + * !!! + * If the length we're going to copy is 0, the application-supplied + * memory pointer is allowed to be NULL. */ if (F_ISSET(dbt, DB_DBT_MALLOC)) { dbt->data = db_malloc == NULL ? - (void *)__db_malloc(len + 1) : + (void *)__db_malloc(len) : (void *)db_malloc(len + 1); if (dbt->data == NULL) return (ENOMEM); } else if (F_ISSET(dbt, DB_DBT_USERMEM)) { - if (dbt->ulen < len) + if (len != 0 && (dbt->data == NULL || dbt->ulen < len)) return (ENOMEM); } else if (memp == NULL || memsize == NULL) { return (EINVAL); } else { - if (*memsize == 0 || *memsize < len) { + if (len != 0 && (*memsize == 0 || *memsize < len)) { *memp = *memp == NULL ? 
- (void *)__db_malloc(len + 1) : - (void *)__db_realloc(*memp, len + 1); + (void *)__db_malloc(len) : + (void *)__db_realloc(*memp, len); if (*memp == NULL) { *memsize = 0; return (ENOMEM); } - *memsize = len + 1; + *memsize = len; } dbt->data = *memp; } - memcpy(dbt->data, data, len); + if (len != 0) + memcpy(dbt->data, data, len); return (0); } diff --git a/db2/db_int.h b/db2/db_int.h index 1f6c790345..21460722a3 100644 --- a/db2/db_int.h +++ b/db2/db_int.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)db_int.h.src 10.36 (Sleepycat) 10/31/97 + * @(#)db_int.h.src 10.37 (Sleepycat) 11/25/97 */ #ifndef _DB_INTERNAL_H_ @@ -168,6 +168,7 @@ typedef struct _db_mutex_t { off_t off; /* Backing file offset. */ u_long pid; /* Lock holder: 0 or process pid. */ #endif + u_int32_t spins; /* Spins before block. */ u_int32_t mutex_set_wait; /* Granted after wait. */ u_int32_t mutex_set_nowait; /* Granted without waiting. */ } db_mutex_t; diff --git a/db2/dbm/dbm.c b/db2/dbm/dbm.c index 1fa92ce1fa..bd7c7a6636 100644 --- a/db2/dbm/dbm.c +++ b/db2/dbm/dbm.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)dbm.c 10.6 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)dbm.c 10.7 (Sleepycat) 11/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -198,9 +198,20 @@ dbm_open(file, oflags, mode) dbinfo.h_ffactor = 40; dbinfo.h_nelem = 1; - (void)snprintf(path, sizeof(path), "%s%s", file, DBM_SUFFIX); - if ((__set_errno(db_open(path, - DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp))) != 0) + /* + * XXX + * Don't use sprintf(3)/snprintf(3) -- the former is dangerous, and + * the latter isn't standard, and we're manipulating strings handed + * us by the application. 
+ */ + if (strlen(file) + strlen(DBM_SUFFIX) + 1 > sizeof(path)) { + errno = ENAMETOOLONG; + return (NULL); + } + (void)strcpy(path, file); + (void)strcat(path, DBM_SUFFIX); + if ((errno = db_open(path, + DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp)) != 0) return (NULL); return ((DBM *)dbp); } @@ -261,7 +272,7 @@ dbm_firstkey(db) DBC *cp; if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL) - if ((__set_errno(db->cursor(db, NULL, &cp))) != 0) { + if ((errno = db->cursor(db, NULL, &cp)) != 0) { memset(&key, 0, sizeof(key)); return (key); } @@ -294,7 +305,7 @@ dbm_nextkey(db) int status; if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL) - if ((__set_errno(db->cursor(db, NULL, &cp))) != 0) { + if ((errno = db->cursor(db, NULL, &cp)) != 0) { memset(&key, 0, sizeof(key)); return (key); } @@ -330,9 +341,9 @@ dbm_delete(db, key) _key.size = key.dsize; ret = (((DB *)db)->del)((DB *)db, NULL, &_key, 0); if (ret < 0) - __set_errno(ENOENT); + errno = ENOENT; else if (ret > 0) { - __set_errno(ret); + errno = ret; ret = -1; } return (ret); diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c index 4820eb8611..787ee04ddb 100644 --- a/db2/hash/hash_auto.c +++ b/db2/hash/hash_auto.c @@ -119,7 +119,6 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_insdel_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -316,7 +315,6 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_newpage_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -479,7 +477,6 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_splitmeta_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -640,7 +637,6 @@ int 
__ham_splitdata_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_splitdata_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -832,7 +828,6 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_replace_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_replace_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1034,7 +1029,6 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_newpgno_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1203,7 +1197,6 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_ovfl_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -1386,7 +1379,6 @@ int __ham_copypage_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __ham_copypage_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h index bbe0d971b2..46f2227bdd 100644 --- a/db2/include/btree_ext.h +++ b/db2/include/btree_ext.h @@ -11,7 +11,7 @@ int __bam_cursor __P((DB *, DB_TXN *, DBC **)); int __bam_c_iclose __P((DB *, DBC *)); int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int)); int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int)); -int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *)); +int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int)); void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int)); void __bam_ca_dup __P((DB *, db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t)); diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h index b814582abd..29bc9aa4e2 
100644 --- a/db2/include/common_ext.h +++ b/db2/include/common_ext.h @@ -23,7 +23,7 @@ int __db_syncchk __P((const DB *, int)); int __db_ferr __P((const DB_ENV *, const char *, int)); u_int32_t __db_log2 __P((u_int32_t)); int __db_rcreate __P((DB_ENV *, APPNAME, - const char *, const char *, int, size_t, int *, void *)); + const char *, const char *, int, size_t, int, int *, void *)); int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int)); int __db_ropen __P((DB_ENV *, APPNAME, const char *, const char *, int, int *, void *)); diff --git a/db2/include/db.h.src b/db2/include/db.h.src index 3cc2bfd4fc..654eb16425 100644 --- a/db2/include/db.h.src +++ b/db2/include/db.h.src @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)db.h.src 10.91 (Sleepycat) 11/3/97 + * @(#)db.h.src 10.97 (Sleepycat) 11/28/97 */ #ifndef _DB_H_ @@ -73,8 +73,8 @@ #define DB_VERSION_MAJOR 2 #define DB_VERSION_MINOR 3 -#define DB_VERSION_PATCH 12 -#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.12: (11/3/97)" +#define DB_VERSION_PATCH 14 +#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.14: (11/28/97)" typedef u_int32_t db_pgno_t; /* Page number type. */ typedef u_int16_t db_indx_t; /* Page offset type. */ @@ -129,8 +129,10 @@ struct __db_dbt { }; /* - * DB configuration. There are a set of functions which the application - * can replace with its own versions. + * DB internal configuration. + * + * There are a set of functions that the application can replace with its + * own versions, and some other knobs which can be turned at run-time. */ #define DB_FUNC_CALLOC 1 /* ANSI C calloc. */ #define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */ @@ -147,11 +149,12 @@ struct __db_dbt { #define DB_FUNC_REALLOC 13 /* ANSI C realloc. */ #define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. */ #define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */ -#define DB_FUNC_STRDUP 16 /* ANSI C strdup. */ +#define DB_FUNC_STRDUP 16 /* DB: strdup(3). 
*/ #define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */ #define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */ #define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */ #define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. */ +#define DB_TSL_SPINS 21 /* DB: initialize spin count. */ /* * Database configuration and initialization. @@ -211,10 +214,10 @@ struct __db_dbt { * locking subsystem. */ #define DB_LOCK_NORUN 0x0 -#define DB_LOCK_DEFAULT 0x1 -#define DB_LOCK_OLDEST 0x2 -#define DB_LOCK_RANDOM 0x3 -#define DB_LOCK_YOUNGEST 0x4 +#define DB_LOCK_DEFAULT 0x1 /* Default policy. */ +#define DB_LOCK_OLDEST 0x2 /* Abort oldest transaction. */ +#define DB_LOCK_RANDOM 0x3 /* Abort random transaction. */ +#define DB_LOCK_YOUNGEST 0x4 /* Abort youngest transaction. */ struct __db_env { int db_lorder; /* Byte order. */ @@ -265,6 +268,10 @@ struct __db_env { /******************************************************* * Access methods. *******************************************************/ +/* + * XXX + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ typedef enum { DB_BTREE=1, /* B+tree. */ DB_HASH, /* Extended Linear Hashing. */ @@ -347,7 +354,13 @@ struct __db_info { #define DB_SET_RANGE 0x020000 /* c_get() */ #define DB_SET_RECNO 0x040000 /* c_get() */ -/* DB (user visible) error return codes. */ +/* + * DB (user visible) error return codes. + * + * XXX + * Changes to any of the user visible error return codes must be reflected + * in java/src/com/sleepycat/db/Db.java. + */ #define DB_INCOMPLETE ( -1) /* Sync didn't finish. */ #define DB_KEYEMPTY ( -2) /* The key/data pair was deleted or was never created by the user. 
*/ @@ -516,6 +529,7 @@ int db_appinit __P((const char *, char * const *, DB_ENV *, int)); int db_appexit __P((DB_ENV *)); int db_jump_set __P((void *, int)); int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **)); +int db_value_set __P((int, int)); char *db_version __P((int *, int *, int *)); #if defined(__cplusplus) }; @@ -533,16 +547,26 @@ char *db_version __P((int *, int *, int *)); /* Flag values for lock_detect(). */ #define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */ -/* Request types. */ +/* + * Request types. + * + * XXX + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ typedef enum { - DB_LOCK_DUMP, /* Display held locks. */ + DB_LOCK_DUMP=0, /* Display held locks. */ DB_LOCK_GET, /* Get the lock. */ DB_LOCK_PUT, /* Release the lock. */ DB_LOCK_PUT_ALL, /* Release locker's locks. */ DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */ } db_lockop_t; -/* Simple R/W lock modes and for multi-granularity intention locking. */ +/* + * Simple R/W lock modes and for multi-granularity intention locking. + * + * XXX + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ typedef enum { DB_LOCK_NG=0, /* Not granted. */ DB_LOCK_READ, /* Shared/read. */ @@ -577,7 +601,7 @@ extern const u_int8_t db_riw_conflicts[]; extern "C" { #endif int lock_close __P((DB_LOCKTAB *)); -int lock_detect __P((DB_LOCKTAB *, int, u_int32_t)); +int lock_detect __P((DB_LOCKTAB *, int, int)); int lock_get __P((DB_LOCKTAB *, u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *)); int lock_id __P((DB_LOCKTAB *, u_int32_t *)); diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h index 01d1231092..83523c5559 100644 --- a/db2/include/db_cxx.h +++ b/db2/include/db_cxx.h @@ -4,7 +4,7 @@ * Copyright (c) 1997 * Sleepycat Software. All rights reserved. 
* - * @(#)db_cxx.h 10.12 (Sleepycat) 10/25/97 + * @(#)db_cxx.h 10.13 (Sleepycat) 11/25/97 */ #ifndef _DB_CXX_H_ @@ -202,7 +202,7 @@ class _exported DbLockTab friend DbEnv; public: int close(); - int detect(int atype, u_int32_t flags); + int detect(int flags, int atype); int get(u_int32_t locker, int flags, const Dbt *obj, db_lockmode_t lock_mode, DbLock *lock); int id(u_int32_t *idp); diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h index f9b3b3a214..15eeaf50a3 100644 --- a/db2/include/db_ext.h +++ b/db2/include/db_ext.h @@ -65,6 +65,7 @@ int __db_add_recovery __P((DB_ENV *, int __db_txnlist_init __P((void *)); int __db_txnlist_add __P((void *, u_int32_t)); int __db_txnlist_find __P((void *, u_int32_t)); +void __db_txnlist_end __P((void *)); int __db_dput __P((DB *, DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **))); int __db_drem __P((DB *, diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src index abd93a6e8e..03a882fded 100644 --- a/db2/include/db_int.h.src +++ b/db2/include/db_int.h.src @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)db_int.h.src 10.36 (Sleepycat) 10/31/97 + * @(#)db_int.h.src 10.37 (Sleepycat) 11/25/97 */ #ifndef _DB_INTERNAL_H_ @@ -168,6 +168,7 @@ typedef struct _db_mutex_t { off_t off; /* Backing file offset. */ u_long pid; /* Lock holder: 0 or process pid. */ #endif + u_int32_t spins; /* Spins before block. */ u_int32_t mutex_set_wait; /* Granted after wait. */ u_int32_t mutex_set_nowait; /* Granted without waiting. */ } db_mutex_t; diff --git a/db2/include/lock.h b/db2/include/lock.h index 8a927f076e..5031b65d06 100644 --- a/db2/include/lock.h +++ b/db2/include/lock.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. 
* - * @(#)lock.h 10.9 (Sleepycat) 10/25/97 + * @(#)lock.h 10.10 (Sleepycat) 11/13/97 */ typedef struct __db_lockobj DB_LOCKOBJ; @@ -85,10 +85,14 @@ struct __db_lockobj { } dlinks; #define DB_LOCK_OBJTYPE 1 #define DB_LOCK_LOCKER 2 + /* Allocate room in the object to + * hold typical DB lock structures + * so that we do not have to + * allocate them from shalloc. */ + u_int8_t objdata[sizeof(struct __db_ilock)]; u_int8_t type; /* Real object or locker id. */ }; - #define dd_id wlinks._dd_id #define waiters wlinks._waiters #define holders dlinks._holders diff --git a/db2/include/log.h b/db2/include/log.h index a192a38136..405daf4148 100644 --- a/db2/include/log.h +++ b/db2/include/log.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)log.h 10.15 (Sleepycat) 11/2/97 + * @(#)log.h 10.16 (Sleepycat) 11/9/97 */ #ifndef _LOG_H_ @@ -117,20 +117,35 @@ struct __log { SH_TAILQ_HEAD(__fq) fq; /* List of file names. */ + /* + * The lsn LSN is the file offset that we're about to write and which + * we will return to the user. + */ DB_LSN lsn; /* LSN at current file offset. */ - DB_LSN c_lsn; /* LSN of the last checkpoint. */ + + /* + * The s_lsn LSN is the last LSN that we know is on disk, not just + * written, but synced. + */ DB_LSN s_lsn; /* LSN of the last sync. */ - DB_LSN uw_lsn; /* LSN of 1st rec not fully on disk. */ u_int32_t len; /* Length of the last record. */ - size_t b_off; /* Current offset in the buffer. */ u_int32_t w_off; /* Current write offset in the file. */ + DB_LSN c_lsn; /* LSN of the last checkpoint. */ time_t chkpt; /* Time of the last checkpoint. */ DB_LOG_STAT stat; /* Log statistics. */ + /* + * The f_lsn LSN is the LSN (returned to the user) that "owns" the + * first byte of the buffer. If the record associated with the LSN + * spans buffers, it may not reflect the physical file location of + * the first byte of the buffer. + */ + DB_LSN f_lsn; /* LSN of first byte in the buffer.
*/ + size_t b_off; /* Current offset in the buffer. */ u_int8_t buf[4 * 1024]; /* Log buffer. */ }; diff --git a/db2/include/mp.h b/db2/include/mp.h index f68f42b144..f108246f2c 100644 --- a/db2/include/mp.h +++ b/db2/include/mp.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)mp.h 10.19 (Sleepycat) 10/25/97 + * @(#)mp.h 10.22 (Sleepycat) 11/28/97 */ struct __bh; typedef struct __bh BH; @@ -23,7 +23,6 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; #define DB_CACHESIZE_MIN ( 20 * 1024) #define INVALID 0 /* Invalid shared memory offset. */ -#define TEMPORARY "<tmp>" /* Temporary file name. */ /* * There are three ways we do locking in the mpool code: @@ -122,10 +121,10 @@ struct __db_mpool { int fd; /* Underlying mmap'd fd. */ - #define MP_ISPRIVATE 0x01 /* Private, so local memory. */ #define MP_LOCKHANDLE 0x02 /* Threaded, lock handles and region. */ #define MP_LOCKREGION 0x04 /* Concurrent access, lock region. */ +#define MP_MALLOC 0x08 /* If region in allocated memory. */ u_int32_t flags; }; @@ -157,7 +156,6 @@ struct __db_mpoolfile { /* These fields are not protected. */ TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */ - char *path; /* Initial file path. */ DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */ MPOOLFILE *mfp; /* Underlying MPOOLFILE. */ @@ -165,11 +163,9 @@ struct __db_mpoolfile { size_t len; /* Length of mmap'd region. */ /* These fields need to be protected for multi-threaded support. */ -#define MP_PATH_ALLOC 0x001 /* Path is allocated memory. */ -#define MP_PATH_TEMP 0x002 /* Backing file is a temporary. */ -#define MP_READONLY 0x004 /* File is readonly. */ -#define MP_UPGRADE 0x008 /* File descriptor is readwrite. */ -#define MP_UPGRADE_FAIL 0x010 /* Upgrade wasn't possible. */ +#define MP_READONLY 0x01 /* File is readonly. */ +#define MP_UPGRADE 0x02 /* File descriptor is readwrite. */ +#define MP_UPGRADE_FAIL 0x04 /* Upgrade wasn't possible. 
*/ u_int32_t flags; }; @@ -220,12 +216,9 @@ struct __mpoolfile { u_int32_t ref; /* Reference count. */ int ftype; /* File type. */ - int can_mmap; /* If the file can be mmap'd. */ - int lsn_off; /* Page's LSN offset. */ size_t path_off; /* File name location. */ - size_t fileid_off; /* File identification location. */ size_t pgcookie_len; /* Pgin/pgout cookie length. */ @@ -233,6 +226,12 @@ struct __mpoolfile { int lsn_cnt; /* Checkpoint buffers left to write. */ + db_pgno_t last_pgno; /* Last page in the file. */ + +#define MP_CAN_MMAP 0x01 /* If the file can be mmap'd. */ +#define MP_TEMP 0x02 /* Backing file is a temporary. */ + u_int32_t flags; + DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */ }; diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h index 49d86ba2e5..b78b3423cd 100644 --- a/db2/include/mp_ext.h +++ b/db2/include/mp_ext.h @@ -5,8 +5,10 @@ int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *)); int __memp_pg __P((DB_MPOOLFILE *, BH *, int)); void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int)); -int __memp_fopen __P((DB_MPOOL *, const char *, int, int, - int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); +int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int, + int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); +char * __memp_fn __P((DB_MPOOLFILE *)); +char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *)); void __memp_debug __P((DB_MPOOL *, FILE *, int)); int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *)); int __memp_ropen diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h index e48a1e9407..2edf2e257d 100644 --- a/db2/include/os_ext.h +++ b/db2/include/os_ext.h @@ -1,5 +1,8 @@ /* DO NOT EDIT: automatically built by dist/distrib. 
*/ int __db_abspath __P((const char *)); +void *__db_calloc __P((size_t, size_t)); +void *__db_malloc __P((size_t)); +void *__db_realloc __P((void *, size_t)); int __os_dirlist __P((const char *, char ***, int *)); void __os_dirfree __P((char **, int)); int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *)); @@ -14,6 +17,7 @@ int __db_read __P((int, void *, size_t, ssize_t *)); int __db_write __P((int, void *, size_t, ssize_t *)); int __os_seek __P((int, size_t, db_pgno_t, u_long, int)); int __os_sleep __P((u_long, u_long)); +int __os_spin __P((void)); int __os_exists __P((const char *, int *)); int __os_ioinfo __P((const char *, int, off_t *, off_t *)); int __db_unlink __P((const char *)); diff --git a/db2/include/os_func.h b/db2/include/os_func.h index 0a72942903..54b64ffaa2 100644 --- a/db2/include/os_func.h +++ b/db2/include/os_func.h @@ -4,12 +4,11 @@ * Copyright (c) 1997 * Sleepycat Software. All rights reserved. * - * @(#)os_func.h 10.2 (Sleepycat) 10/28/97 + * @(#)os_func.h 10.4 (Sleepycat) 11/28/97 */ /* Calls which can be replaced by the application. */ struct __db_jumptab { - void *(*db_calloc) __P((size_t, size_t)); /* DB_FUNC_CALLOC */ int (*db_close) __P((int)); /* DB_FUNC_CLOSE */ void (*db_dirfree) __P((char **, int)); /* DB_FUNC_DIRFREE */ int (*db_dirlist) /* DB_FUNC_DIRLIST */ @@ -54,7 +53,6 @@ extern struct __db_jumptab __db_jump; * part of DB is the only code that should use the __os_XXX names, all other * parts of DB should be calling __db_XXX functions. */ -#define __db_calloc __db_jump.db_calloc #define __os_close __db_jump.db_close /* __db_close is a wrapper. */ #define __db_dirfree __db_jump.db_dirfree #define __db_dirlist __db_jump.db_dirlist @@ -62,11 +60,9 @@ extern struct __db_jumptab __db_jump; #define __db_free __db_jump.db_free #define __os_fsync __db_jump.db_fsync /* __db_fsync is a wrapper. 
*/ #define __db_ioinfo __db_jump.db_ioinfo -#define __db_malloc __db_jump.db_malloc #define __db_map __db_jump.db_map #define __os_open __db_jump.db_open /* __db_open is a wrapper. */ #define __os_read __db_jump.db_read /* __db_read is a wrapper. */ -#define __db_realloc __db_jump.db_realloc #define __db_seek __db_jump.db_seek #define __db_sleep __db_jump.db_sleep #define __db_strdup __db_jump.db_strdup diff --git a/db2/lock/lock.c b/db2/lock/lock.c index f1223a9fa6..9b1cbc8a08 100644 --- a/db2/lock/lock.c +++ b/db2/lock/lock.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)lock.c 10.38 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)lock.c 10.41 (Sleepycat) 11/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -91,7 +91,7 @@ __lock_create(path, mode, dbenv) if ((ret = __db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode, LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)), - &fd, &lrp)) != 0) + 0, &fd, &lrp)) != 0) return (ret); /* Region exists; now initialize it. */ @@ -600,7 +600,9 @@ __lock_put_internal(lt, lockp, do_all) if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) { HASHREMOVE_EL(lt->hashtab, __db_lockobj, links, sh_obj, lt->region->table_size, __lock_lhash); - __db_shalloc_free(lt->mem, SH_DBT_PTR(&sh_obj->lockobj)); + if (sh_obj->lockobj.size > sizeof(sh_obj->objdata)) + __db_shalloc_free(lt->mem, + SH_DBT_PTR(&sh_obj->lockobj)); SH_TAILQ_INSERT_HEAD(<->region->free_objs, sh_obj, links, __db_lockobj); state_changed = 1; @@ -633,7 +635,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) DB_LOCKOBJ *sh_obj, *sh_locker; DB_LOCKREGION *lrp; size_t newl_off; - int ret; + int ihold, ret; ret = 0; /* @@ -680,29 +682,40 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) * new lock if it does not conflict with anyone on the holders list * OR anyone on the waiters list. 
The reason that we don't grant if * there's a conflict is that this can lead to starvation (a writer - * waiting on a popularly read item will never ben granted). The + * waiting on a popularly read item will never be granted). The * downside of this is that a waiting reader can prevent an upgrade - * from reader to writer, which is not uncommon. In case of conflict, - * we put the new lock on the end of the waiters list. + * from reader to writer, which is not uncommon. + * + * There is one exception to the no-conflict rule. If a lock is held + * by the requesting locker AND the new lock does not conflict with + * any other holders, then we grant the lock. The most common place + * this happens is when the holder has a WRITE lock and a READ lock + * request comes in for the same locker. If we do not grant the read + * lock, then we guarantee deadlock. + * + * In case of conflict, we put the new lock on the end of the waiters + * list. */ + ihold = 0; for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { - if (CONFLICTS(lt, lp->mode, lock_mode) && - locker != lp->holder) + if (locker == lp->holder) { + if (lp->mode == lock_mode && + lp->status == DB_LSTAT_HELD) { + /* Lock is held, just inc the ref count. */ + lp->refcount++; + SH_TAILQ_INSERT_HEAD(&lrp->free_locks, + newl, links, __db_lock); + *lockp = lp; + return (0); + } else + ihold = 1; + } else if (CONFLICTS(lt, lp->mode, lock_mode)) break; - else if (lp->holder == locker && lp->mode == lock_mode && - lp->status == DB_LSTAT_HELD) { - /* Lock is already held, just inc the ref count. 
*/ - lp->refcount++; - SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, - __db_lock); - *lockp = lp; - return (0); - } } - if (lp == NULL) + if (lp == NULL && !ihold) for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { @@ -1261,25 +1274,37 @@ __lock_getobj(lt, locker, dbt, type, objp) */ if (sh_obj == NULL) { /* Create new object and then insert it into hash table. */ - if ((sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) - == NULL) { + if ((sh_obj = + SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) { if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0) return (ret); lrp = lt->region; sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); } - if ((ret = __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) { - if ((ret = __lock_grow_region(lt, - DB_LOCK_MEM, obj_size)) != 0) - return (ret); - lrp = lt->region; - /* Reacquire the head of the list. */ - sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); - (void)__db_shalloc(lt->mem, obj_size, 0, &p); - } - sh_obj->type = type; + + /* + * If we can fit this object in the structure, do so instead + * of shalloc-ing space for it. + */ + if (obj_size <= sizeof(sh_obj->objdata)) + p = sh_obj->objdata; + else + if ((ret = + __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) { + if ((ret = __lock_grow_region(lt, + DB_LOCK_MEM, obj_size)) != 0) + return (ret); + lrp = lt->region; + /* Reacquire the head of the list. */ + sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, + __db_lockobj); + (void)__db_shalloc(lt->mem, obj_size, 0, &p); + } + src = type == DB_LOCK_OBJTYPE ? 
dbt->data : (void *)&locker; memcpy(p, src, obj_size); + + sh_obj->type = type; SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj); SH_TAILQ_INIT(&sh_obj->waiters); @@ -1329,7 +1354,8 @@ __lock_freeobj(lt, obj) { HASHREMOVE_EL(lt->hashtab, __db_lockobj, links, obj, lt->region->table_size, __lock_lhash); - __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj)); + if (obj->lockobj.size > sizeof(obj->objdata)) + __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj)); SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj); } diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c index 566021fe89..93c438ca36 100644 --- a/db2/lock/lock_deadlock.c +++ b/db2/lock/lock_deadlock.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)lock_deadlock.c 10.25 (Sleepycat) 11/1/97"; +static const char sccsid[] = "@(#)lock_deadlock.c 10.26 (Sleepycat) 11/25/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -66,8 +66,7 @@ static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t)); int lock_detect(lt, flags, atype) DB_LOCKTAB *lt; - int flags; - u_int32_t atype; + int flags, atype; { DB_ENV *dbenv; locker_info *idmap; diff --git a/db2/log/log.c b/db2/log/log.c index 17681f8e0f..a9bf7a95ab 100644 --- a/db2/log/log.c +++ b/db2/log/log.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log.c 10.33 (Sleepycat) 11/2/97"; +static const char sccsid[] = "@(#)log.c 10.34 (Sleepycat) 11/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -91,7 +91,7 @@ log_open(path, flags, mode, dbenv, lpp) retry_cnt = newregion = 0; retry: if (LF_ISSET(DB_CREATE)) { ret = __db_rcreate(dbenv, DB_APP_LOG, path, - DB_DEFAULT_LOG_FILE, mode, len, &fd, &dblp->maddr); + DB_DEFAULT_LOG_FILE, mode, len, 0, &fd, &dblp->maddr); if (ret == 0) { /* Put the LOG structure first in the region.
*/ lp = dblp->maddr; diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c index 140ea31fd1..0248e2815c 100644 --- a/db2/log/log_archive.c +++ b/db2/log/log_archive.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_archive.c 10.28 (Sleepycat) 10/28/97"; +static const char sccsid[] = "@(#)log_archive.c 10.29 (Sleepycat) 11/12/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -360,9 +360,9 @@ __absname(pref, name, newnamep) * If the user has their own malloc routine, use it. */ static int -__usermem(listp, func) +__usermem(listp, cmpfunc) char ***listp; - void *(*func) __P((size_t)); + void *(*cmpfunc) __P((size_t)); { size_t len; char **array, **arrayp, **orig, *strp; @@ -378,10 +378,10 @@ __usermem(listp, func) * XXX * Don't simplify this expression, SunOS compilers don't like it. */ - if (func == NULL) + if (cmpfunc == NULL) array = (char **)__db_malloc(len); else - array = (char **)func(len); + array = (char **)cmpfunc(len); if (array == NULL) return (ENOMEM); strp = (char *)(array + (orig - *listp) + 1); diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c index d5dbfe1f5f..61626b090e 100644 --- a/db2/log/log_auto.c +++ b/db2/log/log_auto.c @@ -102,7 +102,6 @@ int __log_register_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __log_register_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __log_register_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -250,7 +249,6 @@ int __log_unregister_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __log_unregister_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __log_unregister_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; diff --git a/db2/log/log_get.c b/db2/log/log_get.c index ed35d57f82..2d1512c6b9 100644 --- a/db2/log/log_get.c +++ b/db2/log/log_get.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_get.c 10.21 (Sleepycat) 10/25/97"; +static 
const char sccsid[] = "@(#)log_get.c 10.22 (Sleepycat) 11/22/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -44,24 +44,21 @@ log_get(dblp, alsn, dbt, flags) /* Validate arguments. */ #define OKFLAGS (DB_CHECKPOINT | \ DB_CURRENT | DB_FIRST | DB_LAST | DB_NEXT | DB_PREV | DB_SET) - if (flags != 0) { - if ((ret = - __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0) - return (ret); - switch (flags) { - case DB_CHECKPOINT: - case DB_CURRENT: - case DB_FIRST: - case DB_LAST: - case DB_NEXT: - case DB_PREV: - case DB_SET: - case 0: - break; - default: - return (__db_ferr(dblp->dbenv, "log_get", 1)); - } + if ((ret = __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0) + return (ret); + switch (flags) { + case DB_CHECKPOINT: + case DB_CURRENT: + case DB_FIRST: + case DB_LAST: + case DB_NEXT: + case DB_PREV: + case DB_SET: + break; + default: + return (__db_ferr(dblp->dbenv, "log_get", 1)); } + if (F_ISSET(dblp, DB_AM_THREAD)) { if (LF_ISSET(DB_NEXT | DB_PREV | DB_CURRENT)) return (__db_ferr(dblp->dbenv, "log_get", 1)); diff --git a/db2/log/log_put.c b/db2/log/log_put.c index 92d9563301..42fec88a7d 100644 --- a/db2/log/log_put.c +++ b/db2/log/log_put.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_put.c 10.20 (Sleepycat) 11/2/97"; +static const char sccsid[] = "@(#)log_put.c 10.22 (Sleepycat) 11/12/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -28,10 +28,10 @@ static const char sccsid[] = "@(#)log_put.c 10.20 (Sleepycat) 11/2/97"; #include "hash.h" #include "common_ext.h" -static int __log_fill __P((DB_LOG *, void *, u_int32_t)); +static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t)); static int __log_flush __P((DB_LOG *, const DB_LSN *)); static int __log_newfd __P((DB_LOG *)); -static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t)); +static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); static int __log_write __P((DB_LOG *, void *, u_int32_t)); /* @@ -117,12 
+117,13 @@ __log_put(dblp, lsn, dbt, flags) /* Reset the file write offset. */ lp->w_off = 0; - - /* Reset the first-unwritten LSN for the buffer. */ - lp->uw_lsn = lp->lsn; } else lastoff = 0; + /* Initialize the LSN information returned to the user. */ + lsn->file = lp->lsn.file; + lsn->offset = lp->lsn.offset; + /* * Insert persistent information as the first record in every file. * Note that the previous length is wrong for the very first record @@ -131,17 +132,17 @@ __log_put(dblp, lsn, dbt, flags) if (lp->lsn.offset == 0) { t.data = &lp->persist; t.size = sizeof(LOGP); - if ((ret = __log_putr(dblp, + if ((ret = __log_putr(dblp, lsn, &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0) return (ret); - } - /* Initialize the LSN information returned to the user. */ - lsn->file = lp->lsn.file; - lsn->offset = lp->lsn.offset; + /* Update the LSN information returned to the user. */ + lsn->file = lp->lsn.file; + lsn->offset = lp->lsn.offset; + } - /* Put out the user's record. */ - if ((ret = __log_putr(dblp, dbt, lp->lsn.offset - lp->len)) != 0) + /* Write the application's log record. */ + if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0) return (ret); /* @@ -184,19 +185,6 @@ __log_put(dblp, lsn, dbt, flags) (void)time(&lp->chkpt); lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0; } - - /* - * When an application calls the log_flush routine, we need to figure - * out if the current buffer needs to be flushed. The problem is that - * if a record spans buffers, it's possible for the record continued - * in the current buffer to have begun in a previous buffer. Each time - * we write a buffer, we update the first-unwritten LSN to point to the - * first LSN after that written buffer. If we have a spanning record, - * correct that value to be the LSN that started it all, here. 
- */ - if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off) - lp->uw_lsn = *lsn; - return (0); } @@ -205,8 +193,9 @@ __log_put(dblp, lsn, dbt, flags) * Actually put a record into the log. */ static int -__log_putr(dblp, dbt, prev) +__log_putr(dblp, lsn, dbt, prev) DB_LOG *dblp; + DB_LSN *lsn; const DBT *dbt; u_int32_t prev; { @@ -225,15 +214,15 @@ __log_putr(dblp, dbt, prev) hdr.len = sizeof(HDR) + dbt->size; hdr.cksum = __ham_func4(dbt->data, dbt->size); - if ((ret = __log_fill(dblp, &hdr, sizeof(HDR))) != 0) + if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0) return (ret); + lp->len = sizeof(HDR); lp->lsn.offset += sizeof(HDR); - if ((ret = __log_fill(dblp, dbt->data, dbt->size)) != 0) + if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0) return (ret); + lp->len += dbt->size; lp->lsn.offset += dbt->size; - - lp->len = sizeof(HDR) + dbt->size; return (0); } @@ -266,7 +255,7 @@ __log_flush(dblp, lsn) { DB_LSN t_lsn; LOG *lp; - int ret; + int current, ret; ret = 0; lp = dblp->lp; @@ -292,23 +281,27 @@ __log_flush(dblp, lsn) /* * If the LSN is less than the last-sync'd LSN, we're done. Note, * the last-sync LSN saved in s_lsn is the LSN of the first byte - * that has not yet been written to disk, so the test is <, not <=. + * we absolutely know has been written to disk, so the test is <=. */ if (lsn->file < lp->s_lsn.file || - (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset)) + (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset)) return (0); /* * We may need to write the current buffer. We have to write the * current buffer if the flush LSN is greater than or equal to the - * first-unwritten LSN (uw_lsn). If we write the buffer, then we - * update the first-unwritten LSN. + * buffer's starting LSN. 
*/ + current = 0; if (lp->b_off != 0 && - lsn->file >= lp->uw_lsn.file && lsn->offset >= lp->uw_lsn.offset) + lsn->file >= lp->f_lsn.file && lsn->offset >= lp->f_lsn.offset) { if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0) return (ret); + lp->b_off = 0; + current = 1; + } + /* * It's possible that this thread may never have written to this log * file. Acquire a file descriptor if we don't already have one. @@ -323,10 +316,14 @@ __log_flush(dblp, lsn) ++lp->stat.st_scount; /* - * Set the last-synced LSN, the first LSN after the last record - * that we know is on disk. + * Set the last-synced LSN, using the LSN of the current buffer. If + * the current buffer was flushed, we know the LSN of the first byte + * of the buffer is on disk, otherwise, we only know that the LSN of + * the record before the one beginning the current buffer is on disk. */ - lp->s_lsn = lp->uw_lsn; + lp->s_lsn = lp->f_lsn; + if (!current) + --lp->s_lsn.offset; return (0); } @@ -336,8 +333,9 @@ __log_flush(dblp, lsn) * Write information into the log. */ static int -__log_fill(dblp, addr, len) +__log_fill(dblp, lsn, addr, len) DB_LOG *dblp; + DB_LSN *lsn; void *addr; u_int32_t len; { @@ -349,6 +347,15 @@ __log_fill(dblp, addr, len) /* Copy out the data. */ for (lp = dblp->lp; len > 0;) { /* + * If we're beginning a new buffer, note the user LSN to which + * the first byte of the buffer belongs. We have to know this + * when flushing the buffer so that we know if the in-memory + * buffer needs to be flushed. + */ + if (lp->b_off == 0) + lp->f_lsn = *lsn; + + /* * If we're on a buffer boundary and the data is big enough, * copy as many records as we can directly from the data. */ @@ -371,9 +378,12 @@ __log_fill(dblp, addr, len) lp->b_off += nw; /* If we fill the buffer, flush it. 
*/ - if (lp->b_off == sizeof(lp->buf) && - (ret = __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0) - return (ret); + if (lp->b_off == sizeof(lp->buf)) { + if ((ret = + __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0) + return (ret); + lp->b_off = 0; + } } return (0); } @@ -412,14 +422,8 @@ __log_write(dblp, addr, len) if (nw != (int32_t)len) return (EIO); - /* - * Reset the buffer offset, update the seek offset, and update the - * first-unwritten LSN. - */ - lp->b_off = 0; + /* Reset the buffer offset and update the seek offset. */ lp->w_off += len; - lp->uw_lsn.file = lp->lsn.file; - lp->uw_lsn.offset = lp->w_off; /* Update written statistics. */ if ((lp->stat.st_w_bytes += len) >= MEGABYTE) { diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c index a707603eec..578abedcb6 100644 --- a/db2/mp/mp_bh.c +++ b/db2/mp/mp_bh.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_bh.c 10.21 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)mp_bh.c 10.23 (Sleepycat) 11/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -40,7 +40,6 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) BH *bhp; int *restartp, *wrotep; { - DBT dbt; DB_MPOOLFILE *dbmfp; DB_MPREG *mpreg; @@ -53,7 +52,7 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) * Walk the process' DB_MPOOLFILE list and find a file descriptor for * the file. We also check that the descriptor is open for writing. * If we find a descriptor on the file that's not open for writing, we - * try and upgrade it to make it writeable. + * try and upgrade it to make it writeable. If that fails, we're done. */ LOCKHANDLE(dbmp, dbmp->mutexp); for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); @@ -86,18 +85,34 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) } /* - * Try and open the file; ignore any error, assume it's a permissions - * problem. + * Try and open the file, attaching to the underlying shared area. 
* * XXX - * There's no negative cache here, so we may repeatedly try and open - * files that we have previously tried (and failed) to open. + * Don't try to attach to temporary files. There are two problems in + * trying to do that. First, if we have different privileges than the + * process that "owns" the temporary file, we might create the backing + * disk file such that the owning process couldn't read/write its own + * buffers, e.g., memp_trickle() running as root creating a file owned + * as root, mode 600. Second, if the temporary file has already been + * created, we don't have any way of finding out what its real name is, + * and, even if we did, it was already unlinked (so that it won't be + * left if the process dies horribly). This decision causes a problem, + * however: if the temporary file consumes the entire buffer cache, + * and the owner doesn't flush the buffers to disk, we could end up + * with resource starvation, and the memp_trickle() thread couldn't do + * anything about it. That's a pretty unlikely scenario, though. + * + * XXX + * There's no negative cache, so we may repeatedly try and open files + * that we have previously tried (and failed) to open. + * + * Ignore any error, assume it's a permissions problem. 
*/ - dbt.size = mfp->pgcookie_len; - dbt.data = R_ADDR(dbmp, mfp->pgcookie_off); - if (__memp_fopen(dbmp, R_ADDR(dbmp, mfp->path_off), - mfp->ftype, 0, 0, mfp->stat.st_pagesize, - mfp->lsn_off, &dbt, R_ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0) + if (F_ISSET(mfp, MP_TEMP)) + return (0); + + if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype, + 0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0) return (0); found: return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep)); @@ -144,7 +159,7 @@ __memp_pgread(dbmfp, bhp, can_create) UNLOCKHANDLE(dbmp, dbmfp->mutexp); __db_err(dbmp->dbenv, "%s: page %lu doesn't exist, create flag not set", - dbmfp->path, (u_long)bhp->pgno); + __memp_fn(dbmfp), (u_long)bhp->pgno); goto err; } UNLOCKHANDLE(dbmp, dbmfp->mutexp); @@ -270,12 +285,14 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) /* Temporary files may not yet have been created. */ LOCKHANDLE(dbmp, dbmfp->mutexp); - if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv, DB_APP_TMP, - NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) { - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - __db_err(dbenv, "unable to create temporary backing file"); - goto err; - } + if (dbmfp->fd == -1) + if ((ret = __db_appname(dbenv, DB_APP_TMP, + NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) { + UNLOCKHANDLE(dbmp, dbmfp->mutexp); + __db_err(dbenv, + "unable to create temporary backing file"); + goto err; + } /* Write the page out. 
*/ if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) @@ -350,8 +367,8 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) return (0); -syserr: __db_err(dbenv, - "%s: %s failed for page %lu", dbmfp->path, fail, (u_long)bhp->pgno); +syserr: __db_err(dbenv, "%s: %s failed for page %lu", + __memp_fn(dbmfp), fail, (u_long)bhp->pgno); err: UNLOCKBUFFER(dbmp, bhp); LOCKREGION(dbmp); @@ -416,7 +433,7 @@ __memp_pg(dbmfp, bhp, is_pgin) err: UNLOCKHANDLE(dbmp, dbmp->mutexp); __db_err(dbmp->dbenv, "%s: %s failed for page %lu", - dbmfp->path, is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno); + __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno); return (ret); } @@ -462,7 +479,8 @@ __memp_upgrade(dbmp, dbmfp, mfp) DB_MPOOLFILE *dbmfp; MPOOLFILE *mfp; { - int fd; + int fd, ret; + char *rpath; /* * !!! @@ -477,16 +495,24 @@ __memp_upgrade(dbmp, dbmfp, mfp) if (F_ISSET(dbmfp, MP_UPGRADE_FAIL)) return (1); - /* Try the open. */ - if (__db_open(R_ADDR(dbmp, mfp->path_off), 0, 0, 0, &fd) != 0) { + /* + * Calculate the real name for this file and try to open it read/write. + * We know we have a valid pathname for the file because it's the only + * way we could have gotten a file descriptor of any kind. + */ + if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA, + NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0) + return (ret); + if (__db_open(rpath, 0, 0, 0, &fd) != 0) { F_SET(dbmfp, MP_UPGRADE_FAIL); - return (1); + ret = 1; + } else { + /* Swap the descriptors and set the upgrade flag. */ + (void)__db_close(dbmfp->fd); + dbmfp->fd = fd; + F_SET(dbmfp, MP_UPGRADE); + ret = 0; } - - /* Swap the descriptors and set the upgrade flag. 
*/ - (void)__db_close(dbmfp->fd); - dbmfp->fd = fd; - F_SET(dbmfp, MP_UPGRADE); - - return (0); + FREES(rpath); + return (ret); } diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c index 3f99e60505..1010751c92 100644 --- a/db2/mp/mp_fget.c +++ b/db2/mp/mp_fget.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fget.c 10.30 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)mp_fget.c 10.32 (Sleepycat) 11/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -38,13 +38,11 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) int flags; void *addrp; { - BH *bhp, *tbhp; + BH *bhp; DB_MPOOL *dbmp; MPOOL *mp; MPOOLFILE *mfp; - db_pgno_t lastpgno; size_t bucket, mf_offset; - off_t size; u_long cnt; int b_incr, b_inserted, readonly_alloc, ret; void *addr; @@ -97,7 +95,7 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) mf_offset = R_OFFSET(dbmp, mfp); addr = NULL; bhp = NULL; - b_incr = b_inserted = readonly_alloc = ret = 0; + b_incr = b_inserted = ret = 0; LOCKREGION(dbmp); @@ -114,11 +112,10 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) * It would be possible to do so by reference counting the open * pages from the mmap, but it's unclear to me that it's worth it. */ - if (dbmfp->addr != NULL && dbmfp->mfp->can_mmap) { - lastpgno = dbmfp->len == 0 ? - 0 : (dbmfp->len - 1) / mfp->stat.st_pagesize; + if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) { + readonly_alloc = 0; if (LF_ISSET(DB_MPOOL_LAST)) - *pgnoaddr = lastpgno; + *pgnoaddr = mfp->last_pgno; else { /* * !!! 
@@ -128,10 +125,10 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) */ if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW)) readonly_alloc = 1; - else if (*pgnoaddr > lastpgno) { + else if (*pgnoaddr > mfp->last_pgno) { __db_err(dbmp->dbenv, "%s: page %lu doesn't exist", - dbmfp->path, (u_long)*pgnoaddr); + __memp_fn(dbmfp), (u_long)*pgnoaddr); ret = EINVAL; goto err; } @@ -146,79 +143,38 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) } } - /* - * If requesting the last page or a new page, find the last page. The - * tricky thing is that the user may have created a page already that's - * after any page that exists in the file. - */ - if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) { - /* - * Temporary files may not yet have been created. - * - * Don't lock -- there are no atomicity issues for stat(2). - */ - if (dbmfp->fd == -1) - size = 0; - else if ((ret = - __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) { - __db_err(dbmp->dbenv, - "%s: %s", dbmfp->path, strerror(ret)); - goto err; - } - - *pgnoaddr = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize; + /* Check if requesting the last page or a new page. */ + if (LF_ISSET(DB_MPOOL_LAST)) + *pgnoaddr = mfp->last_pgno; - /* - * Walk the list of BH's, looking for later pages. Save the - * pointer if a later page is found so that we don't have to - * search the list twice. - * - * If requesting a new page, return the page one after the last - * page -- which we'll have to create. - */ - for (tbhp = SH_TAILQ_FIRST(&mp->bhq, __bh); - tbhp != NULL; tbhp = SH_TAILQ_NEXT(tbhp, q, __bh)) - if (tbhp->pgno >= *pgnoaddr && - tbhp->mf_offset == mf_offset) { - bhp = tbhp; - *pgnoaddr = bhp->pgno; - } - if (LF_ISSET(DB_MPOOL_NEW)) - ++*pgnoaddr; - } - - /* If we already found the right buffer, return it. */ - if (LF_ISSET(DB_MPOOL_LAST) && bhp != NULL) { - addr = bhp->buf; - goto found; + if (LF_ISSET(DB_MPOOL_NEW)) { + *pgnoaddr = mfp->last_pgno + 1; + goto alloc; } - /* If we haven't checked the BH hash bucket queue, do the search. 
*/ - if (!LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) { - bucket = BUCKET(mp, mf_offset, *pgnoaddr); - for (cnt = 0, - bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { - ++cnt; - if (bhp->pgno == *pgnoaddr && - bhp->mf_offset == mf_offset) { - addr = bhp->buf; - ++mp->stat.st_hash_searches; - if (cnt > mp->stat.st_hash_longest) - mp->stat.st_hash_longest = cnt; - mp->stat.st_hash_examined += cnt; - goto found; - } - } - if (cnt != 0) { + /* Check the BH hash bucket queue. */ + bucket = BUCKET(mp, mf_offset, *pgnoaddr); + for (cnt = 0, + bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { + ++cnt; + if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) { + addr = bhp->buf; ++mp->stat.st_hash_searches; if (cnt > mp->stat.st_hash_longest) mp->stat.st_hash_longest = cnt; mp->stat.st_hash_examined += cnt; + goto found; } } + if (cnt != 0) { + ++mp->stat.st_hash_searches; + if (cnt > mp->stat.st_hash_longest) + mp->stat.st_hash_longest = cnt; + mp->stat.st_hash_examined += cnt; + } - /* +alloc: /* * Allocate a new buffer header and data space, and mark the contents * as useless. */ @@ -300,7 +256,7 @@ found: /* Increment the reference count. */ if (bhp->ref == UINT16_T_MAX) { __db_err(dbmp->dbenv, "%s: too many references to page %lu", - dbmfp->path, bhp->pgno); + __memp_fn(dbmfp), bhp->pgno); ret = EINVAL; goto err; } @@ -346,6 +302,14 @@ found: /* Increment the reference count. */ ++mfp->stat.st_cache_hit; } + /* + * If we're returning a page after our current notion of the last-page, + * update our information. Note, there's no way to un-instantiate this + * page, it's going to exist whether it's returned to us dirty or not. 
+ */ + if (bhp->pgno > mfp->last_pgno) + mfp->last_pgno = bhp->pgno; + mapret: LOCKHANDLE(dbmp, dbmfp->mutexp); ++dbmfp->pinref; UNLOCKHANDLE(dbmp, dbmfp->mutexp); diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c index de59c9ea9b..bdc4713863 100644 --- a/db2/mp/mp_fopen.c +++ b/db2/mp/mp_fopen.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fopen.c 10.30 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)mp_fopen.c 10.32 (Sleepycat) 11/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -28,8 +28,8 @@ static const char sccsid[] = "@(#)mp_fopen.c 10.30 (Sleepycat) 10/25/97"; #include "common_ext.h" static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *)); -static int __memp_mf_open __P((DB_MPOOL *, - DB_MPOOLFILE *, int, size_t, int, DBT *, u_int8_t *, int, MPOOLFILE **)); +static int __memp_mf_open __P((DB_MPOOL *, DB_MPOOLFILE *, + const char *, int, size_t, off_t, int, DBT *, u_int8_t *, MPOOLFILE **)); /* * memp_fopen -- @@ -53,7 +53,13 @@ memp_fopen(dbmp, path, ftype, "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0) return (ret); - return (__memp_fopen(dbmp, path, ftype, + /* Require a non-zero pagesize. */ + if (pagesize == 0) { + __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified"); + return (EINVAL); + } + + return (__memp_fopen(dbmp, NULL, path, ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp)); } @@ -61,13 +67,14 @@ memp_fopen(dbmp, path, ftype, * __memp_fopen -- * Open a backing file for the memory pool; internal version. 
* - * PUBLIC: int __memp_fopen __P((DB_MPOOL *, const char *, int, int, - * PUBLIC: int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); + * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int, + * PUBLIC: int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); */ int -__memp_fopen(dbmp, path, +__memp_fopen(dbmp, mfp, path, ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp) DB_MPOOL *dbmp; + MPOOLFILE *mfp; const char *path; int ftype, flags, mode, lsn_offset, needlock; size_t pagesize; @@ -77,24 +84,27 @@ __memp_fopen(dbmp, path, { DB_ENV *dbenv; DB_MPOOLFILE *dbmfp; - MPOOLFILE *mfp; off_t size; int ret; + u_int8_t idbuf[DB_FILE_ID_LEN]; + char *rpath; + /* + * XXX + * If mfp is provided, the following arguments do NOT need to be + * specified: + * lsn_offset + * pgcookie + * fileid + */ dbenv = dbmp->dbenv; ret = 0; - - /* Require a non-zero pagesize. */ - if (pagesize == 0) { - __db_err(dbenv, "memp_fopen: pagesize not specified"); - return (EINVAL); - } + rpath = NULL; /* Allocate and initialize the per-process structure. */ if ((dbmfp = (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) { - __db_err(dbenv, "%s: %s", - path == NULL ? TEMPORARY : path, strerror(ENOMEM)); + __db_err(dbenv, "memp_fopen: %s", strerror(ENOMEM)); return (ENOMEM); } dbmfp->dbmp = dbmp; @@ -109,54 +119,66 @@ __memp_fopen(dbmp, path, ret = EINVAL; goto err; } - dbmfp->path = (char *)TEMPORARY; - F_SET(dbmfp, MP_PATH_TEMP); + size = 0; } else { - /* Calculate the real name for this file. */ + /* Get the real name for this file and open it. */ if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, path, NULL, &dbmfp->path)) != 0) + DB_APP_DATA, NULL, path, NULL, &rpath)) != 0) goto err; - F_SET(dbmfp, MP_PATH_ALLOC); - - - /* Open the file. 
*/ - if ((ret = __db_open(dbmfp->path, + if ((ret = __db_open(rpath, LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) { - __db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret)); + __db_err(dbenv, "%s: %s", rpath, strerror(ret)); goto err; } /* Don't permit files that aren't a multiple of the pagesize. */ - if ((ret = - __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) { - __db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret)); + if ((ret = __db_ioinfo(rpath, dbmfp->fd, &size, NULL)) != 0) { + __db_err(dbenv, "%s: %s", rpath, strerror(ret)); goto err; } if (size % pagesize) { __db_err(dbenv, "%s: file size not a multiple of the pagesize", - dbmfp->path); + rpath); ret = EINVAL; goto err; } + + /* + * Get the file id if we weren't given one. Generated file id's + * don't use timestamps, otherwise there'd be no chance of any + * other process joining the party. + */ + if (mfp == NULL && fileid == NULL) { + if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0) + goto err; + fileid = idbuf; + } } /* - * Find/allocate the shared file objects. This includes allocating - * space for the per-process thread lock. + * If we weren't provided an underlying shared object to join with, + * find/allocate the shared file objects. Also allocate space for + * for the per-process thread lock. */ if (needlock) LOCKREGION(dbmp); - ret = __memp_mf_open(dbmp, dbmfp, ftype, pagesize, - lsn_offset, pgcookie, fileid, F_ISSET(dbmfp, MP_PATH_TEMP), &mfp); + + if (mfp == NULL) + ret = __memp_mf_open(dbmp, dbmfp, path, + ftype, pagesize, size, lsn_offset, pgcookie, fileid, &mfp); + else { + ++mfp->ref; + ret = 0; + } if (ret == 0 && F_ISSET(dbmp, MP_LOCKHANDLE) && (ret = __memp_ralloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0) LOCKINIT(dbmp, dbmfp->mutexp); + if (needlock) UNLOCKREGION(dbmp); - if (ret != 0) goto err; @@ -184,25 +206,25 @@ __memp_fopen(dbmp, path, * flatly impossible. Hope that mmap fails if the file is too large. 
*/ #define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */ - if (mfp->can_mmap) { + if (F_ISSET(mfp, MP_CAN_MMAP)) { if (!F_ISSET(dbmfp, MP_READONLY)) - mfp->can_mmap = 0; + F_CLR(mfp, MP_CAN_MMAP); if (path == NULL) - mfp->can_mmap = 0; + F_CLR(mfp, MP_CAN_MMAP); if (ftype != 0) - mfp->can_mmap = 0; + F_CLR(mfp, MP_CAN_MMAP); if (LF_ISSET(DB_NOMMAP)) - mfp->can_mmap = 0; + F_CLR(mfp, MP_CAN_MMAP); if (size > (dbenv == NULL || dbenv->mp_mmapsize == 0 ? DB_MAXMMAPSIZE : (off_t)dbenv->mp_mmapsize)) - mfp->can_mmap = 0; + F_CLR(mfp, MP_CAN_MMAP); } dbmfp->addr = NULL; - if (mfp->can_mmap) { + if (F_ISSET(mfp, MP_CAN_MMAP)) { dbmfp->len = size; if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) { - mfp->can_mmap = 0; dbmfp->addr = NULL; + F_CLR(mfp, MP_CAN_MMAP); } } @@ -217,8 +239,8 @@ err: /* * Note that we do not have to free the thread mutex, because we * never get to here after we have successfully allocated it. */ - if (F_ISSET(dbmfp, MP_PATH_ALLOC)) - FREES(dbmfp->path); + if (rpath != NULL) + FREES(rpath); if (dbmfp->fd != -1) (void)__db_close(dbmfp->fd); if (dbmfp != NULL) @@ -231,78 +253,80 @@ err: /* * Open an MPOOLFILE. */ static int -__memp_mf_open(dbmp, dbmfp, - ftype, pagesize, lsn_offset, pgcookie, fileid, istemp, retp) +__memp_mf_open(dbmp, dbmfp, path, + ftype, pagesize, size, lsn_offset, pgcookie, fileid, retp) DB_MPOOL *dbmp; DB_MPOOLFILE *dbmfp; - int ftype, lsn_offset, istemp; + const char *path; + int ftype, lsn_offset; size_t pagesize; + off_t size; DBT *pgcookie; u_int8_t *fileid; MPOOLFILE **retp; { MPOOLFILE *mfp; int ret; - u_int8_t idbuf[DB_FILE_ID_LEN]; void *p; - /* Temporary files can't match previous files. */ - if (istemp) - goto alloc; +#define ISTEMPORARY (path == NULL) /* - * Get the file id if we weren't give one. Generated file id's don't - * use timestamps, otherwise there'd be no chance of anyone joining - * the party. + * Walk the list of MPOOLFILE's, looking for a matching file. 
+ * Temporary files can't match previous files. */ - if (fileid == NULL) { - if ((ret = - __db_fileid(dbmp->dbenv, dbmfp->path, 0, idbuf)) != 0) - return (ret); - fileid = idbuf; - } - - /* Walk the list of MPOOLFILE's, looking for a matching file. */ - for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) - if (!memcmp(fileid, - R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { - if (ftype != mfp->ftype || - pagesize != mfp->stat.st_pagesize) { - __db_err(dbmp->dbenv, - "%s: ftype or pagesize changed", - dbmfp->path); - ret = EINVAL; - mfp = NULL; - goto ret1; + if (!ISTEMPORARY) + for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); + mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { + if (F_ISSET(mfp, MP_TEMP)) + continue; + if (!memcmp(fileid, + R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { + if (ftype != mfp->ftype || + pagesize != mfp->stat.st_pagesize) { + __db_err(dbmp->dbenv, + "%s: ftype or pagesize changed", + path); + return (EINVAL); + } + + /* Found it: increment the reference count. */ + ++mfp->ref; + *retp = mfp; + return (0); } - /* Found it: increment the reference count. */ - ++mfp->ref; - goto ret1; } /* Allocate a new MPOOLFILE. */ -alloc: if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) - goto ret1; + if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) + return (ret); + *retp = mfp; /* Initialize the structure. */ memset(mfp, 0, sizeof(MPOOLFILE)); mfp->ref = 1; mfp->ftype = ftype; - mfp->can_mmap = 1; mfp->lsn_off = lsn_offset; - mfp->stat.st_pagesize = pagesize; - /* Copy the file path into shared memory. */ - if ((ret = __memp_ralloc(dbmp, - strlen(dbmfp->path) + 1, &mfp->path_off, &p)) != 0) - goto err; - memcpy(p, dbmfp->path, strlen(dbmfp->path) + 1); + /* + * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget, + * we have to know the last page in the file. Figure it out and save + * it away. 
+ */ + mfp->stat.st_pagesize = pagesize; + mfp->last_pgno = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize; - /* Copy the file identification string into shared memory. */ - if (istemp) - mfp->fileid_off = 0; + F_SET(mfp, MP_CAN_MMAP); + if (ISTEMPORARY) + F_SET(mfp, MP_TEMP); else { + /* Copy the file path into shared memory. */ + if ((ret = __memp_ralloc(dbmp, + strlen(path) + 1, &mfp->path_off, &p)) != 0) + goto err; + memcpy(p, path, strlen(path) + 1); + + /* Copy the file identification string into shared memory. */ if ((ret = __memp_ralloc(dbmp, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) goto err; @@ -328,15 +352,13 @@ alloc: if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) err: if (mfp->path_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); - if (!istemp) + if (mfp->fileid_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off)); if (mfp != NULL) __db_shalloc_free(dbmp->addr, mfp); mfp = NULL; } - -ret1: *retp = mfp; return (0); } @@ -357,7 +379,7 @@ memp_fclose(dbmfp) /* Complain if pinned blocks never returned. */ if (dbmfp->pinref != 0) __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned", - dbmfp->path, (u_long)dbmfp->pinref); + __memp_fn(dbmfp), (u_long)dbmfp->pinref); /* Remove the DB_MPOOLFILE structure from the list. */ LOCKHANDLE(dbmp, dbmp->mutexp); @@ -370,18 +392,18 @@ memp_fclose(dbmfp) /* Discard any mmap information. */ if (dbmfp->addr != NULL && (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0) - __db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(ret)); + __db_err(dbmp->dbenv, + "%s: %s", __memp_fn(dbmfp), strerror(ret)); /* Close the file; temporary files may not yet have been created. */ if (dbmfp->fd != -1 && (t_ret = __db_close(dbmfp->fd)) != 0) { - __db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(t_ret)); + __db_err(dbmp->dbenv, + "%s: %s", __memp_fn(dbmfp), strerror(t_ret)); if (ret != 0) t_ret = ret; } /* Free memory. 
*/ - if (F_ISSET(dbmfp, MP_PATH_ALLOC)) - FREES(dbmfp->path); if (dbmfp->mutexp != NULL) { LOCKREGION(dbmp); __db_shalloc_free(dbmp->addr, dbmfp->mutexp); @@ -434,7 +456,8 @@ __memp_mf_close(dbmp, dbmfp) if (F_ISSET(bhp, BH_DIRTY)) __db_err(dbmp->dbenv, "%s: close: pgno %lu left dirty; ref %lu", - dbmfp->path, (u_long)bhp->pgno, (u_long)bhp->ref); + __memp_fn(dbmfp), + (u_long)bhp->pgno, (u_long)bhp->ref); #endif if (bhp->mf_offset == mf_offset) { @@ -452,7 +475,8 @@ __memp_mf_close(dbmp, dbmfp) /* Free the space. */ __db_shalloc_free(dbmp->addr, mfp); - __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); + if (mfp->path_off != 0) + __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); if (mfp->fileid_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off)); if (mfp->pgcookie_off != 0) diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c index 892f179d3a..38e86b8ac5 100644 --- a/db2/mp/mp_fput.c +++ b/db2/mp/mp_fput.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fput.c 10.14 (Sleepycat) 10/5/97"; +static const char sccsid[] = "@(#)mp_fput.c 10.16 (Sleepycat) 11/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -54,7 +54,7 @@ memp_fput(dbmfp, pgaddr, flags) if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) { __db_err(dbmp->dbenv, "%s: dirty flag set for readonly file page", - dbmfp->path); + __memp_fn(dbmfp)); return (EACCES); } } @@ -64,7 +64,7 @@ memp_fput(dbmfp, pgaddr, flags) if (dbmfp->pinref == 0) __db_err(dbmp->dbenv, "%s: put: more blocks returned than retrieved", - dbmfp->path); + __memp_fn(dbmfp)); else --dbmfp->pinref; UNLOCKHANDLE(dbmp, dbmfp->mutexp); diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c index a7d2706008..2eff7dd74c 100644 --- a/db2/mp/mp_fset.c +++ b/db2/mp/mp_fset.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fset.c 10.10 (Sleepycat) 10/5/97"; +static const char sccsid[] = "@(#)mp_fset.c 10.12 (Sleepycat) 
11/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -43,20 +43,21 @@ memp_fset(dbmfp, pgaddr, flags) mp = dbmp->mp; /* Validate arguments. */ - if (flags != 0) { - if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags, - DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0) - return (ret); - if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset", - flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0) - return (ret); + if (flags == 0) + return (__db_ferr(dbmp->dbenv, "memp_fset", 1)); - if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) { - __db_err(dbmp->dbenv, - "%s: dirty flag set for readonly file page", - dbmfp->path); - return (EACCES); - } + if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags, + DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0) + return (ret); + if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset", + flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0) + return (ret); + + if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) { + __db_err(dbmp->dbenv, + "%s: dirty flag set for readonly file page", + __memp_fn(dbmfp)); + return (EACCES); } /* Convert the page address to a buffer header. */ diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c index 4c19739ebd..ca81f8d6d6 100644 --- a/db2/mp/mp_open.c +++ b/db2/mp/mp_open.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_open.c 10.15 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -66,17 +66,6 @@ memp_open(path, flags, mode, dbenv, retp) F_SET(dbmp, MP_ISPRIVATE); /* - * XXX - * HP-UX won't permit mutexes to live in anything but shared memory. - * So, we have to instantiate the shared mpool region file on that - * architecture, regardless. If this turns out to be a performance - * problem, we could probably use anonymous memory instead. - */ -#if defined(__hppa) - F_CLR(dbmp, MP_ISPRIVATE); -#endif - - /* * Map in the region. 
We do locking regardless, as portions of it are * implemented in common code (if we put the region in a file, that is). */ diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c index 01f0920df4..6ff1131b6e 100644 --- a/db2/mp/mp_pr.c +++ b/db2/mp/mp_pr.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_pr.c 10.18 (Sleepycat) 11/1/97"; +static const char sccsid[] = "@(#)mp_pr.c 10.20 (Sleepycat) 11/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -100,7 +100,7 @@ memp_stat(dbmp, gspp, fspp, db_malloc) mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); mfp != NULL; ++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { - name = R_ADDR(dbmp, mfp->path_off); + name = __memp_fns(dbmp, mfp); nlen = strlen(name); len = sizeof(DB_MPOOL_FSTAT) + nlen + 1; if ((*tfsp = db_malloc == NULL ? @@ -120,6 +120,37 @@ memp_stat(dbmp, gspp, fspp, db_malloc) } /* + * __memp_fn -- + * On errors we print whatever is available as the file name. + * + * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *)); + */ +char * +__memp_fn(dbmfp) + DB_MPOOLFILE *dbmfp; +{ + return (__memp_fns(dbmfp->dbmp, dbmfp->mfp)); +} + +/* + * __memp_fns -- + * On errors we print whatever is available as the file name. + * + * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *)); + * + */ +char * +__memp_fns(dbmp, mfp) + DB_MPOOL *dbmp; + MPOOLFILE *mfp; +{ + if (mfp->path_off == 0) + return ((char *)"temporary"); + + return ((char *)R_ADDR(dbmp, mfp->path_off)); +} + +/* * __memp_debug -- * Display MPOOL structures. * @@ -152,7 +183,7 @@ __memp_debug(dbmp, fp, data) (void)fprintf(fp, "%lu process-local files\n", cnt); for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) { - (void)fprintf(fp, "%s\n", dbmfp->path); + (void)fprintf(fp, "%s\n", __memp_fn(dbmfp)); __memp_pdbmf(fp, dbmfp, data); } @@ -285,7 +316,7 @@ __memp_pmf(fp, mfp, data) return; (void)fprintf(fp, " %d references; %s; pagesize: %lu\n", mfp->ref, - mfp->can_mmap ? 
"mmap" : "read/write", + F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write", (u_long)mfp->stat.st_pagesize); } diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c index 6b2f93125c..c20e669749 100644 --- a/db2/mp/mp_region.c +++ b/db2/mp/mp_region.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_region.c 10.16 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)mp_region.c 10.18 (Sleepycat) 11/29/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -224,17 +224,28 @@ retry: if (LF_ISSET(DB_CREATE)) { * the file descriptor for locking. However, it should not * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't * defined. + * + * XXX + * HP-UX won't permit mutexes to live in anything but shared + * memory. So, instantiate the shared mpool region file on + * that architecture, regardless. If this turns out to be a + * performance problem, we could use anonymous memory instead. */ - if (F_ISSET(dbmp, MP_ISPRIVATE)) { +#if !defined(__hppa) + if (F_ISSET(dbmp, MP_ISPRIVATE)) if ((dbmp->maddr = __db_malloc(rlen)) == NULL) ret = ENOMEM; - else + else { + F_SET(dbmp, MP_MALLOC); ret = __db_rinit(dbmp->dbenv, dbmp->maddr, 0, rlen, 0); - } else + } + else +#endif ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path, - DB_DEFAULT_MPOOL_FILE, mode, rlen, &fd, - &dbmp->maddr); + DB_DEFAULT_MPOOL_FILE, mode, rlen, + F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0, + &fd, &dbmp->maddr); if (ret == 0) { /* Put the MPOOL structure first in the region. */ mp = dbmp->maddr; @@ -315,7 +326,7 @@ retry: if (LF_ISSET(DB_CREATE)) { dbmp->fd = fd; /* If we locked the region, release it now. 
*/ - if (!F_ISSET(dbmp, MP_ISPRIVATE)) + if (!F_ISSET(dbmp, MP_MALLOC)) UNLOCKREGION(dbmp); return (0); @@ -339,7 +350,7 @@ int __memp_rclose(dbmp) DB_MPOOL *dbmp; { - if (F_ISSET(dbmp, MP_ISPRIVATE)) { + if (F_ISSET(dbmp, MP_MALLOC)) { __db_free(dbmp->maddr); return (0); } diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c index 2f042df9e1..47a7f2ebca 100644 --- a/db2/mp/mp_sync.c +++ b/db2/mp/mp_sync.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_sync.c 10.15 (Sleepycat) 11/1/97"; +static const char sccsid[] = "@(#)mp_sync.c 10.17 (Sleepycat) 11/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -201,7 +201,7 @@ memp_sync(dbmp, lsnp) */ if (!wrote) { __db_err(dbenv, "%s: unable to flush page: %lu", - R_ADDR(dbmp, mfp->path_off), + __memp_fns(dbmp, mfp), (u_long)bharray[next]->pgno); ret = EPERM; goto err; @@ -244,16 +244,24 @@ memp_fsync(dbmfp) size_t mf_offset; int ar_cnt, cnt, nalloc, next, pincnt, notused, ret, wrote; + dbmp = dbmfp->dbmp; + /* * If this handle doesn't have a file descriptor that's open for * writing, or if the file is a temporary, there's no reason to * proceed further. */ - if (F_ISSET(dbmfp, MP_READONLY | MP_PATH_TEMP)) + if (F_ISSET(dbmfp, MP_READONLY)) return (0); ret = 0; - dbmp = dbmfp->dbmp; + LOCKREGION(dbmp); + if (F_ISSET(dbmfp->mfp, MP_TEMP)) + ret = 1; + UNLOCKREGION(dbmp); + if (ret) + return (0); + mf_offset = R_OFFSET(dbmp, dbmfp->mfp); /* @@ -407,18 +415,26 @@ loop: total = mp->stat.st_page_clean + mp->stat.st_page_dirty; continue; mfp = R_ADDR(dbmp, bhp->mf_offset); + + /* + * We can't write to temporary files -- see the comment in + * mp_bh.c:__memp_bhwrite(). + */ + if (F_ISSET(mfp, MP_TEMP)) + continue; + if ((ret = __memp_bhwrite(dbmp, mfp, bhp, &notused, &wrote)) != 0) goto err; /* - * Any process syncing the shared memory buffer pool - * had better be able to write to any underlying file. - * Be understanding, but firm, on this point.
+ * Any process syncing the shared memory buffer pool had better + * be able to write to any underlying file. Be understanding, + * but firm, on this point. */ if (!wrote) { __db_err(dbmp->dbenv, "%s: unable to flush page: %lu", - R_ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno); + __memp_fns(dbmp, mfp), (u_long)bhp->pgno); ret = EPERM; goto err; } diff --git a/db2/mutex/README b/db2/mutex/README index 30d6b6a7d1..fceeef7ed8 100644 --- a/db2/mutex/README +++ b/db2/mutex/README @@ -1,4 +1,4 @@ -# @(#)README 10.1 (Sleepycat) 4/12/97 +# @(#)README 10.2 (Sleepycat) 11/25/97 Resource locking routines: lock based on a db_mutex_t. All this gunk (including trying to make assembly code portable), is necessary because @@ -11,9 +11,9 @@ information. If HAVE_SPINLOCKS is defined (i.e. we know how to do test-and-sets for this compiler/architecture combination), we try and lock the resource tsl -TSL_DEFAULT_SPINS times. If we can't acquire the lock that way, we use -a system call to sleep for 10ms, 20ms, 40ms, etc. (The time is bounded -at 1 second, just in case.) Using the timer backoff means that there are +__db_tsl_spins times. If we can't acquire the lock that way, we use a +system call to sleep for 10ms, 20ms, 40ms, etc. (The time is bounded at +1 second, just in case.) Using the timer backoff means that there are two assumptions: that locks are held for brief periods (never over system calls or I/O) and that locks are not hotly contested. 
diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c index 7c8ea6ebd1..6e87c5f215 100644 --- a/db2/mutex/mutex.c +++ b/db2/mutex/mutex.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mutex.c 10.28 (Sleepycat) 10/31/97"; +static const char sccsid[] = "@(#)mutex.c 10.29 (Sleepycat) 11/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -101,12 +101,6 @@ static const char sccsid[] = "@(#)mutex.c 10.28 (Sleepycat) 10/31/97"; #endif /* HAVE_SPINLOCKS */ -#ifdef MORE_THAN_ONE_PROCESSOR -#define TSL_DEFAULT_SPINS 5 /* Default spins before block. */ -#else -#define TSL_DEFAULT_SPINS 1 /* Default spins before block. */ -#endif - /* * __db_mutex_init -- * Initialize a DB mutex structure. @@ -130,6 +124,7 @@ __db_mutex_init(mp, off) #ifdef HAVE_SPINLOCKS TSL_INIT(&mp->tsl_resource); + mp->spins = __os_spin(); #else mp->off = off; #endif @@ -155,11 +150,8 @@ __db_mutex_lock(mp, fd) int nspins; for (usecs = MS(10);;) { - /* - * Try and acquire the uncontested resource lock for - * TSL_DEFAULT_SPINS. - */ - for (nspins = TSL_DEFAULT_SPINS; nspins > 0; --nspins) + /* Try and acquire the uncontested resource lock for N spins. 
*/ + for (nspins = mp->spins; nspins > 0; --nspins) if (TSL_SET(&mp->tsl_resource)) { #ifdef DEBUG if (mp->pid != 0) { diff --git a/db2/os/os_func.c b/db2/os/os_config.c index afd40f4624..ecb4f1c2e7 100644 --- a/db2/os/os_func.c +++ b/db2/os/os_config.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_func.c 10.4 (Sleepycat) 10/28/97"; +static const char sccsid[] = "@(#)os_config.c 10.9 (Sleepycat) 11/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -31,7 +31,6 @@ static const char sccsid[] = "@(#)os_func.c 10.4 (Sleepycat) 10/28/97"; #define imported #endif -imported extern void *calloc __P((size_t, size_t)); imported extern int close __P((int)); imported extern void free __P((void *)); imported extern int fsync __P((int)); @@ -42,16 +41,16 @@ imported extern char *strdup __P((const char *)); imported extern void *realloc __P((void *, size_t)); imported extern int unlink __P((const char *)); imported extern ssize_t write __P((int, const void *, size_t)); +imported extern void *memset __P((void *, int, size_t)); /* * __db_jump -- * This list of interfaces that applications can replace. In some * cases, the user is permitted to replace the standard ANSI C or - * POSIX 1003.1 call, e.g., calloc or read. In others, we provide + * POSIX 1003.1 call, e.g., malloc or read. In others, we provide * a local interface to the functionality, e.g., __os_map. */ struct __db_jumptab __db_jump = { - calloc, /* DB_FUNC_CALLOC */ close, /* DB_FUNC_CLOSE */ __os_dirfree, /* DB_FUNC_DIRFREE */ __os_dirlist, /* DB_FUNC_DIRLIST */ @@ -73,9 +72,11 @@ struct __db_jumptab __db_jump = { NULL /* DB_FUNC_YIELD */ }; +int __db_tsl_spins; /* DB_TSL_SPINS */ + /* * db_jump_set -- - * Replace an interface. + * Replace functions for the DB package. 
*/ int db_jump_set(func, which) @@ -84,70 +85,148 @@ db_jump_set(func, which) { switch (which) { case DB_FUNC_CALLOC: - __db_calloc = (void *(*) __P((size_t, size_t)))func; - break; + /* + * XXX + * Obsolete, calloc is no longer called by DB. + */ + break; case DB_FUNC_CLOSE: - __os_close = (int (*) __P((int)))func; + __db_jump.db_close = (int (*) __P((int)))func; break; case DB_FUNC_DIRFREE: - __db_dirfree = (void (*) __P((char **, int)))func; + __db_jump.db_dirfree = (void (*) __P((char **, int)))func; break; case DB_FUNC_DIRLIST: - __db_dirlist = + __db_jump.db_dirlist = (int (*) __P((const char *, char ***, int *)))func; break; case DB_FUNC_EXISTS: - __db_exists = (int (*) __P((const char *, int *)))func; + __db_jump.db_exists = (int (*) __P((const char *, int *)))func; break; case DB_FUNC_FREE: - __db_free = (void (*) __P((void *)))func; + __db_jump.db_free = (void (*) __P((void *)))func; break; case DB_FUNC_FSYNC: - __os_fsync = (int (*) __P((int)))func; + __db_jump.db_fsync = (int (*) __P((int)))func; break; case DB_FUNC_IOINFO: - __db_ioinfo = + __db_jump.db_ioinfo = (int (*) __P((const char *, int, off_t *, off_t *)))func; break; case DB_FUNC_MALLOC: - __db_malloc = (void *(*) __P((size_t)))func; + __db_jump.db_malloc = (void *(*) __P((size_t)))func; break; case DB_FUNC_MAP: - __db_map = (int (*) __P((int, size_t, int, int, void **)))func; + __db_jump.db_map = + (int (*) __P((int, size_t, int, int, void **)))func; break; case DB_FUNC_OPEN: - __os_open = (int (*) __P((const char *, int, ...)))func; + __db_jump.db_open = (int (*) __P((const char *, int, ...)))func; break; case DB_FUNC_READ: - __os_read = (ssize_t (*) __P((int, void *, size_t)))func; + __db_jump.db_read = + (ssize_t (*) __P((int, void *, size_t)))func; break; case DB_FUNC_REALLOC: - __db_realloc = (void *(*) __P((void *, size_t)))func; + __db_jump.db_realloc = (void *(*) __P((void *, size_t)))func; break; case DB_FUNC_SEEK: - __db_seek = + __db_jump.db_seek = (int (*) __P((int, size_t, 
db_pgno_t, u_long, int)))func; break; case DB_FUNC_SLEEP: - __db_sleep = (int (*) __P((u_long, u_long)))func; + __db_jump.db_sleep = (int (*) __P((u_long, u_long)))func; break; case DB_FUNC_STRDUP: - __db_strdup = (char *(*) __P((const char *)))func; + __db_jump.db_strdup = (char *(*) __P((const char *)))func; break; case DB_FUNC_UNLINK: - __os_unlink = (int (*) __P((const char *)))func; + __db_jump.db_unlink = (int (*) __P((const char *)))func; break; case DB_FUNC_UNMAP: - __db_unmap = (int (*) __P((void *, size_t)))func; + __db_jump.db_unmap = (int (*) __P((void *, size_t)))func; break; case DB_FUNC_WRITE: - __os_write = (ssize_t (*) __P((int, const void *, size_t)))func; + __db_jump.db_write = + (ssize_t (*) __P((int, const void *, size_t)))func; break; case DB_FUNC_YIELD: - __db_yield = (int (*) __P((void)))func; + __db_jump.db_yield = (int (*) __P((void)))func; + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * db_value_set -- + * Replace values for the DB package. + */ +int +db_value_set(value, which) + int value, which; +{ + switch (which) { + case DB_TSL_SPINS: + if (value <= 0) + return (EINVAL); + __db_tsl_spins = value; break; default: return (EINVAL); } return (0); } + +/* + * XXX + * Correct for systems that return NULL when you allocate 0 bytes of memory. + * There are several places in DB where we allocate the number of bytes held + * by the key/data item, and it can be 0. Correct here so that malloc never + * returns a NULL for that reason. + */ +/* + * __db_calloc -- + * The calloc(3) function for DB. + * + * PUBLIC: void *__db_calloc __P((size_t, size_t)); + */ +void * +__db_calloc(num, size) + size_t num, size; +{ + void *p; + + size *= num; + if ((p = __db_jump.db_malloc(size == 0 ? 1 : size)) != NULL) + memset(p, 0, size); + return (p); +} + +/* + * __db_malloc -- + * The malloc(3) function for DB. 
+ * + * PUBLIC: void *__db_malloc __P((size_t)); + */ +void * +__db_malloc(size) + size_t size; +{ + return (__db_jump.db_malloc(size == 0 ? 1 : size)); +} + +/* + * __db_realloc -- + * The realloc(3) function for DB. + * + * PUBLIC: void *__db_realloc __P((void *, size_t)); + */ +void * +__db_realloc(ptr, size) + void *ptr; + size_t size; +{ + return (__db_jump.db_realloc(ptr, size == 0 ? 1 : size)); +} diff --git a/db2/os/os_open.c b/db2/os/os_open.c index 05784e4810..a628765556 100644 --- a/db2/os/os_open.c +++ b/db2/os/os_open.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_open.c 10.19 (Sleepycat) 10/28/97"; +static const char sccsid[] = "@(#)os_open.c 10.20 (Sleepycat) 11/27/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -38,6 +38,11 @@ __db_open(name, arg_flags, ok_flags, mode, fdp) return (EINVAL); flags = 0; + + /* + * DB requires the semantic that two files opened at the same time + * with O_CREAT and O_EXCL set will return failure in at least one. + */ if (arg_flags & DB_CREATE) flags |= O_CREAT; diff --git a/db2/os/os_spin.c b/db2/os/os_spin.c new file mode 100644 index 0000000000..fb693c2848 --- /dev/null +++ b/db2/os/os_spin.c @@ -0,0 +1,56 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_spin.c 10.3 (Sleepycat) 11/25/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <limits.h> +#include <unistd.h> +#endif + +#include "db_int.h" + +/* + * __os_spin -- + * Return the number of default spins before blocking. + * + * PUBLIC: int __os_spin __P((void)); + */ +int +__os_spin() +{ + extern int __db_tsl_spins; + + /* If the application specified the spins, use its value. 
*/ + if (__db_tsl_spins != 0) + return (__db_tsl_spins); + + /* + * XXX + * Sysconf: Solaris uses _SC_NPROCESSORS_ONLN to return the number + * of online processors. I don't know if this call is portable or + * not. + */ +#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) + { + long sys_val; + + sys_val = sysconf(_SC_NPROCESSORS_ONLN); + if (sys_val > 0) + return (sys_val * 50); + } +#endif + + /* Default to a single processor. */ + return (1); +} diff --git a/db2/txn/txn.c b/db2/txn/txn.c index 55423f0470..e7a1798350 100644 --- a/db2/txn/txn.c +++ b/db2/txn/txn.c @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)txn.c 10.35 (Sleepycat) 11/2/97"; +static const char sccsid[] = "@(#)txn.c 10.37 (Sleepycat) 11/28/97"; #endif /* not lint */ @@ -101,11 +101,9 @@ __txn_create(dbenv, path, mode) maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 1000; (void)time(&now); - ret = __db_rcreate(dbenv, DB_APP_NONE, path, - DEFAULT_TXN_FILE, mode, TXN_REGION_SIZE(maxtxns), &fd, &txn_region); - /* Region may have existed. If it didn't, the open will fail. */ - if (ret != 0) + if ((ret = __db_rcreate(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE, + mode, TXN_REGION_SIZE(maxtxns), 0, &fd, &txn_region)) != 0) return (ret); txn_region->magic = DB_TXNMAGIC; @@ -315,7 +313,10 @@ err: return (ret); } -/* The db_txn(3) man page describes txn_commit. */ +/* + * txn_commit -- + * Commit a transaction. + */ int txn_commit(txnp) DB_TXN *txnp; @@ -337,7 +338,10 @@ txn_commit(txnp) return (__txn_end(txnp, 1)); } -/* The db_txn(3) man page describes txn_abort. */ +/* + * txn_abort -- + * Abort a transaction. + */ int txn_abort(txnp) DB_TXN *txnp; @@ -395,8 +399,8 @@ txn_id(txnp) } /* - * The db_txn(3) man page describes txn_close. Currently the caller should - * arrange a checkpoint before calling txn_close. + * txn_close -- + * Close the transaction region, does not imply a checkpoint.
*/ int txn_close(tmgrp) @@ -439,8 +443,8 @@ txn_close(tmgrp) } /* - * The db_txn(3) man page describes txn_unlink. Right now it is up to - * txn_close to write the final checkpoint record. + * txn_unlink -- + * Remove the transaction region. */ int txn_unlink(path, force, dbenv) @@ -666,12 +670,19 @@ do_ckp: mgr->region->pending_ckp = ckp_lsn; UNLOCK_TXNREGION(mgr); - ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn); - if (ret > 0) { - __db_err(mgr->dbenv, - "txn_checkpoint: system failure in memp_sync %s\n", - strerror(ret)); - } else if (ret == 0 && mgr->dbenv->lg_info != NULL) { + if (mgr->dbenv->mp_info != NULL && + (ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn)) != 0) { + /* + * ret < 0 means that there are still buffers to flush; + * the checkpoint is not complete. Back off and try again. + */ + if (ret > 0) + __db_err(mgr->dbenv, + "txn_checkpoint: system failure in memp_sync %s\n", + strerror(ret)); + return (ret); + } + if (mgr->dbenv->lg_info != NULL) { LOCK_TXNREGION(mgr); last_ckp = mgr->region->last_ckp; ZERO_LSN(mgr->region->pending_ckp); @@ -691,11 +702,7 @@ do_ckp: (void)time(&mgr->region->time_ckp); UNLOCK_TXNREGION(mgr); } - /* - * ret < 0 means that there are still buffers to flush; the - * checkpoint is not complete. Back off and try again. - */ - return (ret); + return (0); } /* diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c index 9edbc03eab..38627466a8 100644 --- a/db2/txn/txn_auto.c +++ b/db2/txn/txn_auto.c @@ -73,7 +73,6 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __txn_regop_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __txn_regop_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; @@ -202,7 +201,6 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags, * PUBLIC: int __txn_ckp_print * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ - int __txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4) DB_LOG *notused1; |