diff options
Diffstat (limited to 'db2/include/db_page.h')
-rw-r--r-- | db2/include/db_page.h | 512 |
1 files changed, 0 insertions, 512 deletions
diff --git a/db2/include/db_page.h b/db2/include/db_page.h deleted file mode 100644 index 5c9ca674f1..0000000000 --- a/db2/include/db_page.h +++ /dev/null @@ -1,512 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - * - * @(#)db_page.h 10.18 (Sleepycat) 12/2/98 - */ - -#ifndef _DB_PAGE_H_ -#define _DB_PAGE_H_ - -/* - * DB page formats. - * - * This implementation requires that values within the following structures - * NOT be padded -- note, ANSI C permits random padding within structures. - * If your compiler pads randomly you can just forget ever making DB run on - * your system. In addition, no data type can require larger alignment than - * its own size, e.g., a 4-byte data element may not require 8-byte alignment. - * - * Note that key/data lengths are often stored in db_indx_t's -- this is - * not accidental, nor does it limit the key/data size. If the key/data - * item fits on a page, it's guaranteed to be small enough to fit into a - * db_indx_t, and storing it in one saves space. - */ - -#define PGNO_METADATA 0 /* Metadata page number. */ -#define PGNO_INVALID 0 /* Metadata page number, therefore illegal. */ -#define PGNO_ROOT 1 /* Root is page #1. */ - -/* - * When we create pages in mpool, we ask mpool to clear some number of bytes - * in the header. This number must be at least as big as the regular page - * headers and cover enough of the btree and hash meta-data pages to obliterate - * the magic and version numbers. - */ -#define DB_PAGE_CLEAR_LEN 32 - -/************************************************************************ - BTREE METADATA PAGE LAYOUT - ************************************************************************/ - -/* - * Btree metadata page layout: - */ -typedef struct _btmeta { - DB_LSN lsn; /* 00-07: LSN. */ - db_pgno_t pgno; /* 08-11: Current page number. */ - u_int32_t magic; /* 12-15: Magic number. */ - u_int32_t version; /* 16-19: Version. */ - u_int32_t pagesize; /* 20-23: Pagesize. */ - u_int32_t maxkey; /* 24-27: Btree: Maxkey. */ - u_int32_t minkey; /* 28-31: Btree: Minkey. */ - u_int32_t free; /* 32-35: Free list page number. */ -#define BTM_DUP 0x001 /* Duplicates. */ -#define BTM_RECNO 0x002 /* Recno tree. */ -#define BTM_RECNUM 0x004 /* Btree: maintain record count. */ -#define BTM_FIXEDLEN 0x008 /* Recno: fixed length records. */ -#define BTM_RENUMBER 0x010 /* Recno: renumber on insert/delete. */ -#define BTM_MASK 0x01f - u_int32_t flags; /* 36-39: Flags. */ - u_int32_t re_len; /* 40-43: Recno: fixed-length record length. */ - u_int32_t re_pad; /* 44-47: Recno: fixed-length record pad. */ - /* 48-67: Unique file ID. */ - u_int8_t uid[DB_FILE_ID_LEN]; -} BTMETA; - -/************************************************************************ - HASH METADATA PAGE LAYOUT - ************************************************************************/ - -/* - * Hash metadata page layout: - */ -/* Hash Table Information */ -typedef struct hashhdr { /* Disk resident portion */ - DB_LSN lsn; /* 00-07: LSN of the header page */ - db_pgno_t pgno; /* 08-11: Page number (btree compatibility). */ - u_int32_t magic; /* 12-15: Magic NO for hash tables */ - u_int32_t version; /* 16-19: Version ID */ - u_int32_t pagesize; /* 20-23: Bucket/Page Size */ - u_int32_t ovfl_point; /* 24-27: Overflow page allocation location */ - u_int32_t last_freed; /* 28-31: Last freed overflow page pgno */ - u_int32_t max_bucket; /* 32-35: ID of Maximum bucket in use */ - u_int32_t high_mask; /* 36-39: Modulo mask into table */ - u_int32_t low_mask; /* 40-43: Modulo mask into table lower half */ - u_int32_t ffactor; /* 44-47: Fill factor */ - u_int32_t nelem; /* 48-51: Number of keys in hash table */ - u_int32_t h_charkey; /* 52-55: Value of hash(CHARKEY) */ -#define DB_HASH_DUP 0x01 - u_int32_t flags; /* 56-59: Allow duplicates. */ -#define NCACHED 32 /* number of spare points */ - /* 60-187: Spare pages for overflow */ - u_int32_t spares[NCACHED]; - /* 188-207: Unique file ID. */ - u_int8_t uid[DB_FILE_ID_LEN]; - - /* - * Minimum page size is 256. - */ -} HASHHDR; - -/************************************************************************ - MAIN PAGE LAYOUT - ************************************************************************/ - -/* - * +-----------------------------------+ - * | lsn | pgno | prev pgno | - * +-----------------------------------+ - * | next pgno | entries | hf offset | - * +-----------------------------------+ - * | level | type | index | - * +-----------------------------------+ - * | index | free --> | - * +-----------+-----------------------+ - * | F R E E A R E A | - * +-----------------------------------+ - * | <-- free | item | - * +-----------------------------------+ - * | item | item | item | - * +-----------------------------------+ - * - * sizeof(PAGE) == 26 bytes, and the following indices are guaranteed to be - * two-byte aligned. - * - * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the - * key for inp[1]'s data. All other types of pages only contain single items. - */ -typedef struct _db_page { - DB_LSN lsn; /* 00-07: Log sequence number. */ - db_pgno_t pgno; /* 08-11: Current page number. */ - db_pgno_t prev_pgno; /* 12-15: Previous page number. */ - db_pgno_t next_pgno; /* 16-19: Next page number. */ - db_indx_t entries; /* 20-21: Number of item pairs on the page. */ - db_indx_t hf_offset; /* 22-23: High free byte page offset. */ - - /* - * The btree levels are numbered from the leaf to the root, starting - * with 1, so the leaf is level 1, its parent is level 2, and so on. - * We maintain this level on all btree pages, but the only place that - * we actually need it is on the root page. It would not be difficult - * to hide the byte on the root page once it becomes an internal page, - * so we could get this byte back if we needed it for something else. - */ -#define LEAFLEVEL 1 -#define MAXBTREELEVEL 255 - u_int8_t level; /* 24: Btree tree level. */ - -#define P_INVALID 0 /* Invalid page type. */ -#define P_DUPLICATE 1 /* Duplicate. */ -#define P_HASH 2 /* Hash. */ -#define P_IBTREE 3 /* Btree internal. */ -#define P_IRECNO 4 /* Recno internal. */ -#define P_LBTREE 5 /* Btree leaf. */ -#define P_LRECNO 6 /* Recno leaf. */ -#define P_OVERFLOW 7 /* Overflow. */ - u_int8_t type; /* 25: Page type. */ - db_indx_t inp[1]; /* Variable length index of items. */ -} PAGE; - -/* Element macros. */ -#define LSN(p) (((PAGE *)p)->lsn) -#define PGNO(p) (((PAGE *)p)->pgno) -#define PREV_PGNO(p) (((PAGE *)p)->prev_pgno) -#define NEXT_PGNO(p) (((PAGE *)p)->next_pgno) -#define NUM_ENT(p) (((PAGE *)p)->entries) -#define HOFFSET(p) (((PAGE *)p)->hf_offset) -#define LEVEL(p) (((PAGE *)p)->level) -#define TYPE(p) (((PAGE *)p)->type) - -/* - * !!! - * The next_pgno and prev_pgno fields are not maintained for btree and recno - * internal pages. It's a minor performance improvement, and more, it's - * hard to do when deleting internal pages, and it decreases the chance of - * deadlock during deletes and splits. - * - * !!! - * The btree/recno access method needs db_recno_t bytes of space on the root - * page to specify how many records are stored in the tree. (The alternative - * is to store the number of records in the meta-data page, which will create - * a second hot spot in trees being actively modified, or recalculate it from - * the BINTERNAL fields on each access.) Overload the prev_pgno field. - */ -#define RE_NREC(p) \ - (TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : \ - TYPE(p) == P_LRECNO ? NUM_ENT(p) : PREV_PGNO(p)) -#define RE_NREC_ADJ(p, adj) \ - PREV_PGNO(p) += adj; -#define RE_NREC_SET(p, num) \ - PREV_PGNO(p) = num; - -/* - * Initialize a page. - * - * !!! - * Don't modify the page's LSN, code depends on it being unchanged after a - * P_INIT call. - */ -#define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do { \ - PGNO(pg) = n; \ - PREV_PGNO(pg) = pg_prev; \ - NEXT_PGNO(pg) = pg_next; \ - NUM_ENT(pg) = 0; \ - HOFFSET(pg) = pg_size; \ - LEVEL(pg) = btl; \ - TYPE(pg) = pg_type; \ -} while (0) - -/* Page header length (offset to first index). */ -#define P_OVERHEAD (SSZA(PAGE, inp)) - -/* First free byte. */ -#define LOFFSET(pg) (P_OVERHEAD + NUM_ENT(pg) * sizeof(db_indx_t)) - -/* Free space on the page. */ -#define P_FREESPACE(pg) (HOFFSET(pg) - LOFFSET(pg)) - -/* Get a pointer to the bytes at a specific index. */ -#define P_ENTRY(pg, indx) ((u_int8_t *)pg + ((PAGE *)pg)->inp[indx]) - -/************************************************************************ - OVERFLOW PAGE LAYOUT - ************************************************************************/ - -/* - * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which - * store a page number (the first page of the overflow item) and a length - * (the total length of the overflow item). The overflow item consists of - * some number of overflow pages, linked by the next_pgno field of the page. - * A next_pgno field of PGNO_INVALID flags the end of the overflow item. - * - * Overflow page overloads: - * The amount of overflow data stored on each page is stored in the - * hf_offset field. - * - * The implementation reference counts overflow items as it's possible - * for them to be promoted onto btree internal pages. The reference - * count is stored in the entries field. - */ -#define OV_LEN(p) (((PAGE *)p)->hf_offset) -#define OV_REF(p) (((PAGE *)p)->entries) - -/* Maximum number of bytes that you can put on an overflow page. */ -#define P_MAXSPACE(psize) ((psize) - P_OVERHEAD) - -/************************************************************************ - HASH PAGE LAYOUT - ************************************************************************/ - -/* Each index references a group of bytes on the page. */ -#define H_KEYDATA 1 /* Key/data item. */ -#define H_DUPLICATE 2 /* Duplicate key/data item. */ -#define H_OFFPAGE 3 /* Overflow key/data item. */ -#define H_OFFDUP 4 /* Overflow page of duplicates. */ - -/* - * !!! - * Items on hash pages are (potentially) unaligned, so we can never cast the - * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as - * we do with B+tree on-page structures. Because we frequently want the type - * field, it requires no alignment, and it's in the same location in all three - * structures, there's a pair of macros. - */ -#define HPAGE_PTYPE(p) (*(u_int8_t *)p) -#define HPAGE_TYPE(pg, indx) (*P_ENTRY(pg, indx)) - -/* - * The first and second types are H_KEYDATA and H_DUPLICATE, represented - * by the HKEYDATA structure: - * - * +-----------------------------------+ - * | type | key/data ... | - * +-----------------------------------+ - * - * For duplicates, the data field encodes duplicate elements in the data - * field: - * - * +---------------------------------------------------------------+ - * | type | len1 | element1 | len1 | len2 | element2 | len2 | - * +---------------------------------------------------------------+ - * - * Thus, by keeping track of the offset in the element, we can do both - * backward and forward traversal. - */ -typedef struct _hkeydata { - u_int8_t type; /* 00: Page type. */ - u_int8_t data[1]; /* Variable length key/data item. */ -} HKEYDATA; -#define HKEYDATA_DATA(p) (((u_int8_t *)p) + SSZA(HKEYDATA, data)) - -/* - * The length of any HKEYDATA item. Note that indx is an element index, - * not a PAIR index. - */ -#define LEN_HITEM(pg, pgsize, indx) \ - (((indx) == 0 ? pgsize : pg->inp[indx - 1]) - pg->inp[indx]) - -#define LEN_HKEYDATA(pg, psize, indx) \ - (((indx) == 0 ? psize : pg->inp[indx - 1]) - \ - pg->inp[indx] - HKEYDATA_SIZE(0)) - -/* - * Page space required to add a new HKEYDATA item to the page, with and - * without the index value. - */ -#define HKEYDATA_SIZE(len) \ - ((len) + SSZA(HKEYDATA, data)) -#define HKEYDATA_PSIZE(len) \ - (HKEYDATA_SIZE(len) + sizeof(db_indx_t)) - -/* Put a HKEYDATA item at the location referenced by a page entry. */ -#define PUT_HKEYDATA(pe, kd, len, type) { \ - ((HKEYDATA *)pe)->type = type; \ - memcpy((u_int8_t *)pe + sizeof(u_int8_t), kd, len); \ -} - -/* - * Macros the describe the page layout in terms of key-data pairs. - * The use of "pindex" indicates that the argument is the index - * expressed in pairs instead of individual elements. - */ -#define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2) -#define H_KEYINDEX(pindx) (2 * (pindx)) -#define H_DATAINDEX(pindx) ((2 * (pindx)) + 1) -#define H_PAIRKEY(pg, pindx) P_ENTRY(pg, H_KEYINDEX(pindx)) -#define H_PAIRDATA(pg, pindx) P_ENTRY(pg, H_DATAINDEX(pindx)) -#define H_PAIRSIZE(pg, psize, pindx) \ - (LEN_HITEM(pg, psize, H_KEYINDEX(pindx)) + \ - LEN_HITEM(pg, psize, H_DATAINDEX(pindx))) -#define LEN_HDATA(p, psize, pindx) LEN_HKEYDATA(p, psize, H_DATAINDEX(pindx)) -#define LEN_HKEY(p, psize, pindx) LEN_HKEYDATA(p, psize, H_KEYINDEX(pindx)) - -/* - * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure: - */ -typedef struct _hoffpage { - u_int8_t type; /* 00: Page type and delete flag. */ - u_int8_t unused[3]; /* 01-03: Padding, unused. */ - db_pgno_t pgno; /* 04-07: Offpage page number. */ - u_int32_t tlen; /* 08-11: Total length of item. */ -} HOFFPAGE; - -#define HOFFPAGE_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, pgno)) -#define HOFFPAGE_TLEN(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, tlen)) - -/* - * Page space required to add a new HOFFPAGE item to the page, with and - * without the index value. - */ -#define HOFFPAGE_SIZE (sizeof(HOFFPAGE)) -#define HOFFPAGE_PSIZE (HOFFPAGE_SIZE + sizeof(db_indx_t)) - -/* - * The fourth type is H_OFFDUP represented by the HOFFDUP structure: - */ -typedef struct _hoffdup { - u_int8_t type; /* 00: Page type and delete flag. */ - u_int8_t unused[3]; /* 01-03: Padding, unused. */ - db_pgno_t pgno; /* 04-07: Offpage page number. */ -} HOFFDUP; -#define HOFFDUP_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFDUP, pgno)) - -/* - * Page space required to add a new HOFFDUP item to the page, with and - * without the index value. - */ -#define HOFFDUP_SIZE (sizeof(HOFFDUP)) -#define HOFFDUP_PSIZE (HOFFDUP_SIZE + sizeof(db_indx_t)) - -/************************************************************************ - BTREE PAGE LAYOUT - ************************************************************************/ - -/* Each index references a group of bytes on the page. */ -#define B_KEYDATA 1 /* Key/data item. */ -#define B_DUPLICATE 2 /* Duplicate key/data item. */ -#define B_OVERFLOW 3 /* Overflow key/data item. */ - -/* - * We have to store a deleted entry flag in the page. The reason is complex, - * but the simple version is that we can't delete on-page items referenced by - * a cursor -- the return order of subsequent insertions might be wrong. The - * delete flag is an overload of the top bit of the type byte. - */ -#define B_DELETE (0x80) -#define B_DCLR(t) (t) &= ~B_DELETE -#define B_DSET(t) (t) |= B_DELETE -#define B_DISSET(t) ((t) & B_DELETE) - -#define B_TYPE(t) ((t) & ~B_DELETE) -#define B_TSET(t, type, deleted) { \ - (t) = (type); \ - if (deleted) \ - B_DSET(t); \ -} - -/* - * The first type is B_KEYDATA, represented by the BKEYDATA structure: - */ -typedef struct _bkeydata { - db_indx_t len; /* 00-01: Key/data item length. */ - u_int8_t type; /* 02: Page type AND DELETE FLAG. */ - u_int8_t data[1]; /* Variable length key/data item. */ -} BKEYDATA; - -/* Get a BKEYDATA item for a specific index. */ -#define GET_BKEYDATA(pg, indx) \ - ((BKEYDATA *)P_ENTRY(pg, indx)) - -/* - * Page space required to add a new BKEYDATA item to the page, with and - * without the index value. - */ -#define BKEYDATA_SIZE(len) \ - ALIGN((len) + SSZA(BKEYDATA, data), 4) -#define BKEYDATA_PSIZE(len) \ - (BKEYDATA_SIZE(len) + sizeof(db_indx_t)) - -/* - * The second and third types are B_DUPLICATE and B_OVERFLOW, represented - * by the BOVERFLOW structure. - */ -typedef struct _boverflow { - db_indx_t unused1; /* 00-01: Padding, unused. */ - u_int8_t type; /* 02: Page type AND DELETE FLAG. */ - u_int8_t unused2; /* 03: Padding, unused. */ - db_pgno_t pgno; /* 04-07: Next page number. */ - u_int32_t tlen; /* 08-11: Total length of item. */ -} BOVERFLOW; - -/* Get a BOVERFLOW item for a specific index. */ -#define GET_BOVERFLOW(pg, indx) \ - ((BOVERFLOW *)P_ENTRY(pg, indx)) - -/* - * Page space required to add a new BOVERFLOW item to the page, with and - * without the index value. - */ -#define BOVERFLOW_SIZE \ - ALIGN(sizeof(BOVERFLOW), 4) -#define BOVERFLOW_PSIZE \ - (BOVERFLOW_SIZE + sizeof(db_indx_t)) - -/* - * Btree leaf and hash page layouts group indices in sets of two, one - * for the key and one for the data. Everything else does it in sets - * of one to save space. I use the following macros so that it's real - * obvious what's going on... - */ -#define O_INDX 1 -#define P_INDX 2 - -/************************************************************************ - BTREE INTERNAL PAGE LAYOUT - ************************************************************************/ - -/* - * Btree internal entry. - */ -typedef struct _binternal { - db_indx_t len; /* 00-01: Key/data item length. */ - u_int8_t type; /* 02: Page type AND DELETE FLAG. */ - u_int8_t unused; /* 03: Padding, unused. */ - db_pgno_t pgno; /* 04-07: Page number of referenced page. */ - db_recno_t nrecs; /* 08-11: Subtree record count. */ - u_int8_t data[1]; /* Variable length key item. */ -} BINTERNAL; - -/* Get a BINTERNAL item for a specific index. */ -#define GET_BINTERNAL(pg, indx) \ - ((BINTERNAL *)P_ENTRY(pg, indx)) - -/* - * Page space required to add a new BINTERNAL item to the page, with and - * without the index value. - */ -#define BINTERNAL_SIZE(len) \ - ALIGN((len) + SSZA(BINTERNAL, data), 4) -#define BINTERNAL_PSIZE(len) \ - (BINTERNAL_SIZE(len) + sizeof(db_indx_t)) - -/************************************************************************ - RECNO INTERNAL PAGE LAYOUT - ************************************************************************/ - -/* - * The recno internal entry. - * - * XXX - * Why not fold this into the db_indx_t structure, it's fixed length? - */ -typedef struct _rinternal { - db_pgno_t pgno; /* 00-03: Page number of referenced page. */ - db_recno_t nrecs; /* 04-07: Subtree record count. */ -} RINTERNAL; - -/* Get a RINTERNAL item for a specific index. */ -#define GET_RINTERNAL(pg, indx) \ - ((RINTERNAL *)P_ENTRY(pg, indx)) - -/* - * Page space required to add a new RINTERNAL item to the page, with and - * without the index value. - */ -#define RINTERNAL_SIZE \ - ALIGN(sizeof(RINTERNAL), 4) -#define RINTERNAL_PSIZE \ - (RINTERNAL_SIZE + sizeof(db_indx_t)) -#endif /* _DB_PAGE_H_ */ |