/*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. ***REMOVED*** - see * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; #endif /* LIBC_SCCS and not lint */ /* * PACKAGE: hash * DESCRIPTION: * Big key/data handling for the hashing package. * * ROUTINES: * External * __big_keydata * __big_split * __big_insert * __big_return * __big_delete * __find_last_page * Internal * collect_key * collect_data */ #if !defined(_WIN32) && !defined(_WINDOWS) && !defined(macintosh) #include <sys/param.h> #endif #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #ifdef DEBUG #include <assert.h> #endif #include "mcom_db.h" #include "hash.h" #include "page.h" /* #include "extern.h" */ static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); static int collect_data(HTAB *, BUFHEAD *, int, int); /* * Big_insert * * You need to do an insert and the key/data pair is too big * * Returns: * 0 ==> OK *-1 ==> ERROR */ extern int __big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) { register uint16 *p; uint key_size, n, val_size; uint16 space, move_bytes, off; char *cp, *key_data, *val_data; cp = bufp->page; /* Character pointer of p. */ p = (uint16 *)cp; key_data = (char *)key->data; key_size = key->size; val_data = (char *)val->data; val_size = val->size; /* First move the Key */ for (space = FREESPACE(p) - BIGOVERHEAD; key_size; space = FREESPACE(p) - BIGOVERHEAD) { move_bytes = PR_MIN(space, key_size); off = OFFSET(p) - move_bytes; memmove(cp + off, key_data, move_bytes); key_size -= move_bytes; key_data += move_bytes; n = p[0]; p[++n] = off; p[0] = ++n; FREESPACE(p) = off - PAGE_META(n); OFFSET(p) = off; p[n] = PARTIAL_KEY; bufp = __add_ovflpage(hashp, bufp); if (!bufp) return (-1); n = p[0]; if (!key_size) { if (FREESPACE(p)) { move_bytes = PR_MIN(FREESPACE(p), val_size); off = OFFSET(p) - move_bytes; p[n] = off; memmove(cp + off, val_data, move_bytes); val_data += move_bytes; val_size -= move_bytes; p[n - 2] = FULL_KEY_DATA; FREESPACE(p) = FREESPACE(p) - move_bytes; OFFSET(p) = off; } else p[n - 2] = FULL_KEY; } p = (uint16 *)bufp->page; cp = bufp->page; bufp->flags |= BUF_MOD; } /* Now move the data */ for (space = FREESPACE(p) - BIGOVERHEAD; val_size; space = FREESPACE(p) - BIGOVERHEAD) { move_bytes = PR_MIN(space, val_size); /* * Here's the hack to make sure that if the data ends on the * same page as the key ends, FREESPACE is at least one. */ if (space == val_size && val_size == val->size) move_bytes--; off = OFFSET(p) - move_bytes; memmove(cp + off, val_data, move_bytes); val_size -= move_bytes; val_data += move_bytes; n = p[0]; p[++n] = off; p[0] = ++n; FREESPACE(p) = off - PAGE_META(n); OFFSET(p) = off; if (val_size) { p[n] = FULL_KEY; bufp = __add_ovflpage(hashp, bufp); if (!bufp) return (-1); cp = bufp->page; p = (uint16 *)cp; } else p[n] = FULL_KEY_DATA; bufp->flags |= BUF_MOD; } return (0); } /* * Called when bufp's page contains a partial key (index should be 1) * * All pages in the big key/data pair except bufp are freed. We cannot * free bufp because the page pointing to it is lost and we can't get rid * of its pointer. * * Returns: * 0 => OK *-1 => ERROR */ extern int __big_delete(HTAB *hashp, BUFHEAD *bufp) { register BUFHEAD *last_bfp, *rbufp; uint16 *bp, pageno; int key_done, n; rbufp = bufp; last_bfp = NULL; bp = (uint16 *)bufp->page; pageno = 0; key_done = 0; while (!key_done || (bp[2] != FULL_KEY_DATA)) { if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) key_done = 1; /* * If there is freespace left on a FULL_KEY_DATA page, then * the data is short and fits entirely on this page, and this * is the last page. */ if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) break; pageno = bp[bp[0] - 1]; rbufp->flags |= BUF_MOD; rbufp = __get_buf(hashp, pageno, rbufp, 0); if (last_bfp) __free_ovflpage(hashp, last_bfp); last_bfp = rbufp; if (!rbufp) return (-1); /* Error. */ bp = (uint16 *)rbufp->page; } /* * If we get here then rbufp points to the last page of the big * key/data pair. Bufp points to the first one -- it should now be * empty pointing to the next page after this pair. Can't free it * because we don't have the page pointing to it. */ /* This is information from the last page of the pair. */ n = bp[0]; pageno = bp[n - 1]; /* Now, bp is the first page of the pair. */ bp = (uint16 *)bufp->page; if (n > 2) { /* There is an overflow page. */ bp[1] = pageno; bp[2] = OVFLPAGE; bufp->ovfl = rbufp->ovfl; } else /* This is the last page. */ bufp->ovfl = NULL; n -= 2; bp[0] = n; FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); OFFSET(bp) = hashp->BSIZE - 1; bufp->flags |= BUF_MOD; if (rbufp) __free_ovflpage(hashp, rbufp); if (last_bfp != rbufp) __free_ovflpage(hashp, last_bfp); hashp->NKEYS--; return (0); } /* * Returns: * 0 = key not found * -1 = get next overflow page * -2 means key not found and this is big key/data * -3 error */ extern int __find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size) { register uint16 *bp; register char *p; int ksize; uint16 bytes; char *kkey; bp = (uint16 *)bufp->page; p = bufp->page; ksize = size; kkey = key; for (bytes = hashp->BSIZE - bp[ndx]; bytes <= size && bp[ndx + 1] == PARTIAL_KEY; bytes = hashp->BSIZE - bp[ndx]) { if (memcmp(p + bp[ndx], kkey, bytes)) return (-2); kkey += bytes; ksize -= bytes; bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); if (!bufp) return (-3); p = bufp->page; bp = (uint16 *)p; ndx = 1; } if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { #ifdef HASH_STATISTICS ++hash_collisions; #endif return (-2); } else return (ndx); } /* * Given the buffer pointer of the first overflow page of a big pair, * find the end of the big pair * * This will set bpp to the buffer header of the last page of the big pair. * It will return the pageno of the overflow page following the last page * of the pair; 0 if there isn't any (i.e. big pair is the last key in the * bucket) */ extern uint16 __find_last_page(HTAB *hashp, BUFHEAD **bpp) { BUFHEAD *bufp; uint16 *bp, pageno; uint n; bufp = *bpp; bp = (uint16 *)bufp->page; for (;;) { n = bp[0]; /* * This is the last page if: the tag is FULL_KEY_DATA and * either only 2 entries OVFLPAGE marker is explicit there * is freespace on the page. */ if (bp[2] == FULL_KEY_DATA && ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) break; /* LJM bound the size of n to reasonable limits */ if (n > hashp->BSIZE / sizeof(uint16)) return (0); pageno = bp[n - 1]; bufp = __get_buf(hashp, pageno, bufp, 0); if (!bufp) return (0); /* Need to indicate an error! */ bp = (uint16 *)bufp->page; } *bpp = bufp; if (bp[0] > 2) return (bp[3]); else return (0); } /* * Return the data for the key/data pair that begins on this page at this * index (index should always be 1). */ extern int __big_return( HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current) { BUFHEAD *save_p; uint16 *bp, len, off, save_addr; char *tp; int save_flags; bp = (uint16 *)bufp->page; while (bp[ndx + 1] == PARTIAL_KEY) { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (uint16 *)bufp->page; ndx = 1; } if (bp[ndx + 1] == FULL_KEY) { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (uint16 *)bufp->page; save_p = bufp; save_addr = save_p->addr; off = bp[1]; len = 0; } else if (!FREESPACE(bp)) { /* * This is a hack. We can't distinguish between * FULL_KEY_DATA that contains complete data or * incomplete data, so we require that if the data * is complete, there is at least 1 byte of free * space left. */ off = bp[bp[0]]; len = bp[1] - off; save_p = bufp; save_addr = bufp->addr; bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (uint16 *)bufp->page; } else { /* The data is all on one page. */ tp = (char *)bp; off = bp[bp[0]]; val->data = (uint8 *)tp + off; val->size = bp[1] - off; if (set_current) { if (bp[0] == 2) { /* No more buckets in * chain */ hashp->cpage = NULL; hashp->cbucket++; hashp->cndx = 1; } else { hashp->cpage = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!hashp->cpage) return (-1); hashp->cndx = 1; if (!((uint16 *) hashp->cpage->page)[0]) { hashp->cbucket++; hashp->cpage = NULL; } } } return (0); } /* pin our saved buf so that we don't lose if * we run out of buffers */ save_flags = save_p->flags; save_p->flags |= BUF_PIN; val->size = collect_data(hashp, bufp, (int)len, set_current); save_p->flags = save_flags; if (val->size == (size_t)-1) return (-1); if (save_p->addr != save_addr) { /* We are pretty short on buffers. */ errno = EINVAL; /* OUT OF BUFFERS */ return (-1); } memmove(hashp->tmp_buf, (save_p->page) + off, len); val->data = (uint8 *)hashp->tmp_buf; return (0); } /* * Count how big the total datasize is by looping through the pages. Then * allocate a buffer and copy the data in the second loop. NOTE: Our caller * may already have a bp which it is holding onto. The caller is * responsible for copying that bp into our temp buffer. 'len' is how much * space to reserve for that buffer. */ static int collect_data( HTAB *hashp, BUFHEAD *bufp, int len, int set) { register uint16 *bp; BUFHEAD *save_bufp; int save_flags; int mylen, totlen; /* * save the input buf head because we need to walk the list twice. * pin it to make sure it doesn't leave the buffer pool. * This has the effect of growing the buffer pool if necessary. */ save_bufp = bufp; save_flags = save_bufp->flags; save_bufp->flags |= BUF_PIN; /* read the length of the buffer */ for (totlen = len; bufp; bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0)) { bp = (uint16 *)bufp->page; mylen = hashp->BSIZE - bp[1]; /* if mylen ever goes negative it means that the * page is screwed up. */ if (mylen < 0) { save_bufp->flags = save_flags; return (-1); } totlen += mylen; if (bp[2] == FULL_KEY_DATA) { /* End of Data */ break; } } if (!bufp) { save_bufp->flags = save_flags; return (-1); } /* allocate a temp buf */ if (hashp->tmp_buf) free(hashp->tmp_buf); if ((hashp->tmp_buf = (char *)malloc((size_t)totlen)) == NULL) { save_bufp->flags = save_flags; return (-1); } /* copy the buffers back into temp buf */ for (bufp = save_bufp; bufp; bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0)) { bp = (uint16 *)bufp->page; mylen = hashp->BSIZE - bp[1]; memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], (size_t)mylen); len += mylen; if (bp[2] == FULL_KEY_DATA) { break; } } /* 'clear' the pin flags */ save_bufp->flags = save_flags; /* update the database cursor */ if (set) { hashp->cndx = 1; if (bp[0] == 2) { /* No more buckets in chain */ hashp->cpage = NULL; hashp->cbucket++; } else { hashp->cpage = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!hashp->cpage) return (-1); else if (!((uint16 *)hashp->cpage->page)[0]) { hashp->cbucket++; hashp->cpage = NULL; } } } return (totlen); } /* * Fill in the key and data for this big pair. */ extern int __big_keydata( HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set) { key->size = collect_key(hashp, bufp, 0, val, set); if (key->size == (size_t)-1) return (-1); key->data = (uint8 *)hashp->tmp_key; return (0); } /* * Count how big the total key size is by recursing through the pages. Then * collect the data, allocate a buffer and copy the key as you recurse up. */ static int collect_key( HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set) { BUFHEAD *xbp; char *p; int mylen, totlen; uint16 *bp, save_addr; p = bufp->page; bp = (uint16 *)p; mylen = hashp->BSIZE - bp[1]; save_addr = bufp->addr; totlen = len + mylen; if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ if (hashp->tmp_key != NULL) free(hashp->tmp_key); if ((hashp->tmp_key = (char *)malloc((size_t)totlen)) == NULL) return (-1); if (__big_return(hashp, bufp, 1, val, set)) return (-1); } else { xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!xbp || ((totlen = collect_key(hashp, xbp, totlen, val, set)) < 1)) return (-1); } if (bufp->addr != save_addr) { errno = EINVAL; /* MIS -- OUT OF BUFFERS */ return (-1); } memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], (size_t)mylen); return (totlen); } /* * Returns: * 0 => OK * -1 => error */ extern int __big_split( HTAB *hashp, BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */ BUFHEAD *np, /* Pointer to new bucket page */ /* Pointer to first page containing the big key/data */ BUFHEAD *big_keyp, uint32 addr, /* Address of big_keyp */ uint32 obucket, /* Old Bucket */ SPLIT_RETURN *ret) { register BUFHEAD *tmpp; register uint16 *tp; BUFHEAD *bp; DBT key, val; uint32 change; uint16 free_space, n, off; bp = big_keyp; /* Now figure out where the big key/data goes */ if (__big_keydata(hashp, big_keyp, &key, &val, 0)) return (-1); change = (__call_hash(hashp, (char *)key.data, key.size) != obucket); if ((ret->next_addr = __find_last_page(hashp, &big_keyp))) { if (!(ret->nextp = __get_buf(hashp, ret->next_addr, big_keyp, 0))) return (-1); ; } else ret->nextp = NULL; /* Now make one of np/op point to the big key/data pair */ #ifdef DEBUG assert(np->ovfl == NULL); #endif if (change) tmpp = np; else tmpp = op; tmpp->flags |= BUF_MOD; #ifdef DEBUG1 (void)fprintf(stderr, "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); #endif tmpp->ovfl = bp; /* one of op/np point to big_keyp */ tp = (uint16 *)tmpp->page; #if 0 /* this get's tripped on database corrupted error */ assert(FREESPACE(tp) >= OVFLSIZE); #endif if (FREESPACE(tp) < OVFLSIZE) return (DATABASE_CORRUPTED_ERROR); n = tp[0]; off = OFFSET(tp); free_space = FREESPACE(tp); tp[++n] = (uint16)addr; tp[++n] = OVFLPAGE; tp[0] = n; OFFSET(tp) = off; FREESPACE(tp) = free_space - OVFLSIZE; /* * Finally, set the new and old return values. BIG_KEYP contains a * pointer to the last page of the big key_data pair. Make sure that * big_keyp has no following page (2 elements) or create an empty * following page. */ ret->newp = np; ret->oldp = op; tp = (uint16 *)big_keyp->page; big_keyp->flags |= BUF_MOD; if (tp[0] > 2) { /* * There may be either one or two offsets on this page. If * there is one, then the overflow page is linked on normally * and tp[4] is OVFLPAGE. If there are two, tp[4] contains * the second offset and needs to get stuffed in after the * next overflow page is added. */ n = tp[4]; free_space = FREESPACE(tp); off = OFFSET(tp); tp[0] -= 2; FREESPACE(tp) = free_space + OVFLSIZE; OFFSET(tp) = off; tmpp = __add_ovflpage(hashp, big_keyp); if (!tmpp) return (-1); tp[4] = n; } else tmpp = big_keyp; if (change) ret->newp = tmpp; else ret->oldp = tmpp; return (0); }