Index: src/backend/access/hash/hash.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/hash/hash.c,v retrieving revision 1.54 diff -c -r1.54 hash.c *** src/backend/access/hash/hash.c 2002/03/02 21:39:16 1.54 --- src/backend/access/hash/hash.c 2002/03/05 02:18:19 *************** *** 165,173 **** char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED - Relation heapRel = (Relation) PG_GETARG_POINTER(4); - #endif InsertIndexResult res; HashItem hitem; IndexTuple itup; --- 165,170 ---- *************** *** 333,339 **** /* * hashmarkpos() -- save current scan position - * */ Datum hashmarkpos(PG_FUNCTION_ARGS) --- 330,335 ---- Index: src/backend/access/hash/hashfunc.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v retrieving revision 1.31 diff -c -r1.31 hashfunc.c *** src/backend/access/hash/hashfunc.c 2002/02/25 04:06:47 1.31 --- src/backend/access/hash/hashfunc.c 2002/03/05 02:18:19 *************** *** 95,100 **** --- 95,102 ---- { char *key = NameStr(*PG_GETARG_NAME(0)); + Assert(strlen(key) < NAMEDATALEN); /* a Name is NAMEDATALEN bytes including its terminating NUL */ + return hash_any(key, strlen(key)); } *************** *** 116,176 **** return result; } /* ! * hash_any --- compute a hash function for any specified chunk of memory ! * ! * This can be used as the underlying hash function for any pass-by-reference ! * data type in which there are no non-significant bits. ! * ! * (Comment from the original db3 hashing code: ) ! * ! * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte ! * units. On the first time through the loop we get the 'leftover bytes' ! * (strlen % 8). On every later iteration, we perform 8 HASHC's so we handle ! * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If ! * this routine is heavily used enough, it's worth the ugly coding. ! * ! 
* "OZ's original sdbm hash" */ Datum ! hash_any(const char *keydata, int keylen) { ! uint32 n; ! int loop; ! ! #define HASHC n = *keydata++ + 65599 * n ! ! n = 0; ! if (keylen > 0) ! { ! loop = (keylen + 8 - 1) >> 3; ! ! switch (keylen & (8 - 1)) ! { ! case 0: ! do ! { /* All fall throughs */ ! HASHC; ! case 7: ! HASHC; ! case 6: ! HASHC; ! case 5: ! HASHC; ! case 4: ! HASHC; ! case 3: ! HASHC; ! case 2: ! HASHC; ! case 1: ! HASHC; ! } while (--loop); ! } ! } ! ! #undef HASHC ! PG_RETURN_UINT32(n); } --- 118,207 ---- return result; } + /* This hash function was written by Bob Jenkins + * (bob_jenkins@burtleburtle.net), and superficially adapted + * for PostgreSQL by Neil Conway. For more information on this + * hash function, see http://burtleburtle.net/bob/hash/doobs.html + */ /* ! * mix -- mix 3 32-bit values reversibly. ! * For every delta with one or two bits set, and the deltas of all three ! * high bits or all three low bits, whether the original value of a,b,c ! * is almost all zero or is uniformly distributed, ! * - If mix() is run forward or backward, at least 32 bits in a,b,c ! * have at least 1/4 probability of changing. ! * - If mix() is run forward, every bit of c will change between 1/3 and ! * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) */ + #define mix(a,b,c) \ + { \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ + } + + /* + * hash_any() -- hash a variable-length key into a 32-bit value + * k : the key (the unaligned variable-length array of bytes, read as unsigned) + * keylen : the length of the key, counting by bytes + * Returns a 32-bit value. Every bit of the key affects every bit of + * the return value. Every 1-bit and 2-bit delta achieves avalanche. + * About 6*len+35 instructions. 
The best hash table sizes are powers + * of 2. There is no need to do mod a prime (mod is sooo slow!). + * If you need less than 32 bits, use a bitmask. + */ Datum ! hash_any(register const char *k, register int keylen) { ! register const unsigned char *p = (const unsigned char *) k; /* plain char may be signed; read bytes unsigned so the hash is the same everywhere */ ! register uint32 a,b,c,len; /* 32-bit state, as mix() expects; Datum may be wider */ ! /* Set up the internal state */ ! len = keylen; ! a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ ! /* Another arbitrary value. If the hash function is called ! * multiple times, this could be the previously generated ! * hash value; however, the interface currently doesn't allow ! * this. AFAIK this isn't a big deal. ! */ ! c = 3923095; ! ! /* handle most of the key */ ! while (len >= 12) ! { ! a += (p[0] +((uint32)p[1]<<8) +((uint32)p[2]<<16) +((uint32)p[3]<<24)); ! b += (p[4] +((uint32)p[5]<<8) +((uint32)p[6]<<16) +((uint32)p[7]<<24)); ! c += (p[8] +((uint32)p[9]<<8) +((uint32)p[10]<<16)+((uint32)p[11]<<24)); ! mix(a,b,c); ! p += 12; len -= 12; ! } ! ! /* handle the last 11 bytes */ ! c += keylen; ! switch(len) /* all the case statements fall through */ ! { ! case 11: c+=((uint32)p[10]<<24); ! case 10: c+=((uint32)p[9]<<16); ! case 9 : c+=((uint32)p[8]<<8); ! /* the first byte of c is reserved for the length */ ! case 8 : b+=((uint32)p[7]<<24); ! case 7 : b+=((uint32)p[6]<<16); ! case 6 : b+=((uint32)p[5]<<8); ! case 5 : b+=p[4]; ! case 4 : a+=((uint32)p[3]<<24); ! case 3 : a+=((uint32)p[2]<<16); ! case 2 : a+=((uint32)p[1]<<8); ! case 1 : a+=p[0]; ! /* case 0: nothing left to add */ ! } ! mix(a,b,c); ! /* report the result */ ! 
PG_RETURN_UINT32(c); } Index: src/backend/access/hash/hashinsert.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v retrieving revision 1.23 diff -c -r1.23 hashinsert.c *** src/backend/access/hash/hashinsert.c 2001/10/25 05:49:21 1.23 --- src/backend/access/hash/hashinsert.c 2002/03/05 02:18:19 *************** *** 49,55 **** itup = &(hitem->hash_itup); if ((natts = rel->rd_rel->relnatts) != 1) elog(ERROR, "Hash indices valid for only one index key."); ! itup_scankey = _hash_mkscankey(rel, itup, metap); /* * find the first page in the bucket chain containing this key and --- 49,55 ---- itup = &(hitem->hash_itup); if ((natts = rel->rd_rel->relnatts) != 1) elog(ERROR, "Hash indices valid for only one index key."); ! itup_scankey = _hash_mkscankey(rel, itup); /* * find the first page in the bucket chain containing this key and *************** *** 232,238 **** RelationGetRelationName(rel)); /* write the buffer, but hold our lock */ ! _hash_wrtnorelbuf(rel, buf); return itup_off; } --- 232,238 ---- RelationGetRelationName(rel)); /* write the buffer, but hold our lock */ ! _hash_wrtnorelbuf(buf); return itup_off; } Index: src/backend/access/hash/hashovfl.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v retrieving revision 1.31 diff -c -r1.31 hashovfl.c *** src/backend/access/hash/hashovfl.c 2001/10/25 05:49:21 1.31 --- src/backend/access/hash/hashovfl.c 2002/03/05 02:18:19 *************** *** 73,83 **** ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; ovflopaque->hasho_oaddr = oaddr; ovflopaque->hasho_bucket = pageopaque->hasho_bucket; ! _hash_wrtnorelbuf(rel, ovflbuf); /* logically chain overflow page to previous page */ pageopaque->hasho_nextblkno = ovflblkno; ! 
_hash_wrtnorelbuf(rel, buf); return ovflbuf; } --- 73,83 ---- ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; ovflopaque->hasho_oaddr = oaddr; ovflopaque->hasho_bucket = pageopaque->hasho_bucket; ! _hash_wrtnorelbuf(ovflbuf); /* logically chain overflow page to previous page */ pageopaque->hasho_nextblkno = ovflblkno; ! _hash_wrtnorelbuf(buf); return ovflbuf; } *************** *** 574,580 **** * the "next" ItemId. */ PageIndexTupleDelete(rpage, roffnum); ! _hash_wrtnorelbuf(rel, rbuf); /* * if the "read" page is now empty because of the deletion, free --- 574,580 ---- * the "next" ItemId. */ PageIndexTupleDelete(rpage, roffnum); ! _hash_wrtnorelbuf(rbuf); /* * if the "read" page is now empty because of the deletion, free Index: src/backend/access/hash/hashpage.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/hash/hashpage.c,v retrieving revision 1.34 diff -c -r1.34 hashpage.c *** src/backend/access/hash/hashpage.c 2002/01/15 22:14:16 1.34 --- src/backend/access/hash/hashpage.c 2002/03/05 02:18:19 *************** *** 151,157 **** elog(ERROR, "Problem with _hash_initbitmap."); /* all done */ ! _hash_wrtnorelbuf(rel, metabuf); /* * initialize the first two buckets --- 151,157 ---- elog(ERROR, "Problem with _hash_initbitmap."); /* all done */ ! _hash_wrtnorelbuf(metabuf); /* * initialize the first two buckets *************** *** 260,266 **** * or a reference to the buffer. */ void ! _hash_wrtnorelbuf(Relation rel, Buffer buf) { BlockNumber blkno; --- 260,266 ---- * or a reference to the buffer. */ void ! _hash_wrtnorelbuf(Buffer buf) { BlockNumber blkno; *************** *** 383,389 **** opaque = (HashPageOpaque) PageGetSpecialPointer(page); PageIndexTupleDelete(page, offno); ! _hash_wrtnorelbuf(rel, buf); if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) { --- 383,389 ---- opaque = (HashPageOpaque) PageGetSpecialPointer(page); PageIndexTupleDelete(page, offno); ! 
_hash_wrtnorelbuf(buf); if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) { *************** *** 505,511 **** nopaque->hasho_flag = LH_BUCKET_PAGE; nopaque->hasho_oaddr = InvalidOvflAddress; nopaque->hasho_bucket = nbucket; ! _hash_wrtnorelbuf(rel, nbuf); /* * make sure the old bucket isn't empty. advance 'opage' and friends --- 505,511 ---- nopaque->hasho_flag = LH_BUCKET_PAGE; nopaque->hasho_oaddr = InvalidOvflAddress; nopaque->hasho_bucket = nbucket; ! _hash_wrtnorelbuf(nbuf); /* * make sure the old bucket isn't empty. advance 'opage' and friends *************** *** 628,634 **** == InvalidOffsetNumber) elog(ERROR, "_hash_splitpage: failed to add index item to %s", RelationGetRelationName(rel)); ! _hash_wrtnorelbuf(rel, nbuf); /* * now delete the tuple from the old bucket. after this --- 628,634 ---- == InvalidOffsetNumber) elog(ERROR, "_hash_splitpage: failed to add index item to %s", RelationGetRelationName(rel)); ! _hash_wrtnorelbuf(nbuf); /* * now delete the tuple from the old bucket. after this *************** *** 640,646 **** * instead of calling PageGetMaxOffsetNumber. */ PageIndexTupleDelete(opage, ooffnum); ! _hash_wrtnorelbuf(rel, obuf); omaxoffnum = OffsetNumberPrev(omaxoffnum); /* --- 640,646 ---- * instead of calling PageGetMaxOffsetNumber. */ PageIndexTupleDelete(opage, ooffnum); ! _hash_wrtnorelbuf(obuf); omaxoffnum = OffsetNumberPrev(omaxoffnum); /* Index: src/backend/access/hash/hashutil.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/hash/hashutil.c,v retrieving revision 1.27 diff -c -r1.27 hashutil.c *** src/backend/access/hash/hashutil.c 2001/10/06 23:21:43 1.27 --- src/backend/access/hash/hashutil.c 2002/03/05 02:18:19 *************** *** 1,7 **** /*------------------------------------------------------------------------- * ! * btutils.c ! * Utility code for Postgres btree implementation. 
* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California --- 1,7 ---- /*------------------------------------------------------------------------- * ! * hashutil.c ! * Utility code for Postgres hash implementation. * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California *************** *** 21,27 **** ScanKey ! _hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap) { ScanKey skey; TupleDesc itupdesc; --- 21,27 ---- ScanKey ! _hash_mkscankey(Relation rel, IndexTuple itup) { ScanKey skey; TupleDesc itupdesc; Index: src/backend/executor/nodeHash.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHash.c,v retrieving revision 1.60 diff -c -r1.60 nodeHash.c *** src/backend/executor/nodeHash.c 2001/10/25 05:49:28 1.60 --- src/backend/executor/nodeHash.c 2002/03/05 02:18:19 *************** *** 115,122 **** HashState *hashstate; Plan *outerPlan; ! SO1_printf("ExecInitHash: %s\n", ! "initializing hash node"); /* * assign the node's execution state --- 115,121 ---- HashState *hashstate; Plan *outerPlan; ! SO_printf("ExecInitHash: initializing hash node\n"); /* * assign the node's execution state Index: src/backend/utils/cache/catcache.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/cache/catcache.c,v retrieving revision 1.90 diff -c -r1.90 catcache.c *** src/backend/utils/cache/catcache.c 2002/03/03 17:47:55 1.90 --- src/backend/utils/cache/catcache.c 2002/03/05 02:18:19 *************** *** 39,45 **** /* * Constants related to size of the catcache. * ! * NCCBUCKETS should be prime and must be less than 64K (because * SharedInvalCatcacheMsg crams hash indexes into a uint16 field). 
In * practice it should be a lot less, anyway, to avoid chewing up too much * space on hash bucket headers. --- 39,45 ---- /* * Constants related to size of the catcache. * ! * NCCBUCKETS must be a power of two and must be less than 64K (because * SharedInvalCatcacheMsg crams hash indexes into a uint16 field). In * practice it should be a lot less, anyway, to avoid chewing up too much * space on hash bucket headers. *************** *** 47,55 **** * MAXCCTUPLES could be as small as a few hundred, if per-backend memory * consumption is at a premium. */ ! #define NCCBUCKETS 257 /* Hash buckets per CatCache */ #define MAXCCTUPLES 5000 /* Maximum # of tuples in all caches */ /* * variables, macros and other stuff --- 47,62 ---- * MAXCCTUPLES could be as small as a few hundred, if per-backend memory * consumption is at a premium. */ ! #define NCCBUCKETS 256 /* Hash buckets per CatCache */ #define MAXCCTUPLES 5000 /* Maximum # of tuples in all caches */ + /* + * Given a hash value and the size of the hash table, find the bucket + * in which the hash value belongs. Since the hash table must contain + * a power-of-2 number of elements, this is a simple bitmask. + */ + #define HASH_INDEX(h, sz) ((Index) ((h) & ((sz) - 1))) + /* * variables, macros and other stuff *************** *** 380,386 **** /* * inspect the proper hash bucket for matches */ ! hashIndex = (Index) (hashValue % (uint32) ccp->cc_size); for (elt = DLGetHead(&ccp->cc_bucket[hashIndex]); elt; elt = nextelt) { --- 387,393 ---- /* * inspect the proper hash bucket for matches */ ! hashIndex = HASH_INDEX(hashValue, ccp->cc_nbuckets); for (elt = DLGetHead(&ccp->cc_bucket[hashIndex]); elt; elt = nextelt) { *************** *** 490,496 **** int i; /* Remove each tuple in this cache, or at least mark it dead */ ! for (i = 0; i < cache->cc_size; i++) { Dlelem *elt, *nextelt; --- 497,503 ---- int i; /* Remove each tuple in this cache, or at least mark it dead */ ! 
for (i = 0; i < cache->cc_nbuckets; i++) { Dlelem *elt, *nextelt; *************** *** 578,584 **** continue; /* nope, leave it alone */ /* Yes, scan the tuples and remove those related to relId */ ! for (i = 0; i < cache->cc_size; i++) { Dlelem *elt, *nextelt; --- 585,591 ---- continue; /* nope, leave it alone */ /* Yes, scan the tuples and remove those related to relId */ ! for (i = 0; i < cache->cc_nbuckets; i++) { Dlelem *elt, *nextelt; *************** *** 640,646 **** #define InitCatCache_DEBUG1 \ do { \ elog(DEBUG1, "InitCatCache: rel=%s id=%d nkeys=%d size=%d\n", \ ! cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_size); \ } while(0) #else --- 647,653 ---- #define InitCatCache_DEBUG1 \ do { \ elog(DEBUG1, "InitCatCache: rel=%s id=%d nkeys=%d size=%d\n", \ ! cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_nbuckets); \ } while(0) #else *************** *** 705,711 **** cp->cc_tupdesc = (TupleDesc) NULL; cp->cc_reloidattr = reloidattr; cp->cc_ntup = 0; ! cp->cc_size = NCCBUCKETS; cp->cc_nkeys = nkeys; for (i = 0; i < nkeys; ++i) cp->cc_key[i] = key[i]; --- 712,718 ---- cp->cc_tupdesc = (TupleDesc) NULL; cp->cc_reloidattr = reloidattr; cp->cc_ntup = 0; ! cp->cc_nbuckets = NCCBUCKETS; cp->cc_nkeys = nkeys; for (i = 0; i < nkeys; ++i) cp->cc_key[i] = key[i]; *************** *** 985,991 **** * find the hash bucket in which to look for the tuple */ hashValue = CatalogCacheComputeHashValue(cache, cur_skey); ! hashIndex = (Index) (hashValue % (uint32) cache->cc_size); /* * scan the hash bucket until we find a match or exhaust our tuples --- 992,998 ---- * find the hash bucket in which to look for the tuple */ hashValue = CatalogCacheComputeHashValue(cache, cur_skey); ! 
hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); /* * scan the hash bucket until we find a match or exhaust our tuples Index: src/include/access/hash.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/access/hash.h,v retrieving revision 1.43 diff -c -r1.43 hash.h *** src/include/access/hash.h 2002/02/25 04:06:52 1.43 --- src/include/access/hash.h 2002/03/05 02:18:19 *************** *** 265,273 **** extern Datum hashint2vector(PG_FUNCTION_ARGS); extern Datum hashname(PG_FUNCTION_ARGS); extern Datum hashvarlena(PG_FUNCTION_ARGS); ! extern Datum hash_any(const char *keydata, int keylen); - /* private routines */ /* hashinsert.c */ --- 265,272 ---- extern Datum hashint2vector(PG_FUNCTION_ARGS); extern Datum hashname(PG_FUNCTION_ARGS); extern Datum hashvarlena(PG_FUNCTION_ARGS); ! extern Datum hash_any(register const char *k, register int keylen); /* private routines */ /* hashinsert.c */ *************** *** 288,294 **** extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access); extern void _hash_relbuf(Relation rel, Buffer buf, int access); extern void _hash_wrtbuf(Relation rel, Buffer buf); ! extern void _hash_wrtnorelbuf(Relation rel, Buffer buf); extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access, int to_access); extern void _hash_pageinit(Page page, Size size); --- 287,293 ---- extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access); extern void _hash_relbuf(Relation rel, Buffer buf, int access); extern void _hash_wrtbuf(Relation rel, Buffer buf); ! extern void _hash_wrtnorelbuf(Buffer buf); extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access, int to_access); extern void _hash_pageinit(Page page, Size size); *************** *** 313,320 **** /* hashutil.c */ ! extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup, ! 
HashMetaPage metap); extern void _hash_freeskey(ScanKey skey); extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup); extern HashItem _hash_formitem(IndexTuple itup); --- 312,318 ---- /* hashutil.c */ ! extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup); extern void _hash_freeskey(ScanKey skey); extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup); extern HashItem _hash_formitem(IndexTuple itup); Index: src/include/utils/catcache.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/utils/catcache.h,v retrieving revision 1.39 diff -c -r1.39 catcache.h *** src/include/utils/catcache.h 2002/03/03 17:47:56 1.39 --- src/include/utils/catcache.h 2002/03/05 02:18:19 *************** *** 40,47 **** TupleDesc cc_tupdesc; /* tuple descriptor (copied from reldesc) */ int cc_reloidattr; /* AttrNumber of relation OID attr, or 0 */ int cc_ntup; /* # of tuples currently in this cache */ ! int cc_size; /* # of hash buckets in this cache */ ! int cc_nkeys; /* number of keys (1..4) */ int cc_key[4]; /* AttrNumber of each key */ PGFunction cc_hashfunc[4]; /* hash function to use for each key */ ScanKeyData cc_skey[4]; /* precomputed key info for heap scans */ --- 40,47 ---- TupleDesc cc_tupdesc; /* tuple descriptor (copied from reldesc) */ int cc_reloidattr; /* AttrNumber of relation OID attr, or 0 */ int cc_ntup; /* # of tuples currently in this cache */ ! int cc_nbuckets; /* # of hash buckets in this cache */ ! int cc_nkeys; /* # of keys (1..4) */ int cc_key[4]; /* AttrNumber of each key */ PGFunction cc_hashfunc[4]; /* hash function to use for each key */ ScanKeyData cc_skey[4]; /* precomputed key info for heap scans */