/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

#include <afsconfig.h>
#include <afs/param.h>

#include "afs_atomlist.h"
#include "afs_lhash.h"

#ifndef KERNEL
/* for now, only turn on assertions in user-space code */
#include <assert.h>
#define CHECK_INVARIANTS
#endif /* !KERNEL */

#ifndef NULL
#define NULL 0
#endif

/* max hash table load factor */
enum { LOAD_FACTOR = 5 };

struct bucket {
    struct bucket *next;
    void *data;
    unsigned key;
};

struct afs_lhash {
    int (*equal) (const void *a, const void *b);
    void *(*allocate) (size_t n);
    void (*deallocate) (void *p, size_t n);

    size_t p;			/* index of next bucket to be split */
    size_t maxp;		/* upper bound on p during this expansion */

    size_t ndata;		/* count of data records in hash */

    size_t ltable;		/* logical table size */

    size_t ntable;		/* physical table size */
    struct bucket **table;

    afs_atomlist *bucket_list;	/* hash bucket allocator */

    size_t search_calls;	/* cumulative afs_lhash_search() call count */
    size_t search_tests;	/* cumulative afs_lhash_search() comparison count */
    size_t remove_calls;	/* cumulative afs_lhash_remove() call count */
    size_t remove_tests;	/* cumulative afs_lhash_remove() comparison count */
};
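/*
 * Illustrative only: a minimal, compiled-out sketch of the three
 * caller-supplied hooks whose signatures appear in struct afs_lhash
 * above.  The record type and all "example_*" names are hypothetical;
 * the allocate/deallocate pair here simply wraps malloc()/free() and
 * ignores the size argument that a pooled allocator would use.
 */
#if 0
#include <stdlib.h>

struct example_rec {
    unsigned id;
    const char *name;
};

/* true iff two stored elements represent the same record */
static int
example_equal(const void *a, const void *b)
{
    return ((const struct example_rec *)a)->id ==
	((const struct example_rec *)b)->id;
}

static void *
example_allocate(size_t n)
{
    return malloc(n);
}

static void
example_deallocate(void *p, size_t n)
{
    (void)n;			/* malloc-based hooks have no use for the size */
    free(p);
}
#endif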
/*
 * make sure the given hash table can accommodate the given index
 * value; expand the bucket table if necessary
 *
 * returns 0 on success, <0 on failure
 */
static int
afs_lhash_accommodate(afs_lhash * lh, size_t max_index)
{
    /*
     * The usual approach to growing ntable to accommodate max_index
     * would be to double ntable enough times.  This would give us
     * an exponential backoff in how many times we need to grow
     * ntable.  However, there is a space tradeoff.
     *
     * Since afs_lhash may be used in an environment where memory is
     * constrained, we choose instead to grow ntable in fixed
     * increments.  This may have a larger total cost, but it is
     * amortized in smaller increments.
     *
     * If even this cost is too great, we can consider adopting the
     * two-level array approach mentioned in the paper.  This could
     * keep the sizes of the allocations more consistent, and also
     * reduce the amount of data copying.  However, we won't do that
     * until we see real results that show that the benefit of the
     * additional complexity is worth it.
     */
    enum { ntable_inc = 1024 / sizeof *lh->table };

    size_t new_ntable;
    struct bucket **new_table;
    size_t i;

    /* if the given index fits, we're done */

    if (max_index < lh->ntable)
	return 0;

    /* otherwise, determine new_ntable and allocate new_table */

    if (lh->ntable == (size_t) 0) {
	new_ntable = ntable_inc;
    } else {
	new_ntable = lh->ntable;
    }

    for (; !(max_index < new_ntable); new_ntable += ntable_inc)
	continue;

    new_table = lh->allocate(new_ntable * sizeof *lh->table);
    if (!new_table) {
	return -1;
    }

    /* initialize new_table from the old table */

    for (i = 0; i < lh->ntable; i++) {
	new_table[i] = lh->table[i];
    }
    for (i = lh->ntable; i < new_ntable; i++) {
	new_table[i] = 0;
    }

    /*
     * free the old table, if any, and switch to the new table
     *
     * (when called from afs_lhash_create(), the table is empty)
     */
    if (lh->table && lh->ntable) {
	lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
    }
    lh->ntable = new_ntable;
    lh->table = new_table;

    return 0;
}

/*
 * given a hash table and a key value, returns the index corresponding
 * to the key value
 */
static size_t
afs_lhash_address(const afs_lhash * lh, unsigned key)
{
    enum { prime = 1048583 };
    size_t h;
    size_t address;

    h = key % prime;		/* 0 <= h < prime */

    address = h % lh->maxp;
    if (address < lh->p) {
	address = h % (2 * lh->maxp);
    }

    return address;
}

/*
 * if possible, expand the logical size of the given hash table
 */
static void
afs_lhash_expand(afs_lhash * lh)
{
    size_t old_address;		/* index of bucket to split */
    size_t new_address;		/* index of new bucket */

    struct bucket *current_b;	/* for scanning down old chain */
    struct bucket *previous;

    struct bucket *last_of_new;	/* last element in new chain */

    /* save address to split */

    old_address = lh->p;

    /* determine new address, grow table if necessary */

    new_address = lh->maxp + lh->p;
    if (afs_lhash_accommodate(lh, new_address) < 0) {
	return;
    }

    /* adjust the state variables */

    /* this makes afs_lhash_address() work with respect
     * to the expanded table */
    lh->p++;
    if (lh->p == lh->maxp) {
	lh->maxp *= 2;
	lh->p = 0;
    }

    lh->ltable++;
#ifdef CHECK_INVARIANTS
    assert(lh->ltable - 1 == new_address);
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    /* relocate records to the new bucket */

    current_b = lh->table[old_address];
    previous = 0;
    last_of_new = 0;
    lh->table[new_address] = 0;

    while (current_b) {
	size_t addr;
	addr = afs_lhash_address(lh, current_b->key);
	if (addr == new_address) {
	    /* attach it to the end of the new chain */
	    if (last_of_new) {
		last_of_new->next = current_b;
	    } else {
		lh->table[new_address] = current_b;
	    }
	    if (previous) {
		previous->next = current_b->next;
	    } else {
		lh->table[old_address] = current_b->next;
	    }
	    last_of_new = current_b;
	    current_b = current_b->next;
	    last_of_new->next = 0;
	} else {
#ifdef CHECK_INVARIANTS
	    assert(addr == old_address);
#endif /* CHECK_INVARIANTS */
	    /* leave it on the old chain */
	    previous = current_b;
	    current_b = current_b->next;
	}
    }
}

afs_lhash *
afs_lhash_create(int (*equal) (const void *a, const void *b),
				/* returns true if the elements pointed to
				 * by a and b are the same, false otherwise */
		 void *(*allocate) (size_t n),
		 void (*deallocate) (void *p, size_t n))
{
    afs_lhash *lh;

    lh = allocate(sizeof *lh);
    if (!lh)
	return (afs_lhash *) 0;

    lh->equal = equal;
    lh->allocate = allocate;
    lh->deallocate = deallocate;

    lh->p = 0;
    lh->maxp = 16;
    lh->ltable = lh->maxp;

    lh->ndata = 0;

    lh->ntable = 0;
    lh->table = NULL;
    if (afs_lhash_accommodate(lh, lh->ltable - 1) < 0) {
	lh->deallocate(lh, sizeof *lh);
	return (afs_lhash *) 0;
    }
#ifdef CHECK_INVARIANTS
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    /* maybe the chunk size should be passed in for hashes, so we
     * can pass it down here */
    lh->bucket_list =
	afs_atomlist_create(sizeof(struct bucket), sizeof(long) * 1024,
			    allocate, deallocate);
#ifdef CHECK_INVARIANTS
    assert(lh->bucket_list);
#endif /* CHECK_INVARIANTS */

    lh->search_calls = 0;
    lh->search_tests = 0;
    lh->remove_calls = 0;
    lh->remove_tests = 0;

    return lh;
}
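/*
 * Illustrative only: the create/destroy life cycle, using the
 * hypothetical hooks sketched earlier (also compiled out).
 * afs_lhash_create() returns a null pointer when its allocator
 * fails, so the result must be checked before use.
 */
#if 0
static void
example_lifecycle(void)
{
    afs_lhash *lh =
	afs_lhash_create(example_equal, example_allocate,
			 example_deallocate);
    if (!lh)
	return;			/* allocation failed */
    /* ... afs_lhash_enter(), afs_lhash_search(), ... */
    afs_lhash_destroy(lh);
}
#endif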
void
afs_lhash_destroy(afs_lhash * lh)
{
#ifdef CHECK_INVARIANTS
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    /*
     * first, free the buckets
     *
     * afs_atomlist_destroy() implicitly frees all the memory allocated
     * from the given afs_atomlist, so there is no need to go through
     * the hash table to explicitly free each bucket.
     */
    afs_atomlist_destroy(lh->bucket_list);

    /* then, free the table and the afs_lhash */

    lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
    lh->deallocate(lh, sizeof *lh);
}

void
afs_lhash_iter(afs_lhash * lh,
	       void (*f) (size_t index, unsigned key, void *data))
{
    size_t i;

#ifdef CHECK_INVARIANTS
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    for (i = 0; i < lh->ltable; i++) {
	struct bucket *current_b;

	for (current_b = lh->table[i]; current_b;
	     current_b = current_b->next) {
	    f(i, current_b->key, current_b->data);
	}
    }
}

void *
afs_lhash_search(afs_lhash * lh, unsigned key, const void *data)
{
    size_t k;
    struct bucket *previous;
    struct bucket *current_b;

    lh->search_calls++;

    k = afs_lhash_address(lh, key);
    for (previous = 0, current_b = lh->table[k]; current_b;
	 previous = current_b, current_b = current_b->next) {
	lh->search_tests++;
	if (lh->equal(data, current_b->data)) {
	    /*
	     * Since we found what we were looking for, move
	     * the bucket to the head of the chain.  The
	     * theory is that unused hash table entries will
	     * be left at the end of the chain, where they
	     * will not cause search times to increase.
	     *
	     * This optimization is both easy to understand
	     * and cheap to execute, so we go ahead and do it.
	     */
	    if (previous) {
		previous->next = current_b->next;
		current_b->next = lh->table[k];
		lh->table[k] = current_b;
	    }
	    return current_b->data;
	}
    }

    return 0;
}

void *
afs_lhash_rosearch(const afs_lhash * lh, unsigned key, const void *data)
{
    size_t k;
    struct bucket *current_b;

    k = afs_lhash_address(lh, key);
    for (current_b = lh->table[k]; current_b; current_b = current_b->next) {
	if (lh->equal(data, current_b->data)) {
	    return current_b->data;
	}
    }

    return 0;
}

void *
afs_lhash_remove(afs_lhash * lh, unsigned key, const void *data)
{
    size_t k;
    struct bucket **pprev;
    struct bucket *cur;

    lh->remove_calls++;

    k = afs_lhash_address(lh, key);
    for (pprev = 0, cur = lh->table[k]; cur;
	 pprev = &cur->next, cur = cur->next) {
	lh->remove_tests++;
	if (lh->equal(data, cur->data)) {
	    void *data = cur->data;

	    if (pprev) {
		*pprev = cur->next;
	    } else {
		lh->table[k] = cur->next;
	    }
	    afs_atomlist_put(lh->bucket_list, cur);

	    lh->ndata--;

	    return data;
	}
    }

    return 0;
}

int
afs_lhash_enter(afs_lhash * lh, unsigned key, void *data)
{
    size_t k;
    struct bucket *buck;

    buck = afs_atomlist_get(lh->bucket_list);
    if (!buck) {
	return -1;
    }

    buck->key = key;
    buck->data = data;

    k = afs_lhash_address(lh, key);
    buck->next = lh->table[k];
    lh->table[k] = buck;

    lh->ndata++;

    /*
     * The load factor is the number of data items in the hash
     * table divided by the logical table length.  We expand the
     * table when the load factor exceeds a predetermined limit
     * (LOAD_FACTOR).  To avoid floating point, we multiply both
     * sides of the inequality ndata / ltable > LOAD_FACTOR by the
     * logical table length, giving the test below.
     */
    if (lh->ndata > LOAD_FACTOR * lh->ltable) {
	afs_lhash_expand(lh);
#if 0
	printf("lh->p = %lu; lh->maxp = %lu\n",
	       (unsigned long)lh->p, (unsigned long)lh->maxp);
#endif
    }
#ifdef CHECK_INVARIANTS
    assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
#endif /* CHECK_INVARIANTS */

    return 0;
}
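/*
 * Illustrative only: a round trip through enter/search/remove with
 * the hypothetical example_rec type from the sketch above.  The
 * table stores only the data pointer; the caller owns the record's
 * storage and derives the key from the record itself.
 */
#if 0
static void
example_round_trip(afs_lhash * lh)
{
    static struct example_rec rec = { 42, "forty-two" };
    struct example_rec probe = { 42, 0 };	/* example_equal() looks only at id */

    if (afs_lhash_enter(lh, rec.id, &rec) < 0)
	return;			/* no bucket could be allocated */

    /* search returns the stored pointer, here &rec */
    if (afs_lhash_search(lh, probe.id, &probe) != &rec)
	return;			/* not reached: the record was just entered */

    (void)afs_lhash_remove(lh, probe.id, &probe);
}
#endif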
int
afs_lhash_stat(afs_lhash * lh, struct afs_lhash_stat *sb)
{
    size_t k;

    int min_chain_length = -1;
    int max_chain_length = -1;
    size_t buckets = lh->ltable;
    size_t records = 0;

    if (!sb) {
	return -1;
    }
#ifdef CHECK_INVARIANTS
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    for (k = 0; k < lh->ltable; k++) {
	struct bucket *buck;
	int chain_length = 0;

	for (buck = lh->table[k]; buck; buck = buck->next) {
	    chain_length++;
	    records++;
	}

	if (min_chain_length == -1 || chain_length < min_chain_length) {
	    min_chain_length = chain_length;
	}
	if (max_chain_length < chain_length) {
	    max_chain_length = chain_length;
	}
    }

    sb->min_chain_length = min_chain_length;
    sb->max_chain_length = max_chain_length;
    sb->buckets = buckets;
    sb->records = records;

#ifdef CHECK_INVARIANTS
    assert(lh->ndata == records);
#endif /* CHECK_INVARIANTS */

    sb->search_calls = lh->search_calls;
    sb->search_tests = lh->search_tests;
    sb->remove_calls = lh->remove_calls;
    sb->remove_tests = lh->remove_tests;

    return 0;
}
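/*
 * Illustrative only: reporting the counters that afs_lhash_stat()
 * fills in.  The field names come from the assignments above; the
 * exact field types live in afs_lhash.h, so this sketch casts to
 * unsigned long before printing.
 */
#if 0
#include <stdio.h>

static void
example_report(afs_lhash * lh)
{
    struct afs_lhash_stat sb;

    if (afs_lhash_stat(lh, &sb) < 0)
	return;

    printf("%lu records in %lu buckets (chain lengths %d..%d)\n",
	   (unsigned long)sb.records, (unsigned long)sb.buckets,
	   sb.min_chain_length, sb.max_chain_length);
    printf("search: %lu calls, %lu tests; remove: %lu calls, %lu tests\n",
	   (unsigned long)sb.search_calls, (unsigned long)sb.search_tests,
	   (unsigned long)sb.remove_calls, (unsigned long)sb.remove_tests);
}
#endif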