2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
14 #include "afs_atomlist.h"
15 #include "afs_lhash.h"
17 /* for now, only turn on assertions in user-space code */
19 #define CHECK_INVARIANTS
26 /* max hash table load factor */
/*
 * The table expands once ndata exceeds LOAD_FACTOR * ltable, i.e.
 * once the average chain holds more than LOAD_FACTOR records
 * (see the check in afs_lhash_enter()).
 */
27 enum { LOAD_FACTOR = 5 };
/* caller-supplied record equality test: nonzero if *a and *b match */
36 int (*equal) (const void *a, const void *b);
/* caller-supplied allocator pair; all internal memory goes through these */
38 void *(*allocate) (size_t n);
40 void (*deallocate) (void *p, size_t n);
/* linear-hashing control state: p is the next bucket to split; when a
 * full round completes (p reaches maxp), maxp doubles and p resets
 * (see afs_lhash_expand() / afs_lhash_address()) */
42 size_t p; /* index of next bucket to be split */
43 size_t maxp; /* upper bound on p during this expansion */
45 size_t ndata; /* count of data records in hash */
/* invariant asserted throughout: ltable <= ntable */
47 size_t ltable; /* logical table size */
49 size_t ntable; /* physical table size */
50 struct bucket **table; /* array of ntable chain heads */
52 afs_atomlist *bucket_list; /* hash bucket allocator */
/* cumulative statistics, reported via afs_lhash_stat() */
54 size_t search_calls; /* cumulative afs_lhash_search() call count */
55 size_t search_tests; /* cumulative afs_lhash_search() comparison count */
56 size_t remove_calls; /* cumulative afs_lhash_remove() call count */
57 size_t remove_tests; /* cumulative afs_lhash_remove() comparison count */
61 * make sure the given hash table can accomodate the given index
62 * value; expand the bucket table if necessary
64 * returns 0 on success, <0 on failure
68 afs_lhash_accomodate(afs_lhash * lh, size_t max_index)
71 * The usual approach to growing ntable to accomodate max_index
72 * would be to double ntable enough times. This would give us
73 * an exponential backoff in how many times we need to grow
74 * ntable. However, there is a space tradeoff.
76 * Since afs_lhash may be used in an environment where memory is
77 * constrained, we choose instead to grow ntable in fixed
78 * increments. This may have a larger total cost, but it is
79 * amortized in smaller increments.
81 * If even this cost is too great, we can consider adopting the
82 * two-level array approach mentioned in the paper. This could
83 * keep the sizes of the allocations more consistent, and also
84 * reduce the amount of data copying. However, we won't do that
85 * until we see real results that show that the benefit of the
86 * additional complexity is worth it.
/* growth step: enough slots to fill ~1KB of bucket pointers */
88 enum { ntable_inc = 1024 / sizeof *lh->table };
91 struct bucket **new_table;
94 /* if the given index fits, we're done */
96 if (max_index < lh->ntable)
99 /* otherwise, determine new_ntable and allocate new_table */
101 if (lh->ntable == (size_t) 0) {
102 new_ntable = ntable_inc;
104 new_ntable = lh->ntable;
/* bump new_ntable in fixed increments until max_index fits */
107 for (; !(max_index < new_ntable); new_ntable += ntable_inc)
110 new_table = lh->allocate(new_ntable * sizeof *lh->table);
115 /* initialize new_table from the old table */
/* copy the existing chain heads over ... */
117 for (i = 0; i < lh->ntable; i++) {
118 new_table[i] = lh->table[i];
/* ... and null out the newly added slots */
120 for (i = lh->ntable; i < new_ntable; i++) {
125 * free the old table, if any, and switch to the new table
127 * (when called from afs_lhash_create(), the table is empty)
130 if (lh->table && lh->ntable) {
131 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
133 lh->ntable = new_ntable;
134 lh->table = new_table;
140 * given a hash table and a key value, returns the index corresponding
/*
 * Standard linear-hashing address computation: hash modulo the
 * pre-expansion table size (maxp); if that bucket has already been
 * split in the current round (address < p), rehash modulo 2*maxp to
 * choose between the old bucket and its split image.
 */
145 afs_lhash_address(const afs_lhash * lh, unsigned key)
147 enum { prime = 1048583 };
/* fold the key into a bounded range first; a prime modulus spreads keys */
151 h = key % prime; /* 0 <= h < prime */
153 address = h % lh->maxp;
154 if (address < lh->p) {
/* bucket 'address' was already split this round; use the finer hash */
155 address = h % (2 * lh->maxp);
162 * if possible, expand the logical size of the given hash table
/*
 * Performs one linear-hashing split: bucket p is split into buckets
 * p and maxp+p, the round counters are advanced, and the records of
 * the old chain are redistributed between the two chains.
 */
165 afs_lhash_expand(afs_lhash * lh)
167 size_t old_address; /* index of bucket to split */
168 size_t new_address; /* index of new bucket */
170 struct bucket *current_b; /* for scanning down old chain */
171 struct bucket *previous;
173 struct bucket *last_of_new; /* last element in new chain */
175 /* save address to split */
179 /* determine new address, grow table if necessary */
/* the split image of bucket p always lives at maxp + p */
181 new_address = lh->maxp + lh->p;
183 if (afs_lhash_accomodate(lh, new_address) < 0) {
187 /* adjust the state variables */
189 /* this makes afs_lhash_address() work with respect
190 * to the expanded table */
/* when a full round of splits completes (p == maxp), double maxp
 * and start the next round from bucket 0 */
193 if (lh->p == lh->maxp) {
200 #ifdef CHECK_INVARIANTS
201 assert(lh->ltable - 1 == new_address);
202 assert(lh->ltable <= lh->ntable);
203 #endif /* CHECK_INVARIANTS */
205 /* relocate records to the new bucket */
/* walk the old chain, re-addressing every record; those that now
 * hash to new_address are appended, in order, to the new chain */
207 current_b = lh->table[old_address];
210 lh->table[new_address] = 0;
214 addr = afs_lhash_address(lh, current_b->key);
215 if (addr == new_address) {
216 /* attach it to the end of the new chain */
218 last_of_new->next = current_b;
220 lh->table[new_address] = current_b;
/* unlink current_b from the old chain (interior or head) */
223 previous->next = current_b->next;
225 lh->table[old_address] = current_b->next;
227 last_of_new = current_b;
228 current_b = current_b->next;
/* the moved record is now the tail of the new chain */
229 last_of_new->next = 0;
231 #ifdef CHECK_INVARIANTS
232 assert(addr == old_address);
233 #endif /* CHECK_INVARIANTS */
234 /* leave it on the old chain */
235 previous = current_b;
236 current_b = current_b->next;
/*
 * afs_lhash_create
 *
 * Allocate and initialize a new linear hash table.  The caller
 * supplies the record equality test and a matched allocate/deallocate
 * pair used for all internal memory.  Returns the new table, or a
 * null pointer if allocation fails.
 */
242 afs_lhash_create(int (*equal) (const void *a, const void *b),
243 /* returns true if the elements pointed to by
244 * a and b are the same, false otherwise */
245 void *(*allocate) (size_t n), void (*deallocate) (void *p,
251 lh = allocate(sizeof *lh);
253 return (afs_lhash *) 0; /* out of memory */
256 lh->allocate = allocate;
257 lh->deallocate = deallocate;
/* the logical table initially spans one full expansion round */
262 lh->ltable = lh->maxp;
/* pre-size the physical table; on failure undo the lh allocation */
269 if (afs_lhash_accomodate(lh, lh->ltable - 1) < 0) {
270 lh->deallocate(lh, sizeof *lh);
271 return (afs_lhash *) 0;
273 #ifdef CHECK_INVARIANTS
274 assert(lh->ltable <= lh->ntable);
275 #endif /* CHECK_INVARIANTS */
277 /* maybe the chunk size should be passed in for hashes, so we
278 * can pass it down here */
281 afs_atomlist_create(sizeof(struct bucket), sizeof(long) * 1024,
282 allocate, deallocate);
283 #ifdef CHECK_INVARIANTS
/* NOTE(review): with CHECK_INVARIANTS off, a failed
 * afs_atomlist_create() leaves bucket_list null and is not detected
 * here -- confirm callers can tolerate that */
284 assert(lh->bucket_list);
285 #endif /* CHECK_INVARIANTS */
/* zero the cumulative statistics counters */
287 lh->search_calls = 0;
288 lh->search_tests = 0;
289 lh->remove_calls = 0;
290 lh->remove_tests = 0;
/*
 * afs_lhash_destroy
 *
 * Release all memory owned by the hash: the bucket pool, the bucket
 * table, and the afs_lhash itself.  The stored data pointers are not
 * freed; they remain the caller's responsibility.
 */
296 afs_lhash_destroy(afs_lhash * lh)
298 #ifdef CHECK_INVARIANTS
299 assert(lh->ltable <= lh->ntable);
300 #endif /* CHECK_INVARIANTS */
303 * first, free the buckets
305 * afs_atomlist_destroy() implicitly frees all the memory allocated
306 * from the given afs_atomlist, so there is no need to go through
307 * the hash table to explicitly free each bucket.
310 afs_atomlist_destroy(lh->bucket_list);
312 /* then, free the table and the afs_lhash */
314 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
315 lh->deallocate(lh, sizeof *lh);
/*
 * afs_lhash_iter
 *
 * Invoke f(bucket index, key, data) for every record, walking the
 * logical buckets in order and each chain front to back.
 * NOTE(review): nothing here guards against f() mutating the hash
 * mid-walk -- confirm callers do not do so.
 */
319 afs_lhash_iter(afs_lhash * lh,
320 void (*f) (size_t index, unsigned key, void *data)
325 #ifdef CHECK_INVARIANTS
326 assert(lh->ltable <= lh->ntable);
327 #endif /* CHECK_INVARIANTS */
329 for (i = 0; i < lh->ltable; i++) {
330 struct bucket *current_b;
332 for (current_b = lh->table[i]; current_b; current_b = current_b->next) {
333 f(i, current_b->key, current_b->data);
/*
 * afs_lhash_search
 *
 * Look up a record: key selects the chain, lh->equal() confirms the
 * exact record.  On a hit, the matching bucket is moved to the head
 * of its chain and the stored data pointer is returned.
 */
339 afs_lhash_search(afs_lhash * lh, unsigned key, const void *data)
342 struct bucket *previous;
343 struct bucket *current_b;
347 k = afs_lhash_address(lh, key);
/* previous trails current_b so the hit can be spliced out below */
348 for (previous = 0, current_b = lh->table[k]; current_b;
349 previous = current_b, current_b = current_b->next) {
351 if (lh->equal(data, current_b->data)) {
354 * Since we found what we were looking for, move
355 * the bucket to the head of the chain. The
356 * theory is that unused hash table entries will
357 * be left at the end of the chain, where they
358 * will not cause search times to increase.
360 * This optimization is both easy to understand
361 * and cheap to execute, so we go ahead and do
/* splice current_b out of its current position ... */
367 previous->next = current_b->next;
/* ... and push it onto the front of chain k */
368 current_b->next = lh->table[k];
369 lh->table[k] = current_b;
372 return current_b->data;
/*
 * afs_lhash_rosearch
 *
 * Read-only lookup: same matching as afs_lhash_search(), but the
 * chain is not reorganized, so it is usable on a const afs_lhash.
 */
380 afs_lhash_rosearch(const afs_lhash * lh, unsigned key, const void *data)
383 struct bucket *current_b;
385 k = afs_lhash_address(lh, key);
386 for (current_b = lh->table[k]; current_b; current_b = current_b->next) {
387 if (lh->equal(data, current_b->data)) {
388 return current_b->data;
/*
 * afs_lhash_remove
 *
 * Find the record matching key/data, unlink its bucket from the
 * chain, return the bucket to the atomlist pool, and hand the stored
 * data pointer (still owned by the caller) back.
 */
396 afs_lhash_remove(afs_lhash * lh, unsigned key, const void *data)
399 struct bucket **pprev;
404 k = afs_lhash_address(lh, key);
/* pprev trails cur: null while cur is the chain head, otherwise the
 * address of the previous bucket's next pointer */
405 for (pprev = 0, cur = lh->table[k]; cur;
406 pprev = &cur->next, cur = cur->next) {
408 if (lh->equal(data, cur->data)) {
/* save the payload before recycling the bucket */
409 void *ptr = cur->data;
/* head-of-chain unlink (cur is lh->table[k] here) */
414 lh->table[k] = cur->next;
417 afs_atomlist_put(lh->bucket_list, cur);
/*
 * afs_lhash_enter
 *
 * Insert (key, data): draw a bucket from the atomlist pool, push it
 * onto the head of chain k, then split a bucket via
 * afs_lhash_expand() if the load-factor limit has been exceeded.
 */
429 afs_lhash_enter(afs_lhash * lh, unsigned key, void *data)
434 buck = afs_atomlist_get(lh->bucket_list);
442 k = afs_lhash_address(lh, key);
/* push the new bucket onto the front of its chain */
444 buck->next = lh->table[k];
450 * The load factor is the number of data items in the hash
451 * table divided by the logical table length. We expand the
452 * table when the load factor exceeds a predetermined limit
453 * (LOAD_FACTOR). To avoid floating point, we multiply both
454 * sides of the inequality by the logical table length...
456 if (lh->ndata > LOAD_FACTOR * lh->ltable) {
457 afs_lhash_expand(lh);
/* FIXME(review): %d does not match the size_t arguments lh->p and
 * lh->maxp; use %zu (or cast to unsigned long with %lu) -- as
 * written this is undefined behavior per the C standard */
459 printf("lh->p = %d; lh->maxp = %d\n", lh->p, lh->maxp);
462 #ifdef CHECK_INVARIANTS
463 assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
464 #endif /* CHECK_INVARIANTS */
/*
 * afs_lhash_stat
 *
 * Fill *sb with a snapshot of table statistics: logical bucket count,
 * total records, shortest/longest chain length (-1 when the table has
 * no buckets), and the cumulative search/remove counters.
 */
470 afs_lhash_stat(afs_lhash * lh, struct afs_lhash_stat *sb)
474 int min_chain_length = -1;
475 int max_chain_length = -1;
476 size_t buckets = lh->ltable;
482 #ifdef CHECK_INVARIANTS
483 assert(lh->ltable <= lh->ntable);
484 #endif /* CHECK_INVARIANTS */
486 for (k = 0; k < lh->ltable; k++) {
488 int chain_length = 0;
/* measure this chain (presumably chain_length and records are
 * incremented inside this loop -- confirm against full source) */
490 for (buck = lh->table[k]; buck; buck = buck->next) {
/* -1 means "no chain seen yet"; the first chain seeds both extremes */
495 if (min_chain_length == -1) {
496 min_chain_length = chain_length;
499 if (max_chain_length == -1) {
500 max_chain_length = chain_length;
503 if (chain_length < min_chain_length) {
504 min_chain_length = chain_length;
507 if (max_chain_length < chain_length) {
508 max_chain_length = chain_length;
512 sb->min_chain_length = min_chain_length;
513 sb->max_chain_length = max_chain_length;
514 sb->buckets = buckets;
515 sb->records = records;
517 #ifdef CHECK_INVARIANTS
/* the walk's record count must agree with the running ndata tally */
518 assert(lh->ndata == records);
519 #endif /* CHECK_INVARIANTS */
521 sb->search_calls = lh->search_calls;
522 sb->search_tests = lh->search_tests;
523 sb->remove_calls = lh->remove_calls;
524 sb->remove_tests = lh->remove_tests;