2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #include "afs_atomlist.h"
17 #include "afs_lhash.h"
19 #include "afs_atomlist.h"
20 #include "afs_lhash.h"
21 /* for now, only turn on assertions in user-space code */
23 #define CHECK_INVARIANTS
26 /* max hash table load factor */
27 enum { LOAD_FACTOR = 5 };
/* NOTE(review): the members below appear to belong to the afs_lhash
 * struct; its opening line and some members fall on original lines
 * missing from this extract. */
/* caller-supplied equality test: nonzero if *a and *b denote the same
 * record (see its use in the search/remove loops below) */
36 int (*equal)(const void *a, const void *b);
/* caller-supplied memory management, sized in bytes */
38 void *(*allocate)(size_t n);
40 void (*deallocate)(void *p, size_t n);
/* linear-hashing control state: p is the next bucket to split and maxp
 * bounds p until the logical table doubles (see the address function) */
42 size_t p; /* index of next bucket to be split */
43 size_t maxp; /* upper bound on p during this expansion */
45 size_t ndata; /* count of data records in hash */
47 size_t ltable; /* logical table size */
49 size_t ntable; /* physical table size */
/* bucket chain heads; entries [0, ltable) are in logical use */
50 struct bucket **table;
52 afs_atomlist *bucket_list; /* hash bucket allocator */
/* statistics reported via struct afs_lhash_stat (see bottom of file) */
54 size_t search_calls; /* cumulative afs_lhash_search() call count */
55 size_t search_tests; /* cumulative afs_lhash_search() comparison count */
56 size_t remove_calls; /* cumulative afs_lhash_remove() call count */
57 size_t remove_tests; /* cumulative afs_lhash_remove() comparison count */
61 * make sure the given hash table can accomodate the given index
62 * value; expand the bucket table if necessary
64 * returns 0 on success, <0 on failure
/* NOTE(review): the signature line of afs_lhash_accomodate() — the
 * name keeps its historical "accomodate" spelling — plus the early
 * "return 0", the allocation-failure check, and the closing braces
 * fall on original lines missing from this extract. */
74 * The usual approach to growing ntable to accomodate max_index
75 * would be to double ntable enough times. This would give us
76 * an exponential backoff in how many times we need to grow
77 * ntable. However, there is a space tradeoff.
79 * Since afs_lhash may be used in an environment where memory is
80 * constrained, we choose instead to grow ntable in fixed
81 * increments. This may have a larger total cost, but it is
82 * amortized in smaller increments.
84 * If even this cost is too great, we can consider adopting the
85 * two-level array approach mentioned in the paper. This could
86 * keep the sizes of the allocations more consistent, and also
87 * reduce the amount of data copying. However, we won't do that
88 * until we see real results that show that the benefit of the
89 * additional complexity is worth it.
/* grow in chunks of roughly 1024 bytes worth of bucket pointers */
91 enum { ntable_inc = 1024 / sizeof *lh->table };
94 struct bucket **new_table;
97 /* if the given index fits, we're done */
99 if (max_index < lh->ntable)
102 /* otherwise, determine new_ntable and allocate new_table */
104 if (lh->ntable == (size_t)0) {
105 new_ntable = ntable_inc;
107 new_ntable = lh->ntable;
/* step new_ntable up by fixed increments until max_index fits */
110 for(; !(max_index < new_ntable); new_ntable += ntable_inc)
113 new_table = lh->allocate(new_ntable * sizeof *lh->table);
118 /* initialize new_table from the old table */
120 for (i = 0; i < lh->ntable; i++) {
121 new_table[i] = lh->table[i];
/* presumably zeroes the newly added slots — loop body not visible */
123 for (i = lh->ntable; i < new_ntable; i++) {
128 * free the old table, if any, and switch to the new table
130 * (when called from afs_lhash_create(), the table is empty)
133 if (lh->table && lh->ntable) {
134 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
136 lh->ntable = new_ntable;
137 lh->table = new_table;
143 * given a hash table and a key value, returns the index corresponding
/* Classic linear-hashing (Litwin) address computation: hash with the
 * current modulus maxp; buckets below p have already been split this
 * round, so they re-address with the doubled modulus. */
153 enum { prime = 1048583 };
157 h = key % prime; /* 0 <= h < prime */
159 address = h % lh->maxp;
160 if (address < lh->p) {
161 address = h % (2 * lh->maxp);
168 * if possible, expand the logical size of the given hash table
/* NOTE(review): the signature line of afs_lhash_expand() and several
 * statements — notably the updates of lh->p, lh->maxp, and lh->ltable
 * under "adjust the state variables" — fall on original lines missing
 * from this extract. */
175 size_t old_address; /* index of bucket to split */
176 size_t new_address; /* index of new bucket */
178 struct bucket *current; /* for scanning down old chain */
179 struct bucket *previous;
181 struct bucket *last_of_new; /* last element in new chain */
183 /* save address to split */
187 /* determine new address, grow table if necessary */
189 new_address = lh->maxp + lh->p;
/* bails out (on a missing line) if the physical table cannot grow */
191 if (afs_lhash_accomodate(lh, new_address) < 0) {
195 /* adjust the state variables */
197 /* this makes afs_lhash_address() work with respect
198 * to the expanded table */
/* when p wraps around to maxp the table has doubled; a new splitting
 * round presumably starts here (updates are on missing lines) */
201 if (lh->p == lh->maxp) {
208 #ifdef CHECK_INVARIANTS
209 assert(lh->ltable-1 == new_address);
210 assert(lh->ltable <= lh->ntable);
211 #endif /* CHECK_INVARIANTS */
213 /* relocate records to the new bucket */
215 current = lh->table[old_address];
218 lh->table[new_address] = 0;
/* re-address each record on the split chain; records that now hash to
 * new_address move to the tail of the new chain, the rest stay put */
222 addr = afs_lhash_address(lh, current->key);
223 if (addr == new_address) {
224 /* attach it to the end of the new chain */
226 last_of_new->next = current;
228 lh->table[new_address] = current;
/* unlink from the old chain; line 233 is the head-of-chain case */
231 previous->next = current->next;
233 lh->table[old_address] = current->next;
235 last_of_new = current;
236 current = current->next;
237 last_of_new->next = 0;
239 #ifdef CHECK_INVARIANTS
240 assert(addr == old_address);
241 #endif /* CHECK_INVARIANTS */
242 /* leave it on the old chain */
244 current = current->next;
/* NOTE(review): this is afs_lhash_create(); its name, return type, and
 * several statements (assignment of the equal callback, the initial
 * p/maxp values, the NULL check after the allocate() call) fall on
 * original lines missing from this extract. */
251 int (*equal)(const void *a, const void *b),
252 /* returns true if the elements pointed to by
253 a and b are the same, false otherwise */
255 void *(*allocate)(size_t n),
257 void (*deallocate)(void *p, size_t n)
262 lh = allocate(sizeof *lh);
264 return (afs_lhash *)0;
267 lh->allocate = allocate;
268 lh->deallocate = deallocate;
/* the logical table starts at one full splitting round of buckets */
273 lh->ltable = lh->maxp;
/* size the physical table; on failure undo the afs_lhash allocation */
280 if (afs_lhash_accomodate(lh, lh->ltable-1) < 0) {
281 lh->deallocate(lh, sizeof *lh);
282 return (afs_lhash *)0;
284 #ifdef CHECK_INVARIANTS
285 assert(lh->ltable <= lh->ntable);
286 #endif /* CHECK_INVARIANTS */
288 /* maybe the chunk size should be passed in for hashes, so we
289 * can pass it down here */
291 lh->bucket_list = afs_atomlist_create(sizeof(struct bucket),
293 allocate, deallocate);
294 #ifdef CHECK_INVARIANTS
/* NOTE(review): a NULL bucket_list is only caught when
 * CHECK_INVARIANTS is defined; in other builds the failure appears to
 * go unchecked here — verify against the full file */
295 assert(lh->bucket_list);
296 #endif /* CHECK_INVARIANTS */
298 lh->search_calls = 0;
299 lh->search_tests = 0;
300 lh->remove_calls = 0;
301 lh->remove_tests = 0;
/* NOTE(review): this is the body of afs_lhash_destroy(); its signature
 * falls on original lines missing from this extract. */
311 #ifdef CHECK_INVARIANTS
312 assert(lh->ltable <= lh->ntable);
313 #endif /* CHECK_INVARIANTS */
316 * first, free the buckets
318 * afs_atomlist_destroy() implicitly frees all the memory allocated
319 * from the given afs_atomlist, so there is no need to go through
320 * the hash table to explicitly free each bucket.
323 afs_atomlist_destroy(lh->bucket_list);
325 /* then, free the table and the afs_lhash */
327 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
328 lh->deallocate(lh, sizeof *lh);
/* NOTE(review): this appears to be an iteration function that invokes
 * f(bucket index, key, data) on every record; its name and the inner
 * loop's continuation test fall on original lines missing from this
 * extract. */
334 void(*f)(size_t index, unsigned key, void *data)
339 #ifdef CHECK_INVARIANTS
340 assert(lh->ltable <= lh->ntable);
341 #endif /* CHECK_INVARIANTS */
/* walk every logical bucket chain in table order */
343 for (i = 0; i < lh->ltable; i++) {
344 struct bucket *current;
346 for (current = lh->table[i];
348 current = current->next) {
349 f(i, current->key, current->data);
/* NOTE(review): this is the body of afs_lhash_search(); the signature
 * and the search_calls/search_tests counter updates fall on original
 * lines missing from this extract. */
362 struct bucket *previous;
363 struct bucket *current;
/* hash the key, then scan that chain for a matching record */
367 k = afs_lhash_address(lh, key);
368 for (previous = 0, current = lh->table[k];
370 previous = current, current = current->next) {
372 if (lh->equal(data, current->data)) {
375 * Since we found what we were looking for, move
376 * the bucket to the head of the chain. The
377 * theory is that unused hash table entries will
378 * be left at the end of the chain, where they
379 * will not cause search times to increase.
381 * This optimization is both easy to understand
382 * and cheap to execute, so we go ahead and do
/* move-to-front; presumably guarded by a previous != 0 test on a
 * missing line, since the head case needs no relinking */
388 previous->next = current->next;
389 current->next = lh->table[k];
390 lh->table[k] = current;
393 return current->data;
/* NOTE(review): this appears to be a read-only variant of the search
 * (same chain scan, no move-to-front); its name and signature fall on
 * original lines missing from this extract. */
408 struct bucket *current;
410 k = afs_lhash_address(lh, key);
411 for (current = lh->table[k];
413 current = current->next) {
414 if (lh->equal(data, current->data)) {
415 return current->data;
/* NOTE(review): this is the body of afs_lhash_remove(); the signature,
 * the non-head unlink via *pprev, and the bookkeeping (ndata,
 * remove_calls/remove_tests) fall on original lines missing from this
 * extract. */
430 struct bucket **pprev;
435 k = afs_lhash_address(lh, key);
436 for (pprev = 0, cur = lh->table[k];
438 pprev = &cur->next, cur = cur->next) {
440 if (lh->equal(data, cur->data)) {
/* NOTE(review): this local shadows the parameter of the same name;
 * legal (the parameter was already consumed above), but worth
 * renaming in a future cleanup */
441 void *data = cur->data;
/* head-of-chain unlink; the pprev case is on a missing line */
446 lh->table[k] = cur->next;
/* return the bucket to the allocator; data presumably returned next */
449 afs_atomlist_put(lh->bucket_list, cur);
/* NOTE(review): this is the body of afs_lhash_enter(); the signature,
 * the NULL check on the bucket allocation, and the key/data stores
 * into the new bucket fall on original lines missing from this
 * extract. */
470 buck = afs_atomlist_get(lh->bucket_list);
/* link the new bucket at the head of its chain */
478 k = afs_lhash_address(lh, key);
480 buck->next = lh->table[k];
486 * The load factor is the number of data items in the hash
487 * table divided by the logical table length. We expand the
488 * table when the load factor exceeds a predetermined limit
489 * (LOAD_FACTOR). To avoid floating point, we multiply both
490 * sides of the inequality by the logical table length...
492 if (lh->ndata > LOAD_FACTOR * lh->ltable) {
493 afs_lhash_expand(lh);
/* NOTE(review): "%d" does not match size_t arguments — if this debug
 * printf is ever compiled in, the format should be "%lu" with casts
 * to unsigned long (or "%zu" under C99); passing size_t for %d is
 * undefined behavior */
495 printf("lh->p = %d; lh->maxp = %d\n",
500 #ifdef CHECK_INVARIANTS
501 assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
502 #endif /* CHECK_INVARIANTS */
/* NOTE(review): this is afs_lhash_stat(); the function name, the lh
 * parameter, any NULL checks, and the chain_length/records increments
 * fall on original lines missing from this extract. */
510 struct afs_lhash_stat *sb
515 int min_chain_length = -1;
516 int max_chain_length = -1;
517 size_t buckets = lh->ltable;
524 #ifdef CHECK_INVARIANTS
525 assert(lh->ltable <= lh->ntable);
526 #endif /* CHECK_INVARIANTS */
/* measure every logical chain, tracking min/max length and the total
 * record count */
528 for (k = 0; k < lh->ltable; k++) {
530 int chain_length = 0;
532 for (buck = lh->table[k]; buck; buck = buck->next) {
/* -1 means "not yet set": seed min/max from the first chain measured */
537 if (min_chain_length == -1) {
538 min_chain_length = chain_length;
541 if (max_chain_length == -1) {
542 max_chain_length = chain_length;
545 if (chain_length < min_chain_length) {
546 min_chain_length = chain_length;
549 if (max_chain_length < chain_length) {
550 max_chain_length = chain_length;
/* publish the results into the caller's stat block */
554 sb->min_chain_length = min_chain_length;
555 sb->max_chain_length = max_chain_length;
556 sb->buckets = buckets;
557 sb->records = records;
559 #ifdef CHECK_INVARIANTS
560 assert(lh->ndata == records);
561 #endif /* CHECK_INVARIANTS */
563 sb->search_calls = lh->search_calls;
564 sb->search_tests = lh->search_tests;
565 sb->remove_calls = lh->remove_calls;
566 sb->remove_tests = lh->remove_tests;