2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
15 #include "afs_atomlist.h"
16 #include "afs_lhash.h"
18 /* for now, only turn on assertions in user-space code */
20 #define CHECK_INVARIANTS
27 /* max hash table load factor */
28 enum { LOAD_FACTOR = 5 };
/* NOTE(review): the lines below are fields of the hash-table state
 * struct; the struct's opening declaration is elided in this excerpt. */
37 int (*equal)(const void *a, const void *b);
/* caller-supplied equality predicate: nonzero iff *a and *b match */
39 void *(*allocate)(size_t n);
/* caller-supplied allocator (malloc-style, returns 0 on failure) */
41 void (*deallocate)(void *p, size_t n);
/* caller-supplied deallocator; receives the size originally requested */
43 size_t p; /* index of next bucket to be split */
44 size_t maxp; /* upper bound on p during this expansion */
46 size_t ndata; /* count of data records in hash */
48 size_t ltable; /* logical table size */
50 size_t ntable; /* physical table size */
51 struct bucket **table;
/* chain heads: ntable slots allocated, the first ltable slots in use
 * (invariant ltable <= ntable asserted throughout) */
53 afs_atomlist *bucket_list; /* hash bucket allocator */
55 size_t search_calls; /* cumulative afs_lhash_search() call count */
56 size_t search_tests; /* cumulative afs_lhash_search() comparison count */
57 size_t remove_calls; /* cumulative afs_lhash_remove() call count */
58 size_t remove_tests; /* cumulative afs_lhash_remove() comparison count */
62 * make sure the given hash table can accomodate the given index
63 * value; expand the bucket table if necessary
65 * returns 0 on success, <0 on failure
/* NOTE(review): the signature of afs_lhash_accomodate() and several
 * body lines are elided in this excerpt. */
75 * The usual approach to growing ntable to accomodate max_index
76 * would be to double ntable enough times. This would give us
77 * an exponential backoff in how many times we need to grow
78 * ntable. However, there is a space tradeoff.
80 * Since afs_lhash may be used in an environment where memory is
81 * constrained, we choose instead to grow ntable in fixed
82 * increments. This may have a larger total cost, but it is
83 * amortized in smaller increments.
85 * If even this cost is too great, we can consider adopting the
86 * two-level array approach mentioned in the paper. This could
87 * keep the sizes of the allocations more consistent, and also
88 * reduce the amount of data copying. However, we won't do that
89 * until we see real results that show that the benefit of the
90 * additional complexity is worth it.
92 enum { ntable_inc = 1024 / sizeof *lh->table };
/* grow the physical table in fixed chunks of ntable_inc pointers,
 * i.e. roughly 1KB worth of bucket pointers per increment */
95 struct bucket **new_table;
98 /* if the given index fits, we're done */
100 if (max_index < lh->ntable)
103 /* otherwise, determine new_ntable and allocate new_table */
105 if (lh->ntable == (size_t)0) {
106 new_ntable = ntable_inc;
108 new_ntable = lh->ntable;
111 for(; !(max_index < new_ntable); new_ntable += ntable_inc)
/* bump new_ntable in ntable_inc steps until max_index fits */
114 new_table = lh->allocate(new_ntable * sizeof *lh->table);
/* NOTE(review): the allocation-failure check is elided here;
 * presumably a null new_table returns <0 -- confirm in full source. */
119 /* initialize new_table from the old table */
121 for (i = 0; i < lh->ntable; i++) {
122 new_table[i] = lh->table[i];
/* copy existing chain heads; the newly added slots are then cleared */
124 for (i = lh->ntable; i < new_ntable; i++) {
129 * free the old table, if any, and switch to the new table
131 * (when called from afs_lhash_create(), the table is empty)
134 if (lh->table && lh->ntable) {
135 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
137 lh->ntable = new_ntable;
138 lh->table = new_table;
144 * given a hash table and a key value, returns the index corresponding
/* (rest of the header comment and the function signature are elided
 * in this excerpt) */
154 enum { prime = 1048583 };
/* scramble the key with a fixed large prime before bucketing */
158 h = key % prime; /* 0 <= h < prime */
/* linear-hashing address computation: a bucket whose index is below
 * the split pointer p has already been split this round, so it is
 * re-addressed with the doubled modulus (Litwin's linear hashing). */
160 address = h % lh->maxp;
161 if (address < lh->p) {
162 address = h % (2 * lh->maxp);
169 * if possible, expand the logical size of the given hash table
/* (the signature of afs_lhash_expand() is elided in this excerpt) */
176 size_t old_address; /* index of bucket to split */
177 size_t new_address; /* index of new bucket */
179 struct bucket *current; /* for scanning down old chain */
180 struct bucket *previous;
182 struct bucket *last_of_new; /* last element in new chain */
184 /* save address to split */
/* (the assignment of old_address is elided; presumably lh->p at
 * entry -- TODO confirm against full source) */
188 /* determine new address, grow table if necessary */
190 new_address = lh->maxp + lh->p;
192 if (afs_lhash_accomodate(lh, new_address) < 0) {
/* cannot grow the physical table: leave the hash unchanged */
196 /* adjust the state variables */
198 /* this makes afs_lhash_address() work with respect
199 * to the expanded table */
202 if (lh->p == lh->maxp) {
/* a full doubling round has completed; the reset of p and the
 * doubling of maxp are elided in this excerpt */
209 #ifdef CHECK_INVARIANTS
210 assert(lh->ltable-1 == new_address);
211 assert(lh->ltable <= lh->ntable);
212 #endif /* CHECK_INVARIANTS */
214 /* relocate records to the new bucket */
216 current = lh->table[old_address];
219 lh->table[new_address] = 0;
/* walk the old chain; records that now hash to new_address are
 * appended to the new chain so their relative order is preserved */
223 addr = afs_lhash_address(lh, current->key);
224 if (addr == new_address) {
225 /* attach it to the end of the new chain */
227 last_of_new->next = current;
229 lh->table[new_address] = current;
/* unlink the record from the old chain... */
232 previous->next = current->next;
234 lh->table[old_address] = current->next;
/* ...and make it the tail of the new chain */
236 last_of_new = current;
237 current = current->next;
238 last_of_new->next = 0;
240 #ifdef CHECK_INVARIANTS
241 assert(addr == old_address);
242 #endif /* CHECK_INVARIANTS */
243 /* leave it on the old chain */
245 current = current->next;
252 int (*equal)(const void *a, const void *b),
253 /* returns true if the elements pointed to by
254 a and b are the same, false otherwise */
256 void *(*allocate)(size_t n),
258 void (*deallocate)(void *p, size_t n)
/* constructor: builds an afs_lhash using the caller-supplied memory
 * functions (the function name and opening lines are elided here) */
263 lh = allocate(sizeof *lh);
/* allocation-failure path (the null test on lh is elided) */
265 return (afs_lhash *)0;
268 lh->allocate = allocate;
269 lh->deallocate = deallocate;
274 lh->ltable = lh->maxp;
/* initial logical size equals maxp; the initializers for p, maxp,
 * and ndata are elided in this excerpt */
281 if (afs_lhash_accomodate(lh, lh->ltable-1) < 0) {
282 lh->deallocate(lh, sizeof *lh);
283 return (afs_lhash *)0;
285 #ifdef CHECK_INVARIANTS
286 assert(lh->ltable <= lh->ntable);
287 #endif /* CHECK_INVARIANTS */
289 /* maybe the chunk size should be passed in for hashes, so we
290 * can pass it down here */
292 lh->bucket_list = afs_atomlist_create(sizeof(struct bucket),
294 allocate, deallocate);
295 #ifdef CHECK_INVARIANTS
296 assert(lh->bucket_list);
/* NOTE(review): bucket_list is validated only by this assert, which
 * compiles away when CHECK_INVARIANTS is undefined -- a failed
 * afs_atomlist_create() would then go unnoticed until first use. */
297 #endif /* CHECK_INVARIANTS */
299 lh->search_calls = 0;
300 lh->search_tests = 0;
301 lh->remove_calls = 0;
302 lh->remove_tests = 0;
/* destructor for an afs_lhash (the signature is elided here) */
312 #ifdef CHECK_INVARIANTS
313 assert(lh->ltable <= lh->ntable);
314 #endif /* CHECK_INVARIANTS */
317 * first, free the buckets
319 * afs_atomlist_destroy() implicitly frees all the memory allocated
320 * from the given afs_atomlist, so there is no need to go through
321 * the hash table to explicitly free each bucket.
324 afs_atomlist_destroy(lh->bucket_list);
326 /* then, free the table and the afs_lhash */
328 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
329 lh->deallocate(lh, sizeof *lh);
335 void(*f)(size_t index, unsigned key, void *data)
/* iterator: applies f to every (bucket index, key, data) record;
 * the function name and opening lines are elided in this excerpt */
340 #ifdef CHECK_INVARIANTS
341 assert(lh->ltable <= lh->ntable);
342 #endif /* CHECK_INVARIANTS */
344 for (i = 0; i < lh->ltable; i++) {
345 struct bucket *current;
/* walk the chain hanging off each logical bucket (the inner loop's
 * continuation condition is elided here) */
347 for (current = lh->table[i];
349 current = current->next) {
350 f(i, current->key, current->data);
/* afs_lhash_search(): find the record matching key/data; the
 * signature and the search_calls/search_tests statistics updates
 * are elided in this excerpt. */
363 struct bucket *previous;
364 struct bucket *current;
368 k = afs_lhash_address(lh, key);
369 for (previous = 0, current = lh->table[k];
371 previous = current, current = current->next) {
373 if (lh->equal(data, current->data)) {
376 * Since we found what we were looking for, move
377 * the bucket to the head of the chain. The
378 * theory is that unused hash table entries will
379 * be left at the end of the chain, where they
380 * will not cause search times to increase.
382 * This optimization is both easy to understand
383 * and cheap to execute, so we go ahead and do
/* move-to-front: unlink from mid-chain and push on the head
 * (the guard for previous == 0, i.e. already at head, is
 * elided in this excerpt -- confirm in full source) */
389 previous->next = current->next;
390 current->next = lh->table[k];
391 lh->table[k] = current;
394 return current->data;
/* read-only variant of search: same lookup, but no move-to-front
 * reordering and no mutation of the table (signature elided here) */
409 struct bucket *current;
411 k = afs_lhash_address(lh, key);
412 for (current = lh->table[k];
414 current = current->next) {
415 if (lh->equal(data, current->data)) {
416 return current->data;
/* afs_lhash_remove(): unlink the matching bucket, recycle it via the
 * atom list, and return its data pointer (signature and parts of the
 * body are elided in this excerpt). */
431 struct bucket **pprev;
436 k = afs_lhash_address(lh, key);
437 for (pprev = 0, cur = lh->table[k];
439 pprev = &cur->next, cur = cur->next) {
441 if (lh->equal(data, cur->data)) {
442 void *data = cur->data;
/* NOTE(review): this local shadows the 'data' parameter used by
 * lh->equal() above -- confirm intent in full source. */
447 lh->table[k] = cur->next;
/* head-of-chain unlink; the mid-chain case (*pprev = cur->next)
 * is elided in this excerpt */
450 afs_atomlist_put(lh->bucket_list, cur);
/* return the freed bucket to the atom list for reuse */
/* afs_lhash_enter(): insert a new key/data record (signature, key and
 * data assignments, and the ndata increment are elided here). */
471 buck = afs_atomlist_get(lh->bucket_list);
/* obtain a recycled or freshly allocated bucket from the atom list */
479 k = afs_lhash_address(lh, key);
481 buck->next = lh->table[k];
/* push the new bucket onto the head of its chain (the store to
 * lh->table[k] is elided in this excerpt) */
487 * The load factor is the number of data items in the hash
488 * table divided by the logical table length. We expand the
489 * table when the load factor exceeds a predetermined limit
490 * (LOAD_FACTOR). To avoid floating point, we multiply both
491 * sides of the inequality by the logical table length...
493 if (lh->ndata > LOAD_FACTOR * lh->ltable) {
494 afs_lhash_expand(lh);
496 printf("lh->p = %d; lh->maxp = %d\n",
/* NOTE(review): %d does not match the size_t fields p and maxp;
 * this should be %zu (or the arguments cast) -- looks like
 * debug-only output, confirm context in full source. */
501 #ifdef CHECK_INVARIANTS
502 assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
503 #endif /* CHECK_INVARIANTS */
511 struct afs_lhash_stat *sb
/* fill *sb with chain-length extremes, bucket/record counts, and the
 * cumulative call/comparison statistics (opening lines elided). */
516 int min_chain_length = -1;
517 int max_chain_length = -1;
518 size_t buckets = lh->ltable;
525 #ifdef CHECK_INVARIANTS
526 assert(lh->ltable <= lh->ntable);
527 #endif /* CHECK_INVARIANTS */
529 for (k = 0; k < lh->ltable; k++) {
531 int chain_length = 0;
533 for (buck = lh->table[k]; buck; buck = buck->next) {
/* count this chain's records (the increments of chain_length and
 * records are elided in this excerpt) */
538 if (min_chain_length == -1) {
539 min_chain_length = chain_length;
542 if (max_chain_length == -1) {
543 max_chain_length = chain_length;
/* -1 is a "no chain examined yet" sentinel; afterward track extremes */
546 if (chain_length < min_chain_length) {
547 min_chain_length = chain_length;
550 if (max_chain_length < chain_length) {
551 max_chain_length = chain_length;
555 sb->min_chain_length = min_chain_length;
556 sb->max_chain_length = max_chain_length;
557 sb->buckets = buckets;
558 sb->records = records;
560 #ifdef CHECK_INVARIANTS
561 assert(lh->ndata == records);
562 #endif /* CHECK_INVARIANTS */
564 sb->search_calls = lh->search_calls;
565 sb->search_tests = lh->search_tests;
566 sb->remove_calls = lh->remove_calls;
567 sb->remove_tests = lh->remove_tests;