/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
11 #include "../afs/afs_atomlist.h"
12 #include "../afs/afs_lhash.h"
14 #include "afs_atomlist.h"
15 #include "afs_lhash.h"
16 /* for now, only turn on assertions in user-space code */
18 #define CHECK_INVARIANTS
21 /* max hash table load factor */
22 enum { LOAD_FACTOR = 5 };
31 int (*equal)(const void *a, const void *b);
33 void *(*allocate)(size_t n);
35 void (*deallocate)(void *p, size_t n);
37 size_t p; /* index of next bucket to be split */
38 size_t maxp; /* upper bound on p during this expansion */
40 size_t ndata; /* count of data records in hash */
42 size_t ltable; /* logical table size */
44 size_t ntable; /* physical table size */
45 struct bucket **table;
47 afs_atomlist *bucket_list; /* hash bucket allocator */
49 size_t search_calls; /* cumulative afs_lhash_search() call count */
50 size_t search_tests; /* cumulative afs_lhash_search() comparison count */
51 size_t remove_calls; /* cumulative afs_lhash_remove() call count */
52 size_t remove_tests; /* cumulative afs_lhash_remove() comparison count */
56 * make sure the given hash table can accomodate the given index
57 * value; expand the bucket table if necessary
59 * returns 0 on success, <0 on failure
69 * The usual approach to growing ntable to accomodate max_index
70 * would be to double ntable enough times. This would give us
71 * an exponential backoff in how many times we need to grow
72 * ntable. However, there is a space tradeoff.
74 * Since afs_lhash may be used in an environment where memory is
75 * constrained, we choose instead to grow ntable in fixed
76 * increments. This may have a larger total cost, but it is
77 * amortized in smaller increments.
79 * If even this cost is too great, we can consider adopting the
80 * two-level array approach mentioned in the paper. This could
81 * keep the sizes of the allocations more consistent, and also
82 * reduce the amount of data copying. However, we won't do that
83 * until we see real results that show that the benefit of the
84 * additional complexity is worth it.
86 enum { ntable_inc = 1024 / sizeof *lh->table };
89 struct bucket **new_table;
92 /* if the given index fits, we're done */
94 if (max_index < lh->ntable)
97 /* otherwise, determine new_ntable and allocate new_table */
99 if (lh->ntable == (size_t)0) {
100 new_ntable = ntable_inc;
102 new_ntable = lh->ntable;
105 for(; !(max_index < new_ntable); new_ntable += ntable_inc)
108 new_table = lh->allocate(new_ntable * sizeof *lh->table);
113 /* initialize new_table from the old table */
115 for (i = 0; i < lh->ntable; i++) {
116 new_table[i] = lh->table[i];
118 for (i = lh->ntable; i < new_ntable; i++) {
123 * free the old table, if any, and switch to the new table
125 * (when called from afs_lhash_create(), the table is empty)
128 if (lh->table && lh->ntable) {
129 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
131 lh->ntable = new_ntable;
132 lh->table = new_table;
138 * given a hash table and a key value, returns the index corresponding
148 enum { prime = 1048583 };
152 h = key % prime; /* 0 <= h < prime */
154 address = h % lh->maxp;
155 if (address < lh->p) {
156 address = h % (2 * lh->maxp);
163 * if possible, expand the logical size of the given hash table
170 size_t old_address; /* index of bucket to split */
171 size_t new_address; /* index of new bucket */
173 struct bucket *current; /* for scanning down old chain */
174 struct bucket *previous;
176 struct bucket *last_of_new; /* last element in new chain */
178 /* save address to split */
182 /* determine new address, grow table if necessary */
184 new_address = lh->maxp + lh->p;
186 if (afs_lhash_accomodate(lh, new_address) < 0) {
190 /* adjust the state variables */
192 /* this makes afs_lhash_address() work with respect
193 * to the expanded table */
196 if (lh->p == lh->maxp) {
203 #ifdef CHECK_INVARIANTS
204 assert(lh->ltable-1 == new_address);
205 assert(lh->ltable <= lh->ntable);
206 #endif /* CHECK_INVARIANTS */
208 /* relocate records to the new bucket */
210 current = lh->table[old_address];
213 lh->table[new_address] = 0;
217 addr = afs_lhash_address(lh, current->key);
218 if (addr == new_address) {
219 /* attach it to the end of the new chain */
221 last_of_new->next = current;
223 lh->table[new_address] = current;
226 previous->next = current->next;
228 lh->table[old_address] = current->next;
230 last_of_new = current;
231 current = current->next;
232 last_of_new->next = 0;
234 #ifdef CHECK_INVARIANTS
235 assert(addr == old_address);
236 #endif /* CHECK_INVARIANTS */
237 /* leave it on the old chain */
239 current = current->next;
246 int (*equal)(const void *a, const void *b),
247 /* returns true if the elements pointed to by
248 a and b are the same, false otherwise */
250 void *(*allocate)(size_t n),
252 void (*deallocate)(void *p, size_t n)
257 lh = allocate(sizeof *lh);
259 return (afs_lhash *)0;
262 lh->allocate = allocate;
263 lh->deallocate = deallocate;
268 lh->ltable = lh->maxp;
273 lh->table = (struct bucket **)0;
275 if (afs_lhash_accomodate(lh, lh->ltable-1) < 0) {
276 lh->deallocate(lh, sizeof *lh);
277 return (afs_lhash *)0;
279 #ifdef CHECK_INVARIANTS
280 assert(lh->ltable <= lh->ntable);
281 #endif /* CHECK_INVARIANTS */
283 /* maybe the chunk size should be passed in for hashes, so we
284 * can pass it down here */
286 lh->bucket_list = afs_atomlist_create(sizeof(struct bucket),
288 allocate, deallocate);
289 #ifdef CHECK_INVARIANTS
290 assert(lh->bucket_list);
291 #endif /* CHECK_INVARIANTS */
293 lh->search_calls = 0;
294 lh->search_tests = 0;
295 lh->remove_calls = 0;
296 lh->remove_tests = 0;
306 #ifdef CHECK_INVARIANTS
307 assert(lh->ltable <= lh->ntable);
308 #endif /* CHECK_INVARIANTS */
311 * first, free the buckets
313 * afs_atomlist_destroy() implicitly frees all the memory allocated
314 * from the given afs_atomlist, so there is no need to go through
315 * the hash table to explicitly free each bucket.
318 afs_atomlist_destroy(lh->bucket_list);
320 /* then, free the table and the afs_lhash */
322 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
323 lh->deallocate(lh, sizeof *lh);
329 void(*f)(size_t index, unsigned key, void *data)
334 #ifdef CHECK_INVARIANTS
335 assert(lh->ltable <= lh->ntable);
336 #endif /* CHECK_INVARIANTS */
338 for (i = 0; i < lh->ltable; i++) {
339 struct bucket *current;
341 for (current = lh->table[i];
343 current = current->next) {
344 f(i, current->key, current->data);
357 struct bucket *previous;
358 struct bucket *current;
362 k = afs_lhash_address(lh, key);
363 for (previous = 0, current = lh->table[k];
365 previous = current, current = current->next) {
367 if (lh->equal(data, current->data)) {
370 * Since we found what we were looking for, move
371 * the bucket to the head of the chain. The
372 * theory is that unused hash table entries will
373 * be left at the end of the chain, where they
374 * will not cause search times to increase.
376 * This optimization is both easy to understand
377 * and cheap to execute, so we go ahead and do
383 previous->next = current->next;
384 current->next = lh->table[k];
385 lh->table[k] = current;
388 return current->data;
403 struct bucket *current;
405 k = afs_lhash_address(lh, key);
406 for (current = lh->table[k];
408 current = current->next) {
409 if (lh->equal(data, current->data)) {
410 return current->data;
425 struct bucket **pprev;
430 k = afs_lhash_address(lh, key);
431 for (pprev = 0, cur = lh->table[k];
433 pprev = &cur->next, cur = cur->next) {
435 if (lh->equal(data, cur->data)) {
436 void *data = cur->data;
441 lh->table[k] = cur->next;
444 afs_atomlist_put(lh->bucket_list, cur);
465 buck = afs_atomlist_get(lh->bucket_list);
473 k = afs_lhash_address(lh, key);
475 buck->next = lh->table[k];
481 * The load factor is the number of data items in the hash
482 * table divided by the logical table length. We expand the
483 * table when the load factor exceeds a predetermined limit
484 * (LOAD_FACTOR). To avoid floating point, we multiply both
485 * sides of the inequality by the logical table length...
487 if (lh->ndata > LOAD_FACTOR * lh->ltable) {
488 afs_lhash_expand(lh);
490 printf("lh->p = %d; lh->maxp = %d\n",
495 #ifdef CHECK_INVARIANTS
496 assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
497 #endif /* CHECK_INVARIANTS */
505 struct afs_lhash_stat *sb
510 int min_chain_length = -1;
511 int max_chain_length = -1;
512 size_t buckets = lh->ltable;
519 #ifdef CHECK_INVARIANTS
520 assert(lh->ltable <= lh->ntable);
521 #endif /* CHECK_INVARIANTS */
523 for (k = 0; k < lh->ltable; k++) {
525 int chain_length = 0;
527 for (buck = lh->table[k]; buck; buck = buck->next) {
532 if (min_chain_length == -1) {
533 min_chain_length = chain_length;
536 if (max_chain_length == -1) {
537 max_chain_length = chain_length;
540 if (chain_length < min_chain_length) {
541 min_chain_length = chain_length;
544 if (max_chain_length < chain_length) {
545 max_chain_length = chain_length;
549 sb->min_chain_length = min_chain_length;
550 sb->max_chain_length = max_chain_length;
551 sb->buckets = buckets;
552 sb->records = records;
554 #ifdef CHECK_INVARIANTS
555 assert(lh->ndata == records);
556 #endif /* CHECK_INVARIANTS */
558 sb->search_calls = lh->search_calls;
559 sb->search_tests = lh->search_tests;
560 sb->remove_calls = lh->remove_calls;
561 sb->remove_tests = lh->remove_tests;