src/util/afs_lhash.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include <afs/param.h>
  12
  13 RCSID("$Header$");
  14
  15 #include "afs_atomlist.h"
  16 #include "afs_lhash.h"
  17 #ifndef KERNEL
  18 /* for now, only turn on assertions in user-space code */
  19 #include <assert.h>
  20 #define CHECK_INVARIANTS
  21 #endif /* !KERNEL */
  22
  23 #ifndef NULL
  24 #define NULL 0
  25 #endif
  26
  27 /* max hash table load factor */
  28 enum { LOAD_FACTOR = 5 };
  29
  30 struct bucket {
  31         struct bucket *next;
  32         void *data;
  33         unsigned key;
  34 };
  35
  36 struct afs_lhash {
  37         int (*equal)(const void *a, const void *b);
  38
  39         void *(*allocate)(size_t n);
  40
  41         void (*deallocate)(void *p, size_t n);
  42
  43         size_t          p;      /* index of next bucket to be split */
  44         size_t          maxp;   /* upper bound on p during this expansion */
  45
  46         size_t          ndata;  /* count of data records in hash */
  47
  48         size_t          ltable; /* logical table size */
  49
  50         size_t          ntable; /* physical table size */
  51         struct bucket **table;
  52
  53         afs_atomlist   *bucket_list;    /* hash bucket allocator */
  54
  55         size_t  search_calls;   /* cumulative afs_lhash_search() call count */
  56         size_t  search_tests;   /* cumulative afs_lhash_search() comparison count */
  57         size_t  remove_calls;   /* cumulative afs_lhash_remove() call count */
  58         size_t  remove_tests;   /* cumulative afs_lhash_remove() comparison count */
  59 };
  60
  61 /*
  62  * make sure the given hash table can accomodate the given index
  63  * value; expand the bucket table if necessary
  64  *
  65  * returns 0 on success, <0 on failure
  66  */
  67
  68 static int
  69 afs_lhash_accomodate(
  70     afs_lhash *lh,
  71     size_t max_index
  72 )
  73 {
  74         /*
  75          * The usual approach to growing ntable to accomodate max_index
  76          * would be to double ntable enough times.  This would give us
  77          * an exponential backoff in how many times we need to grow
  78          * ntable.  However, there is a space tradeoff.
  79          *
  80          * Since afs_lhash may be used in an environment where memory is
  81          * constrained, we choose instead to grow ntable in fixed
  82          * increments.  This may have a larger total cost, but it is
  83          * amortized in smaller increments.
  84          *
  85          * If even this cost is too great, we can consider adopting the
  86          * two-level array approach mentioned in the paper.  This could
  87          * keep the sizes of the allocations more consistent, and also
  88          * reduce the amount of data copying.  However, we won't do that
  89          * until we see real results that show that the benefit of the
  90          * additional complexity is worth it.
  91          */
  92         enum { ntable_inc = 1024 / sizeof *lh->table };
  93
  94         size_t          new_ntable;
  95         struct bucket **new_table;
  96         size_t          i;
  97
  98         /* if the given index fits, we're done */
  99
 100         if (max_index < lh->ntable)
 101                 return 0;
 102
 103         /* otherwise, determine new_ntable and allocate new_table */
 104
 105         if (lh->ntable == (size_t)0) {
 106                 new_ntable = ntable_inc;
 107         } else {
 108                 new_ntable = lh->ntable;
 109         }
 110
 111         for(; !(max_index < new_ntable); new_ntable += ntable_inc)
 112                 continue;
 113
 114         new_table = lh->allocate(new_ntable * sizeof *lh->table);
 115         if (!new_table) {
 116                 return -1;
 117         }
 118
 119         /* initialize new_table from the old table */
 120
 121         for (i = 0; i < lh->ntable; i++) {
 122                 new_table[i] = lh->table[i];
 123         }
 124         for (i = lh->ntable; i < new_ntable; i++) {
 125                 new_table[i] = 0;
 126         }
 127
 128         /*
 129          * free the old table, if any, and switch to the new table
 130          *
 131          * (when called from afs_lhash_create(), the table is empty)
 132          */
 133
 134         if (lh->table && lh->ntable) {
 135                 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
 136         }
 137         lh->ntable = new_ntable;
 138         lh->table  = new_table;
 139
 140         return 0;
 141 }
 142
 143 /*
 144  * given a hash table and a key value, returns the index corresponding
 145  * to the key value
 146  */
 147
 148 static size_t
 149 afs_lhash_address(
 150     const afs_lhash *lh,
 151     unsigned key
 152 )
 153 {
 154         enum { prime = 1048583 };
 155         size_t h;
 156         size_t address;
 157
 158         h = key % prime;        /* 0 <= h < prime */
 159
 160         address = h % lh->maxp;
 161         if (address < lh->p) {
 162                 address = h % (2 * lh->maxp);
 163         }
 164
 165         return address;
 166 }
 167
 168 /*
 169  * if possible, expand the logical size of the given hash table
 170  */
 171 static void
 172 afs_lhash_expand(
 173     afs_lhash *lh
 174 )
 175 {
 176         size_t old_address;     /* index of bucket to split */
 177         size_t new_address;     /* index of new bucket */
 178
 179         struct bucket *current; /* for scanning down old chain */
 180         struct bucket *previous;
 181
 182         struct bucket *last_of_new;     /* last element in new chain */
 183
 184         /* save address to split */
 185
 186         old_address = lh->p;
 187
 188         /* determine new address, grow table if necessary */
 189
 190         new_address = lh->maxp + lh->p;
 191
 192         if (afs_lhash_accomodate(lh, new_address) < 0) {
 193                 return;
 194         }
 195
 196         /* adjust the state variables */
 197
 198         /* this makes afs_lhash_address() work with respect
 199          * to the expanded table */
 200
 201         lh->p++;
 202         if (lh->p == lh->maxp) {
 203                 lh->maxp *= 2;
 204                 lh->p = 0;
 205         }
 206
 207         lh->ltable++;
 208
 209 #ifdef CHECK_INVARIANTS
 210         assert(lh->ltable-1 == new_address);
 211         assert(lh->ltable <= lh->ntable);
 212 #endif  /* CHECK_INVARIANTS */
 213
 214         /* relocate records to the new bucket */
 215
 216         current = lh->table[old_address];
 217         previous = 0;
 218         last_of_new = 0;
 219         lh->table[new_address] = 0;
 220
 221         while (current) {
 222                 size_t addr;
 223                 addr = afs_lhash_address(lh, current->key);
 224                 if (addr == new_address) {
 225                         /* attach it to the end of the new chain */
 226                         if (last_of_new) {
 227                                 last_of_new->next = current;
 228                         } else {
 229                                 lh->table[new_address] = current;
 230                         }
 231                         if (previous) {
 232                                 previous->next = current->next;
 233                         } else {
 234                                 lh->table[old_address] = current->next;
 235                         }
 236                         last_of_new = current;
 237                         current = current->next;
 238                         last_of_new->next = 0;
 239                 } else {
 240 #ifdef CHECK_INVARIANTS
 241                         assert(addr == old_address);
 242 #endif  /* CHECK_INVARIANTS */
 243                         /* leave it on the old chain */
 244                         previous = current;
 245                         current = current->next;
 246                 }
 247         }
 248 }
 249
 250 afs_lhash *
 251 afs_lhash_create(
 252     int (*equal)(const void *a, const void *b),
 253         /* returns true if the elements pointed to by
 254            a and b are the same, false otherwise */
 255
 256     void *(*allocate)(size_t n),
 257
 258     void (*deallocate)(void *p, size_t n)
 259 )
 260 {
 261         afs_lhash *lh;
 262
 263         lh = allocate(sizeof *lh);
 264         if (!lh)
 265                 return (afs_lhash *)0;
 266
 267         lh->equal = equal;
 268         lh->allocate = allocate;
 269         lh->deallocate = deallocate;
 270
 271         lh->p = 0;
 272         lh->maxp = 16;
 273
 274         lh->ltable = lh->maxp;
 275
 276         lh->ndata = 0;
 277
 278         lh->ntable = 0;
 279         lh->table = NULL;
 280
 281         if (afs_lhash_accomodate(lh, lh->ltable-1) < 0) {
 282                 lh->deallocate(lh, sizeof *lh);
 283                 return (afs_lhash *)0;
 284         }
 285 #ifdef CHECK_INVARIANTS
 286         assert(lh->ltable <= lh->ntable);
 287 #endif  /* CHECK_INVARIANTS */
 288
 289         /* maybe the chunk size should be passed in for hashes, so we
 290          * can pass it down here */
 291
 292         lh->bucket_list = afs_atomlist_create(sizeof(struct bucket),
 293                                               sizeof(long) * 1024,
 294                                               allocate, deallocate);
 295 #ifdef CHECK_INVARIANTS
 296         assert(lh->bucket_list);
 297 #endif  /* CHECK_INVARIANTS */
 298
 299         lh->search_calls = 0;
 300         lh->search_tests = 0;
 301         lh->remove_calls = 0;
 302         lh->remove_tests = 0;
 303
 304         return lh;
 305 }
 306
 307 void
 308 afs_lhash_destroy(
 309     afs_lhash *lh
 310 )
 311 {
 312 #ifdef CHECK_INVARIANTS
 313         assert(lh->ltable <= lh->ntable);
 314 #endif  /* CHECK_INVARIANTS */
 315
 316         /*
 317          * first, free the buckets
 318          *
 319          * afs_atomlist_destroy() implicitly frees all the memory allocated
 320          * from the given afs_atomlist, so there is no need to go through
 321          * the hash table to explicitly free each bucket.
 322          */
 323
 324         afs_atomlist_destroy(lh->bucket_list);
 325
 326         /* then, free the table and the afs_lhash */
 327
 328         lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
 329         lh->deallocate(lh, sizeof *lh);
 330 }
 331
 332 void
 333 afs_lhash_iter(
 334     afs_lhash *lh,
 335     void(*f)(size_t index, unsigned key, void *data)
 336 )
 337 {
 338         size_t i;
 339
 340 #ifdef CHECK_INVARIANTS
 341         assert(lh->ltable <= lh->ntable);
 342 #endif  /* CHECK_INVARIANTS */
 343
 344         for (i = 0; i < lh->ltable; i++) {
 345                 struct bucket *current;
 346
 347                 for (current = lh->table[i];
 348                      current;
 349                      current = current->next) {
 350                         f(i, current->key, current->data);
 351                 }
 352         }
 353 }
 354
 355 void *
 356 afs_lhash_search(
 357     afs_lhash *lh,
 358     unsigned key,
 359     const void *data
 360 )
 361 {
 362         size_t k;
 363         struct bucket *previous;
 364         struct bucket *current;
 365
 366         lh->search_calls++;
 367
 368         k = afs_lhash_address(lh, key);
 369         for (previous = 0, current = lh->table[k];
 370              current;
 371              previous = current, current = current->next) {
 372                 lh->search_tests++;
 373                 if (lh->equal(data, current->data)) {
 374
 375                         /*
 376                          * Since we found what we were looking for, move
 377                          * the bucket to the head of the chain.  The
 378                          * theory is that unused hash table entries will
 379                          * be left at the end of the chain, where they
 380                          * will not cause search times to increase.
 381                          *
 382                          * This optimization is both easy to understand
 383                          * and cheap to execute, so we go ahead and do
 384                          * it.
 385                          *
 386                          */
 387
 388                         if (previous) {
 389                                 previous->next = current->next;
 390                                 current->next = lh->table[k];
 391                                 lh->table[k] = current;
 392                         }
 393
 394                         return current->data;
 395                 }
 396         }
 397
 398         return 0;
 399 }
 400
 401 void *
 402 afs_lhash_rosearch(
 403     const afs_lhash *lh,
 404     unsigned key,
 405     const void *data
 406 )
 407 {
 408         size_t k;
 409         struct bucket *current;
 410
 411         k = afs_lhash_address(lh, key);
 412         for (current = lh->table[k];
 413              current;
 414              current = current->next) {
 415                 if (lh->equal(data, current->data)) {
 416                         return current->data;
 417                 }
 418         }
 419
 420         return 0;
 421 }
 422
 423 void *
 424 afs_lhash_remove(
 425     afs_lhash *lh,
 426     unsigned key,
 427     const void *data
 428 )
 429 {
 430         size_t k;
 431         struct bucket **pprev;
 432         struct bucket *cur;
 433
 434         lh->remove_calls++;
 435
 436         k = afs_lhash_address(lh, key);
 437         for (pprev = 0, cur = lh->table[k];
 438              cur;
 439              pprev = &cur->next, cur = cur->next) {
 440                 lh->remove_tests++;
 441                 if (lh->equal(data, cur->data)) {
 442                         void *data = cur->data;
 443
 444                         if (pprev) {
 445                                 *pprev = cur->next;
 446                         } else {
 447                                 lh->table[k] = cur->next;
 448                         }
 449
 450                         afs_atomlist_put(lh->bucket_list, cur);
 451
 452                         lh->ndata--;
 453
 454                         return data;
 455                 }
 456         }
 457
 458         return 0;
 459 }
 460
 461 int
 462 afs_lhash_enter(
 463     afs_lhash *lh,
 464     unsigned key,
 465     void *data
 466 )
 467 {
 468         size_t k;
 469         struct bucket *buck;
 470
 471         buck = afs_atomlist_get(lh->bucket_list);
 472         if (!buck) {
 473                 return -1;
 474         }
 475
 476         buck->key = key;
 477         buck->data = data;
 478
 479         k = afs_lhash_address(lh, key);
 480
 481         buck->next = lh->table[k];
 482         lh->table[k] = buck;
 483
 484         lh->ndata++;
 485
 486         /*
 487          * The load factor is the number of data items in the hash
 488          * table divided by the logical table length.  We expand the
 489          * table when the load factor exceeds a predetermined limit
 490          * (LOAD_FACTOR).  To avoid floating point, we multiply both
 491          * sides of the inequality by the logical table length...
 492          */
 493         if (lh->ndata > LOAD_FACTOR * lh->ltable) {
 494                 afs_lhash_expand(lh);
 495 #if 0
 496                 printf("lh->p = %d; lh->maxp = %d\n",
 497                         lh->p, lh->maxp);
 498 #endif
 499         }
 500
 501 #ifdef CHECK_INVARIANTS
 502         assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
 503 #endif  /* CHECK_INVARIANTS */
 504
 505         return 0;
 506 }
 507
 508 int
 509 afs_lhash_stat(
 510     afs_lhash *lh,
 511     struct afs_lhash_stat *sb
 512 )
 513 {
 514         size_t k;
 515
 516         int     min_chain_length = -1;
 517         int     max_chain_length = -1;
 518         size_t  buckets = lh->ltable;
 519         size_t  records = 0;
 520
 521         if (!sb) {
 522                 return -1;
 523         }
 524
 525 #ifdef CHECK_INVARIANTS
 526         assert(lh->ltable <= lh->ntable);
 527 #endif  /* CHECK_INVARIANTS */
 528
 529         for (k = 0; k < lh->ltable; k++) {
 530                 struct bucket *buck;
 531                 int chain_length = 0;
 532
 533                 for (buck = lh->table[k]; buck; buck = buck->next) {
 534                         chain_length++;
 535                         records++;
 536                 }
 537
 538                 if (min_chain_length == -1) {
 539                         min_chain_length = chain_length;
 540                 }
 541
 542                 if (max_chain_length == -1) {
 543                         max_chain_length = chain_length;
 544                 }
 545
 546                 if (chain_length < min_chain_length) {
 547                         min_chain_length = chain_length;
 548                 }
 549
 550                 if (max_chain_length < chain_length) {
 551                         max_chain_length = chain_length;
 552                 }
 553         }
 554
 555         sb->min_chain_length = min_chain_length;
 556         sb->max_chain_length = max_chain_length;
 557         sb->buckets = buckets;
 558         sb->records = records;
 559
 560 #ifdef CHECK_INVARIANTS
 561         assert(lh->ndata == records);
 562 #endif  /* CHECK_INVARIANTS */
 563
 564         sb->search_calls = lh->search_calls;
 565         sb->search_tests = lh->search_tests;
 566         sb->remove_calls = lh->remove_calls;
 567         sb->remove_tests = lh->remove_tests;
 568
 569         return 0;
 570 }