src/util/afs_lhash.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include <afs/param.h>
  12
  13 RCSID("$Header$");
  14
  15 #ifdef KERNEL
  16 #include "afs_atomlist.h"
  17 #include "afs_lhash.h"
  18 #else /* KERNEL */
  19 #include "afs_atomlist.h"
  20 #include "afs_lhash.h"
  21 /* for now, only turn on assertions in user-space code */
  22 #include <assert.h>
  23 #define CHECK_INVARIANTS
  24 #endif /* KERNEL */
  25
  26 /* max hash table load factor */
  27 enum { LOAD_FACTOR = 5 };
  28
  29 struct bucket {
  30         struct bucket *next;
  31         void *data;
  32         unsigned key;
  33 };
  34
  35 struct afs_lhash {
  36         int (*equal)(const void *a, const void *b);
  37
  38         void *(*allocate)(size_t n);
  39
  40         void (*deallocate)(void *p, size_t n);
  41
  42         size_t          p;      /* index of next bucket to be split */
  43         size_t          maxp;   /* upper bound on p during this expansion */
  44
  45         size_t          ndata;  /* count of data records in hash */
  46
  47         size_t          ltable; /* logical table size */
  48
  49         size_t          ntable; /* physical table size */
  50         struct bucket **table;
  51
  52         afs_atomlist   *bucket_list;    /* hash bucket allocator */
  53
  54         size_t  search_calls;   /* cumulative afs_lhash_search() call count */
  55         size_t  search_tests;   /* cumulative afs_lhash_search() comparison count */
  56         size_t  remove_calls;   /* cumulative afs_lhash_remove() call count */
  57         size_t  remove_tests;   /* cumulative afs_lhash_remove() comparison count */
  58 };
  59
  60 /*
  61  * make sure the given hash table can accomodate the given index
  62  * value; expand the bucket table if necessary
  63  *
  64  * returns 0 on success, <0 on failure
  65  */
  66
  67 static int
  68 afs_lhash_accomodate(
  69     afs_lhash *lh,
  70     size_t max_index
  71 )
  72 {
  73         /*
  74          * The usual approach to growing ntable to accomodate max_index
  75          * would be to double ntable enough times.  This would give us
  76          * an exponential backoff in how many times we need to grow
  77          * ntable.  However, there is a space tradeoff.
  78          *
  79          * Since afs_lhash may be used in an environment where memory is
  80          * constrained, we choose instead to grow ntable in fixed
  81          * increments.  This may have a larger total cost, but it is
  82          * amortized in smaller increments.
  83          *
  84          * If even this cost is too great, we can consider adopting the
  85          * two-level array approach mentioned in the paper.  This could
  86          * keep the sizes of the allocations more consistent, and also
  87          * reduce the amount of data copying.  However, we won't do that
  88          * until we see real results that show that the benefit of the
  89          * additional complexity is worth it.
  90          */
  91         enum { ntable_inc = 1024 / sizeof *lh->table };
  92
  93         size_t          new_ntable;
  94         struct bucket **new_table;
  95         size_t          i;
  96
  97         /* if the given index fits, we're done */
  98
  99         if (max_index < lh->ntable)
 100                 return 0;
 101
 102         /* otherwise, determine new_ntable and allocate new_table */
 103
 104         if (lh->ntable == (size_t)0) {
 105                 new_ntable = ntable_inc;
 106         } else {
 107                 new_ntable = lh->ntable;
 108         }
 109
 110         for(; !(max_index < new_ntable); new_ntable += ntable_inc)
 111                 continue;
 112
 113         new_table = lh->allocate(new_ntable * sizeof *lh->table);
 114         if (!new_table) {
 115                 return -1;
 116         }
 117
 118         /* initialize new_table from the old table */
 119
 120         for (i = 0; i < lh->ntable; i++) {
 121                 new_table[i] = lh->table[i];
 122         }
 123         for (i = lh->ntable; i < new_ntable; i++) {
 124                 new_table[i] = 0;
 125         }
 126
 127         /*
 128          * free the old table, if any, and switch to the new table
 129          *
 130          * (when called from afs_lhash_create(), the table is empty)
 131          */
 132
 133         if (lh->table && lh->ntable) {
 134                 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
 135         }
 136         lh->ntable = new_ntable;
 137         lh->table  = new_table;
 138
 139         return 0;
 140 }
 141
 142 /*
 143  * given a hash table and a key value, returns the index corresponding
 144  * to the key value
 145  */
 146
 147 static size_t
 148 afs_lhash_address(
 149     const afs_lhash *lh,
 150     unsigned key
 151 )
 152 {
 153         enum { prime = 1048583 };
 154         size_t h;
 155         size_t address;
 156
 157         h = key % prime;        /* 0 <= h < prime */
 158
 159         address = h % lh->maxp;
 160         if (address < lh->p) {
 161                 address = h % (2 * lh->maxp);
 162         }
 163
 164         return address;
 165 }
 166
 167 /*
 168  * if possible, expand the logical size of the given hash table
 169  */
 170 static void
 171 afs_lhash_expand(
 172     afs_lhash *lh
 173 )
 174 {
 175         size_t old_address;     /* index of bucket to split */
 176         size_t new_address;     /* index of new bucket */
 177
 178         struct bucket *current; /* for scanning down old chain */
 179         struct bucket *previous;
 180
 181         struct bucket *last_of_new;     /* last element in new chain */
 182
 183         /* save address to split */
 184
 185         old_address = lh->p;
 186
 187         /* determine new address, grow table if necessary */
 188
 189         new_address = lh->maxp + lh->p;
 190
 191         if (afs_lhash_accomodate(lh, new_address) < 0) {
 192                 return;
 193         }
 194
 195         /* adjust the state variables */
 196
 197         /* this makes afs_lhash_address() work with respect
 198          * to the expanded table */
 199
 200         lh->p++;
 201         if (lh->p == lh->maxp) {
 202                 lh->maxp *= 2;
 203                 lh->p = 0;
 204         }
 205
 206         lh->ltable++;
 207
 208 #ifdef CHECK_INVARIANTS
 209         assert(lh->ltable-1 == new_address);
 210         assert(lh->ltable <= lh->ntable);
 211 #endif  /* CHECK_INVARIANTS */
 212
 213         /* relocate records to the new bucket */
 214
 215         current = lh->table[old_address];
 216         previous = 0;
 217         last_of_new = 0;
 218         lh->table[new_address] = 0;
 219
 220         while (current) {
 221                 size_t addr;
 222                 addr = afs_lhash_address(lh, current->key);
 223                 if (addr == new_address) {
 224                         /* attach it to the end of the new chain */
 225                         if (last_of_new) {
 226                                 last_of_new->next = current;
 227                         } else {
 228                                 lh->table[new_address] = current;
 229                         }
 230                         if (previous) {
 231                                 previous->next = current->next;
 232                         } else {
 233                                 lh->table[old_address] = current->next;
 234                         }
 235                         last_of_new = current;
 236                         current = current->next;
 237                         last_of_new->next = 0;
 238                 } else {
 239 #ifdef CHECK_INVARIANTS
 240                         assert(addr == old_address);
 241 #endif  /* CHECK_INVARIANTS */
 242                         /* leave it on the old chain */
 243                         previous = current;
 244                         current = current->next;
 245                 }
 246         }
 247 }
 248
 249 afs_lhash *
 250 afs_lhash_create(
 251     int (*equal)(const void *a, const void *b),
 252         /* returns true if the elements pointed to by
 253            a and b are the same, false otherwise */
 254
 255     void *(*allocate)(size_t n),
 256
 257     void (*deallocate)(void *p, size_t n)
 258 )
 259 {
 260         afs_lhash *lh;
 261
 262         lh = allocate(sizeof *lh);
 263         if (!lh)
 264                 return (afs_lhash *)0;
 265
 266         lh->equal = equal;
 267         lh->allocate = allocate;
 268         lh->deallocate = deallocate;
 269
 270         lh->p = 0;
 271         lh->maxp = 16;
 272
 273         lh->ltable = lh->maxp;
 274
 275         lh->ndata = 0;
 276
 277         lh->ntable = 0;
 278         lh->table = NULL;
 279
 280         if (afs_lhash_accomodate(lh, lh->ltable-1) < 0) {
 281                 lh->deallocate(lh, sizeof *lh);
 282                 return (afs_lhash *)0;
 283         }
 284 #ifdef CHECK_INVARIANTS
 285         assert(lh->ltable <= lh->ntable);
 286 #endif  /* CHECK_INVARIANTS */
 287
 288         /* maybe the chunk size should be passed in for hashes, so we
 289          * can pass it down here */
 290
 291         lh->bucket_list = afs_atomlist_create(sizeof(struct bucket),
 292                                               sizeof(long) * 1024,
 293                                               allocate, deallocate);
 294 #ifdef CHECK_INVARIANTS
 295         assert(lh->bucket_list);
 296 #endif  /* CHECK_INVARIANTS */
 297
 298         lh->search_calls = 0;
 299         lh->search_tests = 0;
 300         lh->remove_calls = 0;
 301         lh->remove_tests = 0;
 302
 303         return lh;
 304 }
 305
 306 void
 307 afs_lhash_destroy(
 308     afs_lhash *lh
 309 )
 310 {
 311 #ifdef CHECK_INVARIANTS
 312         assert(lh->ltable <= lh->ntable);
 313 #endif  /* CHECK_INVARIANTS */
 314
 315         /*
 316          * first, free the buckets
 317          *
 318          * afs_atomlist_destroy() implicitly frees all the memory allocated
 319          * from the given afs_atomlist, so there is no need to go through
 320          * the hash table to explicitly free each bucket.
 321          */
 322
 323         afs_atomlist_destroy(lh->bucket_list);
 324
 325         /* then, free the table and the afs_lhash */
 326
 327         lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
 328         lh->deallocate(lh, sizeof *lh);
 329 }
 330
 331 void
 332 afs_lhash_iter(
 333     afs_lhash *lh,
 334     void(*f)(size_t index, unsigned key, void *data)
 335 )
 336 {
 337         size_t i;
 338
 339 #ifdef CHECK_INVARIANTS
 340         assert(lh->ltable <= lh->ntable);
 341 #endif  /* CHECK_INVARIANTS */
 342
 343         for (i = 0; i < lh->ltable; i++) {
 344                 struct bucket *current;
 345
 346                 for (current = lh->table[i];
 347                      current;
 348                      current = current->next) {
 349                         f(i, current->key, current->data);
 350                 }
 351         }
 352 }
 353
 354 void *
 355 afs_lhash_search(
 356     afs_lhash *lh,
 357     unsigned key,
 358     const void *data
 359 )
 360 {
 361         size_t k;
 362         struct bucket *previous;
 363         struct bucket *current;
 364
 365         lh->search_calls++;
 366
 367         k = afs_lhash_address(lh, key);
 368         for (previous = 0, current = lh->table[k];
 369              current;
 370              previous = current, current = current->next) {
 371                 lh->search_tests++;
 372                 if (lh->equal(data, current->data)) {
 373
 374                         /*
 375                          * Since we found what we were looking for, move
 376                          * the bucket to the head of the chain.  The
 377                          * theory is that unused hash table entries will
 378                          * be left at the end of the chain, where they
 379                          * will not cause search times to increase.
 380                          *
 381                          * This optimization is both easy to understand
 382                          * and cheap to execute, so we go ahead and do
 383                          * it.
 384                          *
 385                          */
 386
 387                         if (previous) {
 388                                 previous->next = current->next;
 389                                 current->next = lh->table[k];
 390                                 lh->table[k] = current;
 391                         }
 392
 393                         return current->data;
 394                 }
 395         }
 396
 397         return 0;
 398 }
 399
 400 void *
 401 afs_lhash_rosearch(
 402     const afs_lhash *lh,
 403     unsigned key,
 404     const void *data
 405 )
 406 {
 407         size_t k;
 408         struct bucket *current;
 409
 410         k = afs_lhash_address(lh, key);
 411         for (current = lh->table[k];
 412              current;
 413              current = current->next) {
 414                 if (lh->equal(data, current->data)) {
 415                         return current->data;
 416                 }
 417         }
 418
 419         return 0;
 420 }
 421
 422 void *
 423 afs_lhash_remove(
 424     afs_lhash *lh,
 425     unsigned key,
 426     const void *data
 427 )
 428 {
 429         size_t k;
 430         struct bucket **pprev;
 431         struct bucket *cur;
 432
 433         lh->remove_calls++;
 434
 435         k = afs_lhash_address(lh, key);
 436         for (pprev = 0, cur = lh->table[k];
 437              cur;
 438              pprev = &cur->next, cur = cur->next) {
 439                 lh->remove_tests++;
 440                 if (lh->equal(data, cur->data)) {
 441                         void *data = cur->data;
 442
 443                         if (pprev) {
 444                                 *pprev = cur->next;
 445                         } else {
 446                                 lh->table[k] = cur->next;
 447                         }
 448
 449                         afs_atomlist_put(lh->bucket_list, cur);
 450
 451                         lh->ndata--;
 452
 453                         return data;
 454                 }
 455         }
 456
 457         return 0;
 458 }
 459
 460 int
 461 afs_lhash_enter(
 462     afs_lhash *lh,
 463     unsigned key,
 464     void *data
 465 )
 466 {
 467         size_t k;
 468         struct bucket *buck;
 469
 470         buck = afs_atomlist_get(lh->bucket_list);
 471         if (!buck) {
 472                 return -1;
 473         }
 474
 475         buck->key = key;
 476         buck->data = data;
 477
 478         k = afs_lhash_address(lh, key);
 479
 480         buck->next = lh->table[k];
 481         lh->table[k] = buck;
 482
 483         lh->ndata++;
 484
 485         /*
 486          * The load factor is the number of data items in the hash
 487          * table divided by the logical table length.  We expand the
 488          * table when the load factor exceeds a predetermined limit
 489          * (LOAD_FACTOR).  To avoid floating point, we multiply both
 490          * sides of the inequality by the logical table length...
 491          */
 492         if (lh->ndata > LOAD_FACTOR * lh->ltable) {
 493                 afs_lhash_expand(lh);
 494 #if 0
 495                 printf("lh->p = %d; lh->maxp = %d\n",
 496                         lh->p, lh->maxp);
 497 #endif
 498         }
 499
 500 #ifdef CHECK_INVARIANTS
 501         assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
 502 #endif  /* CHECK_INVARIANTS */
 503
 504         return 0;
 505 }
 506
 507 int
 508 afs_lhash_stat(
 509     afs_lhash *lh,
 510     struct afs_lhash_stat *sb
 511 )
 512 {
 513         size_t k;
 514
 515         int     min_chain_length = -1;
 516         int     max_chain_length = -1;
 517         size_t  buckets = lh->ltable;
 518         size_t  records = 0;
 519
 520         if (!sb) {
 521                 return -1;
 522         }
 523
 524 #ifdef CHECK_INVARIANTS
 525         assert(lh->ltable <= lh->ntable);
 526 #endif  /* CHECK_INVARIANTS */
 527
 528         for (k = 0; k < lh->ltable; k++) {
 529                 struct bucket *buck;
 530                 int chain_length = 0;
 531
 532                 for (buck = lh->table[k]; buck; buck = buck->next) {
 533                         chain_length++;
 534                         records++;
 535                 }
 536
 537                 if (min_chain_length == -1) {
 538                         min_chain_length = chain_length;
 539                 }
 540
 541                 if (max_chain_length == -1) {
 542                         max_chain_length = chain_length;
 543                 }
 544
 545                 if (chain_length < min_chain_length) {
 546                         min_chain_length = chain_length;
 547                 }
 548
 549                 if (max_chain_length < chain_length) {
 550                         max_chain_length = chain_length;
 551                 }
 552         }
 553
 554         sb->min_chain_length = min_chain_length;
 555         sb->max_chain_length = max_chain_length;
 556         sb->buckets = buckets;
 557         sb->records = records;
 558
 559 #ifdef CHECK_INVARIANTS
 560         assert(lh->ndata == records);
 561 #endif  /* CHECK_INVARIANTS */
 562
 563         sb->search_calls = lh->search_calls;
 564         sb->search_tests = lh->search_tests;
 565         sb->remove_calls = lh->remove_calls;
 566         sb->remove_tests = lh->remove_tests;
 567
 568         return 0;
 569 }